bedrock-agentcore 0.2.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/src/memory/integrations/strands/factory.d.ts +78 -0
- package/dist/src/memory/integrations/strands/factory.d.ts.map +1 -0
- package/dist/src/memory/integrations/strands/factory.js +108 -0
- package/dist/src/memory/integrations/strands/factory.js.map +1 -0
- package/dist/src/memory/integrations/strands/format.d.ts +17 -0
- package/dist/src/memory/integrations/strands/format.d.ts.map +1 -0
- package/dist/src/memory/integrations/strands/format.js +29 -0
- package/dist/src/memory/integrations/strands/format.js.map +1 -0
- package/dist/src/memory/integrations/strands/index.d.ts +11 -0
- package/dist/src/memory/integrations/strands/index.d.ts.map +1 -0
- package/dist/src/memory/integrations/strands/index.js +6 -0
- package/dist/src/memory/integrations/strands/index.js.map +1 -0
- package/dist/src/memory/integrations/strands/logger.d.ts +19 -0
- package/dist/src/memory/integrations/strands/logger.d.ts.map +1 -0
- package/dist/src/memory/integrations/strands/logger.js +27 -0
- package/dist/src/memory/integrations/strands/logger.js.map +1 -0
- package/dist/src/memory/integrations/strands/sender.d.ts +79 -0
- package/dist/src/memory/integrations/strands/sender.d.ts.map +1 -0
- package/dist/src/memory/integrations/strands/sender.js +170 -0
- package/dist/src/memory/integrations/strands/sender.js.map +1 -0
- package/dist/src/memory/integrations/strands/store.d.ts +28 -0
- package/dist/src/memory/integrations/strands/store.d.ts.map +1 -0
- package/dist/src/memory/integrations/strands/store.js +154 -0
- package/dist/src/memory/integrations/strands/store.js.map +1 -0
- package/dist/src/memory/integrations/strands/types.d.ts +122 -0
- package/dist/src/memory/integrations/strands/types.d.ts.map +1 -0
- package/dist/src/memory/integrations/strands/types.js +73 -0
- package/dist/src/memory/integrations/strands/types.js.map +1 -0
- package/dist/src/runtime/app.d.ts +7 -2
- package/dist/src/runtime/app.d.ts.map +1 -1
- package/dist/src/runtime/app.js +7 -4
- package/dist/src/runtime/app.js.map +1 -1
- package/dist/src/runtime/client.d.ts +79 -11
- package/dist/src/runtime/client.d.ts.map +1 -1
- package/dist/src/runtime/client.js +230 -79
- package/dist/src/runtime/client.js.map +1 -1
- package/dist/src/runtime/index.d.ts +5 -0
- package/dist/src/runtime/index.d.ts.map +1 -1
- package/dist/src/runtime/index.js +3 -0
- package/dist/src/runtime/index.js.map +1 -1
- package/dist/src/runtime/shell/config.d.ts +81 -0
- package/dist/src/runtime/shell/config.d.ts.map +1 -0
- package/dist/src/runtime/shell/config.js +15 -0
- package/dist/src/runtime/shell/config.js.map +1 -0
- package/dist/src/runtime/shell/index.d.ts +9 -0
- package/dist/src/runtime/shell/index.d.ts.map +1 -0
- package/dist/src/runtime/shell/index.js +6 -0
- package/dist/src/runtime/shell/index.js.map +1 -0
- package/dist/src/runtime/shell/protocol.d.ts +45 -0
- package/dist/src/runtime/shell/protocol.d.ts.map +1 -0
- package/dist/src/runtime/shell/protocol.js +99 -0
- package/dist/src/runtime/shell/protocol.js.map +1 -0
- package/dist/src/runtime/shell/session.d.ts +240 -0
- package/dist/src/runtime/shell/session.d.ts.map +1 -0
- package/dist/src/runtime/shell/session.js +880 -0
- package/dist/src/runtime/shell/session.js.map +1 -0
- package/dist/src/runtime/shell/validation.d.ts +8 -0
- package/dist/src/runtime/shell/validation.d.ts.map +1 -0
- package/dist/src/runtime/shell/validation.js +17 -0
- package/dist/src/runtime/shell/validation.js.map +1 -0
- package/dist/src/runtime/types.d.ts +89 -0
- package/dist/src/runtime/types.d.ts.map +1 -1
- package/package.json +13 -4
|
@@ -0,0 +1,880 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ShellSession — async-iterable interactive PTY WebSocket session.
|
|
3
|
+
*
|
|
4
|
+
* Connects on `connect()`, reads the initial STATUS confirmation frame, and exposes
|
|
5
|
+
* typed `send()` / `resize()` / `[Symbol.asyncIterator]()` / `close()`.
|
|
6
|
+
*
|
|
7
|
+
* When `reconnectConfig` is provided, transparently reconnects on unexpected disconnects
|
|
8
|
+
* using the same `shellId` so the shell's working directory, environment, background jobs,
|
|
9
|
+
* and up to 256 KB of buffered output are preserved on the server.
|
|
10
|
+
*
|
|
11
|
+
* Reconnect restores the *connection* on its own (it is driven by the socket close event,
|
|
12
|
+
* not by your read loop, and `send()`/`resize()` wait for it). However, on reattach the
|
|
13
|
+
* server replays the buffered output as inbound frames — to receive that replay (and to see
|
|
14
|
+
* `bytesDropped` updated and `exitCode` set) you must be consuming the session with
|
|
15
|
+
* `for await (const frame of shell)`. A write-only caller that never iterates stays
|
|
16
|
+
* connected across drops but will not observe the replayed output. Keep a `for await` loop
|
|
17
|
+
* running for the life of the session.
|
|
18
|
+
*
|
|
19
|
+
* @example
|
|
20
|
+
* ```typescript
|
|
21
|
+
* const shell = await client.openShell({ runtimeArn })
|
|
22
|
+
* try {
|
|
23
|
+
* await shell.send('cat /etc/os-release\n')
|
|
24
|
+
* for await (const frame of shell) {
|
|
25
|
+
* if (frame.channel === ShellChannel.STDOUT) process.stdout.write(frame.text)
|
|
26
|
+
* }
|
|
27
|
+
* } finally {
|
|
28
|
+
* await shell.close()
|
|
29
|
+
* }
|
|
30
|
+
* ```
|
|
31
|
+
*/
|
|
32
|
+
import WebSocket from 'ws';
|
|
33
|
+
import { once, on } from 'events';
|
|
34
|
+
import { randomUUID } from 'crypto';
|
|
35
|
+
import { Buffer } from 'buffer';
|
|
36
|
+
import { ShellFramer, ShellChannel } from './protocol.js';
|
|
37
|
+
import { validateShellId } from './validation.js';
|
|
38
|
+
import { DEFAULT_BASE_DELAY, DEFAULT_KEEPALIVE_INTERVAL, DEFAULT_MAX_DELAY, DEFAULT_MAX_RETRIES, DEFAULT_METADATA_TIMEOUT, DEFAULT_OUTER_LOOP_DELAY, DEFAULT_RECONNECT_WINDOW, noopLogger, } from './config.js';
|
|
39
|
+
/** Header names in the 101 Switching Protocols response (lowercase per HTTP/1.1). */
|
|
40
|
+
const SESSION_HEADER = 'x-amzn-bedrock-agentcore-runtime-session-id';
|
|
41
|
+
const SHELL_ID_HEADER = 'x-amzn-bedrock-agentcore-shell-id';
|
|
42
|
+
/**
|
|
43
|
+
* Async-iterable shell session wrapping a live PTY WebSocket.
|
|
44
|
+
*
|
|
45
|
+
* Read-only observable attributes (updated by the session as events arrive):
|
|
46
|
+
* - `shellId` — Server-confirmed shell identifier. Preserve to reconnect to the same PTY.
|
|
47
|
+
* - `sessionId` — Runtime session ID routing to the VM.
|
|
48
|
+
* - `reconnected` — True when the most recent connect reattached an existing PTY.
|
|
49
|
+
* - `kicked` — True when another client connected with the same shellId (close 4000).
|
|
50
|
+
* Check this after the `for await` loop exits to distinguish a kick from
|
|
51
|
+
* a clean shell exit.
|
|
52
|
+
* - `bytesDropped` — PTY ring-buffer bytes lost during the most recent disconnect, as
|
|
53
|
+
* reported by the server in the reconnect confirmation frame.
|
|
54
|
+
* Zero if no overflow occurred or on a fresh connection.
|
|
55
|
+
* - `exitCode` — Shell process exit code. `null` until the shell exits; `0` for a clean
|
|
56
|
+
* exit. Check this after the `for await` loop exits alongside `kicked`.
|
|
57
|
+
*/
|
|
58
|
+
export class ShellSession {
|
|
59
|
+
_shellId;
|
|
60
|
+
_sessionId;
|
|
61
|
+
_reconnected = false;
|
|
62
|
+
_kicked = false;
|
|
63
|
+
_bytesDropped = 0;
|
|
64
|
+
_exitCode = null;
|
|
65
|
+
/** Server-confirmed shell identifier. */
|
|
66
|
+
get shellId() {
|
|
67
|
+
return this._shellId;
|
|
68
|
+
}
|
|
69
|
+
/** Runtime session ID routing to the VM. */
|
|
70
|
+
get sessionId() {
|
|
71
|
+
return this._sessionId;
|
|
72
|
+
}
|
|
73
|
+
/** True when the most recent connect reattached an existing PTY. */
|
|
74
|
+
get reconnected() {
|
|
75
|
+
return this._reconnected;
|
|
76
|
+
}
|
|
77
|
+
/**
|
|
78
|
+
* True when another client connected with the same shellId (close 4000).
|
|
79
|
+
* Check after the `for await` loop exits to distinguish a kick from a clean exit.
|
|
80
|
+
*/
|
|
81
|
+
get kicked() {
|
|
82
|
+
return this._kicked;
|
|
83
|
+
}
|
|
84
|
+
/**
|
|
85
|
+
* PTY ring-buffer bytes lost during the most recent disconnect.
|
|
86
|
+
* Zero when no overflow occurred or on a fresh connection.
|
|
87
|
+
*/
|
|
88
|
+
get bytesDropped() {
|
|
89
|
+
return this._bytesDropped;
|
|
90
|
+
}
|
|
91
|
+
/**
|
|
92
|
+
* Shell process exit code. `null` until the shell exits; `0` for a clean exit.
|
|
93
|
+
* Check after the `for await` loop exits alongside `kicked`.
|
|
94
|
+
*/
|
|
95
|
+
get exitCode() {
|
|
96
|
+
return this._exitCode;
|
|
97
|
+
}
|
|
98
|
+
connectFn;
|
|
99
|
+
reconnectConfig;
|
|
100
|
+
keepaliveIntervalMs;
|
|
101
|
+
log;
|
|
102
|
+
framer = new ShellFramer();
|
|
103
|
+
_wsFactory;
|
|
104
|
+
_state = { status: 'idle' };
|
|
105
|
+
_abortController = null;
|
|
106
|
+
_sessionController = new AbortController();
|
|
107
|
+
_closeError = null;
|
|
108
|
+
/**
|
|
109
|
+
* Set while a reconnect is in flight, cleared when it settles. Shared so that the
|
|
110
|
+
* iterator, the close/dead-detection handler, and send()/resize() all await the same
|
|
111
|
+
* attempt rather than racing or each starting their own. Resolves to the reconnect
|
|
112
|
+
* outcome (true = recovered, false = gave up). This is what makes *connection* recovery
|
|
113
|
+
* iterator-independent — the socket is restored without a `for await` loop. Consuming the
|
|
114
|
+
* replayed output still requires an active iterator (see the class docstring).
|
|
115
|
+
*/
|
|
116
|
+
_reconnectPromise = null;
|
|
117
|
+
constructor(opts) {
|
|
118
|
+
this.connectFn = opts.connectFn;
|
|
119
|
+
if (opts.shellId != null)
|
|
120
|
+
validateShellId(opts.shellId);
|
|
121
|
+
this._shellId = opts.shellId ?? randomUUID();
|
|
122
|
+
this._sessionId = opts.sessionId ?? randomUUID();
|
|
123
|
+
this.reconnectConfig = opts.reconnectConfig;
|
|
124
|
+
this.keepaliveIntervalMs = opts.keepaliveIntervalMs ?? DEFAULT_KEEPALIVE_INTERVAL;
|
|
125
|
+
this.log = opts.logger ?? noopLogger;
|
|
126
|
+
this._wsFactory =
|
|
127
|
+
opts._wsFactory ??
|
|
128
|
+
((url, protocols, options) => protocols?.length ? new WebSocket(url, protocols, options) : new WebSocket(url, options));
|
|
129
|
+
}
|
|
130
|
+
/** Connect and read the initial STATUS metadata frame. */
|
|
131
|
+
async connect() {
|
|
132
|
+
if (this._state.status === 'closed')
|
|
133
|
+
throw new Error('ShellSession is closed');
|
|
134
|
+
if (this._state.status !== 'idle') {
|
|
135
|
+
throw new Error(`ShellSession.connect() requires idle state (current: ${this._state.status})`);
|
|
136
|
+
}
|
|
137
|
+
await this._connectWithUpgrade();
|
|
138
|
+
// _connectWithUpgrade returns void (not throw) when close() fires mid-flight.
|
|
139
|
+
if (this._isClosed())
|
|
140
|
+
throw new Error('ShellSession was closed during connect()');
|
|
141
|
+
return this;
|
|
142
|
+
}
|
|
143
|
+
/**
|
|
144
|
+
* Send text or raw bytes to the shell's stdin.
|
|
145
|
+
* Pass a string for text commands; pass a Buffer for binary/escape sequences.
|
|
146
|
+
*
|
|
147
|
+
* If a reconnect is in flight, this waits for it and sends on the recovered
|
|
148
|
+
* connection. Throws a descriptive `Error` (never the raw `ws` "readyState 3"
|
|
149
|
+
* error) when the session is closed or could not be recovered.
|
|
150
|
+
*/
|
|
151
|
+
async send(data) {
|
|
152
|
+
await this._wsSend(await this._writableSocket(), this.framer.encodeStdin(data));
|
|
153
|
+
}
|
|
154
|
+
/** Send a HEARTBEAT frame (0x05) to the server. */
|
|
155
|
+
async sendHeartbeat() {
|
|
156
|
+
await this._wsSend(await this._writableSocket(), this.framer.encodeHeartbeat());
|
|
157
|
+
}
|
|
158
|
+
/** Resize the terminal PTY. */
|
|
159
|
+
async resize(width, height) {
|
|
160
|
+
await this._wsSend(await this._writableSocket(), this.framer.encodeResize(width, height));
|
|
161
|
+
}
|
|
162
|
+
/**
|
|
163
|
+
* Resolve the live socket for a write, healing first if needed. Awaits an in-flight
|
|
164
|
+
* reconnect (transparent recovery) and validates the *real* socket
|
|
165
|
+
* readyState — not just the `_state` flag, which can lag a silently-dropped socket.
|
|
166
|
+
* Throws a descriptive `Error` instead of leaking the raw `ws`
|
|
167
|
+
* "readyState 3 (CLOSED)" error.
|
|
168
|
+
*/
|
|
169
|
+
async _writableSocket() {
|
|
170
|
+
// Wait out an in-flight reconnect, looping so back-to-back drops (a fresh reconnect
|
|
171
|
+
// starting while we awaited the previous one) are also awaited rather than throwing a
|
|
172
|
+
// spurious "not connected" mid-recovery. The reconnect passes through 'reconnecting'
|
|
173
|
+
// then 'connecting' before reaching 'open', so we wait whenever a reconnect promise is
|
|
174
|
+
// set AND we are not yet open/closed. Crucially we stop once status is 'open': an
|
|
175
|
+
// onReconnect callback that calls send()/resize() runs after the new socket is promoted
|
|
176
|
+
// to 'open' but before _reconnectPromise settles — gating on 'open' lets it through
|
|
177
|
+
// instead of awaiting its own in-flight promise and deadlocking.
|
|
178
|
+
while (this._reconnectPromise && this._state.status !== 'open' && this._state.status !== 'closed') {
|
|
179
|
+
await this._reconnectPromise;
|
|
180
|
+
}
|
|
181
|
+
if (this._isClosed())
|
|
182
|
+
throw new Error('ShellSession is closed');
|
|
183
|
+
if (this._state.status !== 'open') {
|
|
184
|
+
throw new Error(`ShellSession is not connected (status: ${this._state.status})`);
|
|
185
|
+
}
|
|
186
|
+
if (this._state.ws.readyState !== WebSocket.OPEN) {
|
|
187
|
+
throw new Error(`ShellSession connection is not open (readyState ${this._state.ws.readyState})`);
|
|
188
|
+
}
|
|
189
|
+
return this._state.ws;
|
|
190
|
+
}
|
|
191
|
+
/** Send a CLOSE frame (0xFF) to permanently kill the shell, then close the WebSocket.
|
|
192
|
+
* The server kills the shell process (SIGHUP → SIGKILL) and responds with its own [0xFF].
|
|
193
|
+
* Unlike dropping the WebSocket (which detaches and allows reconnection), this is permanent. */
|
|
194
|
+
async close() {
|
|
195
|
+
const prev = this._state;
|
|
196
|
+
if (prev.status === 'closed') {
|
|
197
|
+
this.log.debug(`ShellSession: close() called on already-closed session (shellId=${this.shellId})`);
|
|
198
|
+
return;
|
|
199
|
+
}
|
|
200
|
+
// Atomic transition — any concurrent code checks _state.status === 'closed'.
|
|
201
|
+
this._state = { status: 'closed' };
|
|
202
|
+
// Abort per-connection iterator (unblocks any suspended _recvRaw) and reconnect sleeps.
|
|
203
|
+
this._abortController?.abort();
|
|
204
|
+
this._sessionController.abort();
|
|
205
|
+
if (prev.status === 'connecting' && prev.ws !== null) {
|
|
206
|
+
// Terminate any in-progress TLS handshake — the socket is unreachable from
|
|
207
|
+
// the _connectWithUpgrade local, so close() must kill it here.
|
|
208
|
+
try {
|
|
209
|
+
prev.ws.terminate();
|
|
210
|
+
}
|
|
211
|
+
catch (err) {
|
|
212
|
+
this.log.debug(`ShellSession: ws.terminate() threw during connecting (shellId=${this.shellId}): ${String(err)}`);
|
|
213
|
+
}
|
|
214
|
+
}
|
|
215
|
+
else if (prev.status === 'open') {
|
|
216
|
+
this._stopKeepalive(prev.keepaliveTimer);
|
|
217
|
+
// ws.send() throws synchronously if the socket is already closing/closed.
|
|
218
|
+
// Swallow it — the intent is best-effort notification, not guaranteed delivery.
|
|
219
|
+
try {
|
|
220
|
+
prev.ws.send(this.framer.encodeClose());
|
|
221
|
+
}
|
|
222
|
+
catch (err) {
|
|
223
|
+
this.log.debug(`ShellSession: CLOSE frame not sent — socket already closing/closed (shellId=${this.shellId}): ${String(err)}`);
|
|
224
|
+
}
|
|
225
|
+
try {
|
|
226
|
+
prev.ws.close();
|
|
227
|
+
}
|
|
228
|
+
catch (err) {
|
|
229
|
+
this.log.debug(`ShellSession: ws.close() threw (shellId=${this.shellId}): ${String(err)}`);
|
|
230
|
+
}
|
|
231
|
+
}
|
|
232
|
+
}
|
|
233
|
+
/**
|
|
234
|
+
* Forcibly terminates the underlying WebSocket without a clean handshake.
|
|
235
|
+
* Useful in tests to simulate an abrupt network drop and trigger the reconnect path.
|
|
236
|
+
* Has no effect if the session is not currently open.
|
|
237
|
+
* @internal
|
|
238
|
+
*/
|
|
239
|
+
_terminateConnection() {
|
|
240
|
+
if (this._state.status === 'open')
|
|
241
|
+
this._state.ws.terminate();
|
|
242
|
+
}
|
|
243
|
+
/**
|
|
244
|
+
* Async iterator — yields inbound ShellFrames, reconnecting on drop if configured.
|
|
245
|
+
*
|
|
246
|
+
* The loop exits silently (no throw) in three cases: shell exit, kicked by a new
|
|
247
|
+
* client, or reconnect budget exhausted. Check `exitCode`, `kicked`, and
|
|
248
|
+
* `bytesDropped` after the loop to distinguish them:
|
|
249
|
+
*
|
|
250
|
+
* ```typescript
|
|
251
|
+
* for await (const frame of shell) { ... }
|
|
252
|
+
* if (shell.kicked) { ... } // another client took over
|
|
253
|
+
* if (shell.exitCode !== null) { ... } // shell process exited
|
|
254
|
+
* if (shell.bytesDropped > 0) { ... } // ring-buffer overflow on reconnect
|
|
255
|
+
* ```
|
|
256
|
+
*/
|
|
257
|
+
[Symbol.asyncIterator]() {
|
|
258
|
+
return this._iterate();
|
|
259
|
+
}
|
|
260
|
+
// ── Internal ──────────────────────────────────────────────────────────────
|
|
261
|
+
_startKeepalive(ws, signal) {
|
|
262
|
+
if (this.keepaliveIntervalMs <= 0)
|
|
263
|
+
return null;
|
|
264
|
+
// SDK clients MUST send a Ping every 30s AND treat "no Pong within ~60s" as a dead
|
|
265
|
+
// connection. ws emits 'pong' for each RFC 6455 Pong received. Without this,
|
|
266
|
+
// a silently-dropped socket reports readyState OPEN for up to ~60s (KARP idle timeout),
|
|
267
|
+
// during which the session would sit stale on a dead connection.
|
|
268
|
+
//
|
|
269
|
+
// Liveness is pong-only by design: a 'message' listener here would (a) double-dispatch
|
|
270
|
+
// on every inbound frame — the read path (`on(ws,'message')` iterator) already consumes
|
|
271
|
+
// them — and (b) mask a write-dead-but-read-alive half-close, since streamed output
|
|
272
|
+
// would keep refreshing the timer while our pings go unanswered. The 'pong' listener is
|
|
273
|
+
// removed when this connection's controller aborts (close or drop) so it never leaks.
|
|
274
|
+
let lastPongAt = Date.now();
|
|
275
|
+
const markAlive = () => {
|
|
276
|
+
lastPongAt = Date.now();
|
|
277
|
+
};
|
|
278
|
+
ws.on('pong', markAlive);
|
|
279
|
+
signal.addEventListener('abort', () => ws.removeListener('pong', markAlive), { once: true });
|
|
280
|
+
const deadAfterMs = this.keepaliveIntervalMs * 2; // ~60s with the 30s default
|
|
281
|
+
return globalThis.setInterval(() => {
|
|
282
|
+
if (ws.readyState !== WebSocket.OPEN)
|
|
283
|
+
return;
|
|
284
|
+
if (Date.now() - lastPongAt > deadAfterMs) {
|
|
285
|
+
// Silent death: pings went unanswered. Terminate so 'close' fires and reconnect
|
|
286
|
+
// engages — instead of leaving the session stale on a dead socket.
|
|
287
|
+
this.log.warn(`ShellSession: no Pong within ${deadAfterMs}ms — connection presumed dead, ` +
|
|
288
|
+
`terminating (shellId=${this.shellId})`);
|
|
289
|
+
ws.terminate();
|
|
290
|
+
return;
|
|
291
|
+
}
|
|
292
|
+
ws.ping();
|
|
293
|
+
}, this.keepaliveIntervalMs);
|
|
294
|
+
}
|
|
295
|
+
_stopKeepalive(timer) {
|
|
296
|
+
if (timer !== null)
|
|
297
|
+
globalThis.clearInterval(timer);
|
|
298
|
+
}
|
|
299
|
+
_wsSend(ws, data) {
|
|
300
|
+
return new Promise((resolve, reject) => {
|
|
301
|
+
ws.send(data, (err) => (err ? reject(err) : resolve()));
|
|
302
|
+
});
|
|
303
|
+
}
|
|
304
|
+
/** Open WebSocket, capture 101 upgrade headers, then read metadata frame. */
|
|
305
|
+
async _connectWithUpgrade() {
|
|
306
|
+
// Guard must be the very first check — any state mutation below would violate the
|
|
307
|
+
// closed invariant if close() has already fired.
|
|
308
|
+
if (this._isClosed())
|
|
309
|
+
return;
|
|
310
|
+
// Abort any previous per-connection iterator before creating a new one.
|
|
311
|
+
this._abortController?.abort();
|
|
312
|
+
// A reconnect attempt enters here with status 'reconnecting'; a fresh connect with 'idle'.
|
|
313
|
+
const isReconnect = this._state.status === 'reconnecting';
|
|
314
|
+
this._closeError = null;
|
|
315
|
+
this._reconnected = false;
|
|
316
|
+
this._kicked = false;
|
|
317
|
+
this._exitCode = null;
|
|
318
|
+
// bytesDropped reports loss for the most recent disconnect (per-disconnect, not
|
|
319
|
+
// cumulative). Reset it only on a FRESH connect — resetting on every reconnect attempt
|
|
320
|
+
// would clobber a value the iterator has not yet read when reconnects happen back-to-back.
|
|
321
|
+
if (!isReconnect)
|
|
322
|
+
this._bytesDropped = 0;
|
|
323
|
+
this._state = { status: 'connecting', ws: null };
|
|
324
|
+
let connectResult;
|
|
325
|
+
try {
|
|
326
|
+
connectResult = await this.connectFn(this.shellId, this.sessionId);
|
|
327
|
+
}
|
|
328
|
+
catch (err) {
|
|
329
|
+
// connectFn threw before any WebSocket was created — reset to idle so the
|
|
330
|
+
// state doesn't stay 'connecting' across the backoff sleep between retries.
|
|
331
|
+
if (!this._isClosed())
|
|
332
|
+
this._state = { status: 'idle' };
|
|
333
|
+
throw err;
|
|
334
|
+
}
|
|
335
|
+
// close() may have fired while connectFn was suspended — bail out before
|
|
336
|
+
// creating a new WebSocket so the session doesn't revive after an explicit close.
|
|
337
|
+
if (this._isClosed())
|
|
338
|
+
return;
|
|
339
|
+
const ws = this._wsFactory(connectResult.url, connectResult.protocols, { headers: connectResult.headers });
|
|
340
|
+
// close() may have fired in the gap between wsFactory() and this state update —
|
|
341
|
+
// terminate the socket and bail out instead of re-registering in 'connecting'.
|
|
342
|
+
if (this._isClosed()) {
|
|
343
|
+
ws.terminate();
|
|
344
|
+
return;
|
|
345
|
+
}
|
|
346
|
+
// Register ws in the connecting state so close() can terminate the socket
|
|
347
|
+
// during the TLS handshake, before the open race resolves.
|
|
348
|
+
this._state = { status: 'connecting', ws };
|
|
349
|
+
// Capture controller in a local variable so each WebSocket's handlers close over
|
|
350
|
+
// their own controller — not this._abortController, which is replaced on reconnect.
|
|
351
|
+
this._abortController = new AbortController();
|
|
352
|
+
const controller = this._abortController;
|
|
353
|
+
// Single iterator used for both the STATUS handshake and subsequent frame reads.
|
|
354
|
+
// On the timeout path, one frame may be lost (the abandoned .next() from the
|
|
355
|
+
// Promise.race in _readMetadataFrame consumes it), but that is a degraded scenario.
|
|
356
|
+
// Using two independent iterators would cause every pre-STATUS frame to be yielded
|
|
357
|
+
// twice (once via pendingFrames, once via the independent listener queue).
|
|
358
|
+
const messageIterator = on(ws, 'message', { signal: controller.signal });
|
|
359
|
+
ws.on('close', (code, reason) => {
|
|
360
|
+
const err = Object.assign(new Error(`WebSocket closed: ${code} ${reason?.toString() ?? ''}`), { closeCode: code });
|
|
361
|
+
this._closeError = err;
|
|
362
|
+
controller.abort(err);
|
|
363
|
+
// Trigger reconnect from the close event itself so recovery does not depend on a
|
|
364
|
+
// `for await` loop being active. Only act on closes for the *current*
|
|
365
|
+
// connection: ignore if we have already moved on (closed, reconnecting, or this
|
|
366
|
+
// socket is no longer the live one). The iterator, if running, joins the same
|
|
367
|
+
// in-flight attempt via _reconnectPromise rather than starting a second one.
|
|
368
|
+
if (this._state.status === 'open' && this._state.ws === ws) {
|
|
369
|
+
void this._ensureReconnect(code).catch(() => { });
|
|
370
|
+
}
|
|
371
|
+
});
|
|
372
|
+
ws.on('error', (err) => {
|
|
373
|
+
controller.abort(err);
|
|
374
|
+
// Do NOT trigger reconnect here. The `ws` library always emits 'close' after 'error'
|
|
375
|
+
// on a connected socket, and that close carries the authoritative close code (the
|
|
376
|
+
// close code drives the reconnect decision). An 'error' has no closeCode, so
|
|
377
|
+
// reconnecting from it (with a null code) would pre-empt a terminal close that must
|
|
378
|
+
// NOT reconnect — e.g. 4000 (kicked) or 1003 (text frames) — by flipping state to
|
|
379
|
+
// 'reconnecting' before the real code arrives. The 'close' handler below owns the
|
|
380
|
+
// decision; this listener exists only so the 'error' event is not unhandled.
|
|
381
|
+
this.log.debug(`ShellSession: WebSocket error (deferring to close, shellId=${this.shellId}): ${String(err)}`);
|
|
382
|
+
});
|
|
383
|
+
// Capture shellId/sessionId from 101 upgrade response headers before open fires.
|
|
384
|
+
ws.once('upgrade', (response) => {
|
|
385
|
+
const resHeaders = response.headers;
|
|
386
|
+
const hShellId = resHeaders[SHELL_ID_HEADER];
|
|
387
|
+
const hSessionId = resHeaders[SESSION_HEADER];
|
|
388
|
+
if (typeof hShellId === 'string')
|
|
389
|
+
this._shellId = hShellId;
|
|
390
|
+
if (typeof hSessionId === 'string')
|
|
391
|
+
this._sessionId = hSessionId;
|
|
392
|
+
});
|
|
393
|
+
// Race open against unexpected-response (non-101) and pre-open close.
|
|
394
|
+
// events.once(ws, 'open') rejects automatically if 'error' fires first.
|
|
395
|
+
// AbortController cleans up the two losing once() listeners immediately after the race settles.
|
|
396
|
+
const openRaceAc = new AbortController();
|
|
397
|
+
try {
|
|
398
|
+
await Promise.race([
|
|
399
|
+
once(ws, 'open', { signal: openRaceAc.signal }),
|
|
400
|
+
once(ws, 'unexpected-response', { signal: openRaceAc.signal }).then((args) => {
|
|
401
|
+
const res = args[1];
|
|
402
|
+
res.resume();
|
|
403
|
+
throw new Error(`Server rejected WebSocket connection: HTTP ${res.statusCode ?? 0}`);
|
|
404
|
+
}),
|
|
405
|
+
once(ws, 'close', { signal: openRaceAc.signal }).then((args) => {
|
|
406
|
+
const [code, reason] = args;
|
|
407
|
+
throw new Error(`WebSocket closed before open: ${code} ${reason?.toString() ?? ''}`);
|
|
408
|
+
}),
|
|
409
|
+
]);
|
|
410
|
+
}
|
|
411
|
+
catch (err) {
|
|
412
|
+
ws.terminate();
|
|
413
|
+
if (!this._isClosed())
|
|
414
|
+
this._state = { status: 'idle' };
|
|
415
|
+
this.log.debug(`ShellSession: WebSocket upgrade failed (shellId=${this.shellId}): ${String(err)}`);
|
|
416
|
+
throw err;
|
|
417
|
+
}
|
|
418
|
+
finally {
|
|
419
|
+
openRaceAc.abort();
|
|
420
|
+
}
|
|
421
|
+
let pendingFrames;
|
|
422
|
+
try {
|
|
423
|
+
pendingFrames = await this._readMetadataFrame(messageIterator);
|
|
424
|
+
}
|
|
425
|
+
catch (err) {
|
|
426
|
+
// Server closed before sending STATUS, or close() fired during handshake.
|
|
427
|
+
ws.terminate();
|
|
428
|
+
if (!this._isClosed())
|
|
429
|
+
this._state = { status: 'idle' };
|
|
430
|
+
throw err;
|
|
431
|
+
}
|
|
432
|
+
// close() may have fired during _readMetadataFrame — terminate and bail out.
|
|
433
|
+
if (this._isClosed()) {
|
|
434
|
+
ws.terminate();
|
|
435
|
+
return;
|
|
436
|
+
}
|
|
437
|
+
const keepaliveTimer = this._startKeepalive(ws, controller.signal);
|
|
438
|
+
// Atomic promotion: all connection objects become available together.
|
|
439
|
+
this._state = { status: 'open', ws, messageIterator, keepaliveTimer, pendingFrames };
|
|
440
|
+
}
|
|
441
|
+
/** Receive one raw binary message from the WebSocket. */
|
|
442
|
+
async _recvRaw(messageIterator) {
|
|
443
|
+
try {
|
|
444
|
+
const { value, done } = await messageIterator.next();
|
|
445
|
+
if (done)
|
|
446
|
+
throw this._closeError ?? new Error('WebSocket closed');
|
|
447
|
+
const [data] = value;
|
|
448
|
+
if (Buffer.isBuffer(data))
|
|
449
|
+
return data;
|
|
450
|
+
if (Array.isArray(data))
|
|
451
|
+
return Buffer.concat(data);
|
|
452
|
+
return Buffer.from(data);
|
|
453
|
+
}
|
|
454
|
+
catch (err) {
|
|
455
|
+
throw this._closeError ?? err;
|
|
456
|
+
}
|
|
457
|
+
}
|
|
458
|
+
/**
|
|
459
|
+
* Consume frames until a STATUS confirmation is found, stashing others in pendingFrames.
|
|
460
|
+
* Returns the accumulated pending frames to be stored in the 'open' state.
|
|
461
|
+
*/
|
|
462
|
+
async _readMetadataFrame(messageIterator) {
|
|
463
|
+
// Use an explicit AbortController so the timer can be cancelled as soon as
|
|
464
|
+
// STATUS arrives — AbortSignal.timeout() creates a timer that outlives the
|
|
465
|
+
// fast-path read and accumulates orphan wakeups on rapid reconnects.
|
|
466
|
+
const timeoutAc = new AbortController();
|
|
467
|
+
const timer = globalThis.setTimeout(() => timeoutAc.abort(new Error('timeout')), DEFAULT_METADATA_TIMEOUT);
|
|
468
|
+
const timeoutP = new Promise((_, rej) => timeoutAc.signal.addEventListener('abort', () => rej(timeoutAc.signal.reason), { once: true }));
|
|
469
|
+
const pendingFrames = [];
|
|
470
|
+
try {
|
|
471
|
+
while (true) {
|
|
472
|
+
let raw;
|
|
473
|
+
try {
|
|
474
|
+
raw = await Promise.race([this._recvRaw(messageIterator), timeoutP]);
|
|
475
|
+
}
|
|
476
|
+
catch (err) {
|
|
477
|
+
if (timeoutAc.signal.aborted) {
|
|
478
|
+
// If the WebSocket also closed concurrently (race between the 10s timer
|
|
479
|
+
// and a server close event), prefer the real close error — promoting a
|
|
480
|
+
// dead messageIterator to 'open' would mislead callers and lose the cause.
|
|
481
|
+
if (this._closeError !== null)
|
|
482
|
+
throw this._closeError;
|
|
483
|
+
this.log.warn(`ShellSession: Timed out waiting for STATUS confirmation (shellId=${this.shellId})`);
|
|
484
|
+
return pendingFrames;
|
|
485
|
+
}
|
|
486
|
+
throw err;
|
|
487
|
+
}
|
|
488
|
+
const frame = this.framer.decode(raw);
|
|
489
|
+
if (frame.channel === ShellChannel.STATUS) {
|
|
490
|
+
try {
|
|
491
|
+
const meta = (frame.json()['metadata'] ?? {});
|
|
492
|
+
if (meta['shellId']) {
|
|
493
|
+
// Confirmation frame — update shellId and we're done. On a reconnect this is
|
|
494
|
+
// the single reconnection confirmation ("sent once, before replay begins"),
|
|
495
|
+
// so read bytesDropped here — it is delivered on THIS frame and nowhere else.
|
|
496
|
+
this._shellId = String(meta['shellId']);
|
|
497
|
+
this._reconnected = Boolean(meta['reconnected']);
|
|
498
|
+
this._recordBytesDropped(meta);
|
|
499
|
+
return pendingFrames;
|
|
500
|
+
}
|
|
501
|
+
}
|
|
502
|
+
catch (err) {
|
|
503
|
+
this.log.debug(`ShellSession: malformed STATUS frame, proceeding with client-generated shellId=${this.shellId}: ${String(err)}`);
|
|
504
|
+
return pendingFrames;
|
|
505
|
+
}
|
|
506
|
+
// No shellId → termination frame (shell died before confirmation).
|
|
507
|
+
// Stash it so _iterate can set exitCode and return cleanly.
|
|
508
|
+
this.log.debug(`ShellSession: termination STATUS received before confirmation (shellId=${this.shellId})`);
|
|
509
|
+
pendingFrames.push(frame);
|
|
510
|
+
return pendingFrames;
|
|
511
|
+
}
|
|
512
|
+
pendingFrames.push(frame);
|
|
513
|
+
}
|
|
514
|
+
}
|
|
515
|
+
finally {
|
|
516
|
+
globalThis.clearTimeout(timer);
|
|
517
|
+
}
|
|
518
|
+
}
|
|
519
|
+
_isConfirmationStatus(status) {
|
|
520
|
+
const meta = status['metadata'];
|
|
521
|
+
return Boolean(meta?.['shellId']);
|
|
522
|
+
}
|
|
523
|
+
/**
|
|
524
|
+
* Record `bytesDropped` from a reconnection confirmation frame's metadata, if present.
|
|
525
|
+
* `bytesDropped` reports PTY output lost from ring-buffer overflow during THIS disconnect
|
|
526
|
+
* (per-disconnect, not session-cumulative — assign, don't accumulate). Present
|
|
527
|
+
* only when greater than 0; absent on a clean reconnect.
|
|
528
|
+
*/
|
|
529
|
+
_recordBytesDropped(meta) {
|
|
530
|
+
const dropped = meta?.['bytesDropped'];
|
|
531
|
+
if (typeof dropped === 'number' && dropped > 0) {
|
|
532
|
+
this._bytesDropped = dropped;
|
|
533
|
+
this.log.warn(`ShellSession: ${dropped} bytes of PTY output lost during disconnect ` +
|
|
534
|
+
`(ring buffer overflow, shellId=${this.shellId})`);
|
|
535
|
+
}
|
|
536
|
+
}
|
|
537
|
+
_isTerminationStatus(status) {
|
|
538
|
+
if (this._isConfirmationStatus(status))
|
|
539
|
+
return false;
|
|
540
|
+
return status['status'] === 'Success' || status['status'] === 'Failure';
|
|
541
|
+
}
|
|
542
|
+
_parseExitCode(status) {
|
|
543
|
+
if (status['status'] === 'Success')
|
|
544
|
+
return 0;
|
|
545
|
+
const details = status['details'];
|
|
546
|
+
const causes = details?.['causes'];
|
|
547
|
+
for (const cause of causes ?? []) {
|
|
548
|
+
if (cause['reason'] === 'ExitCode') {
|
|
549
|
+
const n = parseInt(String(cause['message']), 10);
|
|
550
|
+
if (!isNaN(n))
|
|
551
|
+
return n;
|
|
552
|
+
}
|
|
553
|
+
}
|
|
554
|
+
// Platform error with no ExitCode cause — return null so callers can
|
|
555
|
+
// distinguish "exited cleanly" (0) from "no exit code available" (null).
|
|
556
|
+
return null;
|
|
557
|
+
}
|
|
558
|
+
async *_iterate() {
|
|
559
|
+
// Hoisted outside the loop so the finally block can reference the last known
|
|
560
|
+
// open state's keepaliveTimer even after _state has been transitioned away.
|
|
561
|
+
let state;
|
|
562
|
+
try {
|
|
563
|
+
while (true) {
|
|
564
|
+
if (this._state.status !== 'open')
|
|
565
|
+
return;
|
|
566
|
+
// Capture state snapshot before awaiting — close() may transition state
|
|
567
|
+
// concurrently while the generator is suspended at a yield or await point.
|
|
568
|
+
state = this._state;
|
|
569
|
+
// Drain frames buffered during the metadata handshake first.
|
|
570
|
+
while (state.pendingFrames.length > 0) {
|
|
571
|
+
if (this._isClosed())
|
|
572
|
+
return;
|
|
573
|
+
const frame = state.pendingFrames.shift();
|
|
574
|
+
// HEARTBEAT — server echo of client keepalive, not application data.
|
|
575
|
+
if (frame.channel === ShellChannel.HEARTBEAT)
|
|
576
|
+
continue;
|
|
577
|
+
if (frame.channel === ShellChannel.CLOSE) {
|
|
578
|
+
this._state = { status: 'idle' };
|
|
579
|
+
this.log.debug(`ShellSession: CLOSE frame received in pending queue (shellId=${this.shellId})`);
|
|
580
|
+
return;
|
|
581
|
+
}
|
|
582
|
+
if (frame.channel === ShellChannel.STATUS) {
|
|
583
|
+
try {
|
|
584
|
+
const s = frame.json();
|
|
585
|
+
if (this._isTerminationStatus(s)) {
|
|
586
|
+
this._exitCode = this._parseExitCode(s);
|
|
587
|
+
this._state = { status: 'idle' };
|
|
588
|
+
yield frame;
|
|
589
|
+
return;
|
|
590
|
+
}
|
|
591
|
+
}
|
|
592
|
+
catch (err) {
|
|
593
|
+
this.log.debug(`ShellSession: malformed STATUS frame in pending queue (shellId=${this.shellId}): ${String(err)}`);
|
|
594
|
+
}
|
|
595
|
+
}
|
|
596
|
+
yield frame;
|
|
597
|
+
}
|
|
598
|
+
if (this._state.status !== 'open')
|
|
599
|
+
return;
|
|
600
|
+
let raw;
|
|
601
|
+
try {
|
|
602
|
+
raw = await this._recvRaw(state.messageIterator);
|
|
603
|
+
}
|
|
604
|
+
catch (err) {
|
|
605
|
+
if (this._isClosed())
|
|
606
|
+
return;
|
|
607
|
+
// A socket 'error' aborts this read before the authoritative 'close' arrives, so
|
|
608
|
+
// the abort error carries no closeCode. Reconnecting on that null code would
|
|
609
|
+
// pre-empt the real code (e.g. 4000 kicked / 1003 text-frames): _ensureReconnect
|
|
610
|
+
// would flip state to 'reconnecting', and the close handler's terminal decision
|
|
611
|
+
// (which guards on status 'open') would then be silently dropped. Wait for the
|
|
612
|
+
// 'close' first — `ws` always emits it after 'error' on a connected socket, and
|
|
613
|
+
// its handler sets _closeError and owns the reconnect decision (spec §7).
|
|
614
|
+
if (this._extractCloseCode(err) === null && this._closeError === null) {
|
|
615
|
+
await this._waitForClose(state.ws);
|
|
616
|
+
if (this._isClosed())
|
|
617
|
+
return;
|
|
618
|
+
}
|
|
619
|
+
// Start-or-join reconnect with the authoritative close code. The close/error
|
|
620
|
+
// handler for this socket may have already kicked off the attempt
|
|
621
|
+
// (iterator-independent path); either way we await the single shared attempt here
|
|
622
|
+
// rather than duplicating close-code logic.
|
|
623
|
+
const closeCode = this._extractCloseCode(this._closeError ?? err);
|
|
624
|
+
const didReconnect = await this._ensureReconnect(closeCode);
|
|
625
|
+
if (this._isClosed())
|
|
626
|
+
return;
|
|
627
|
+
if (!didReconnect)
|
|
628
|
+
return; // terminal close, no config, or budget exhausted
|
|
629
|
+
continue;
|
|
630
|
+
}
|
|
631
|
+
const frame = this.framer.decode(raw);
|
|
632
|
+
if (frame.channel === ShellChannel.CLOSE) {
|
|
633
|
+
this._state = { status: 'idle' };
|
|
634
|
+
this.log.debug(`ShellSession: CLOSE frame received (shellId=${this.shellId})`);
|
|
635
|
+
return;
|
|
636
|
+
}
|
|
637
|
+
// HEARTBEAT — server echo of client keepalive, not application data.
|
|
638
|
+
if (frame.channel === ShellChannel.HEARTBEAT)
|
|
639
|
+
continue;
|
|
640
|
+
if (frame.channel === ShellChannel.STATUS) {
|
|
641
|
+
try {
|
|
642
|
+
const s = frame.json();
|
|
643
|
+
if (this._isConfirmationStatus(s)) {
|
|
644
|
+
// A confirmation frame in the data stream. Per spec §6 the reconnection
|
|
645
|
+
// confirmation (carrying bytesDropped) is sent once *before* replay and is
|
|
646
|
+
// consumed by _readMetadataFrame, not here — so this path normally does not
|
|
647
|
+
// fire on reconnect. Record bytesDropped defensively in case a confirmation
|
|
648
|
+
// reaches the stream, then swallow it (not application output).
|
|
649
|
+
this._recordBytesDropped(s['metadata']);
|
|
650
|
+
continue;
|
|
651
|
+
}
|
|
652
|
+
if (this._isTerminationStatus(s)) {
|
|
653
|
+
this._exitCode = this._parseExitCode(s);
|
|
654
|
+
this._state = { status: 'idle' };
|
|
655
|
+
yield frame;
|
|
656
|
+
return;
|
|
657
|
+
}
|
|
658
|
+
}
|
|
659
|
+
catch (err) {
|
|
660
|
+
this.log.debug(`ShellSession: malformed STATUS frame (shellId=${this.shellId}): ${String(err)}`);
|
|
661
|
+
}
|
|
662
|
+
}
|
|
663
|
+
yield frame;
|
|
664
|
+
}
|
|
665
|
+
}
|
|
666
|
+
finally {
|
|
667
|
+
// Safety net: stop keepalive if close() fires while the generator is suspended
|
|
668
|
+
// at a yield point — close() transitions to 'closed' but state.keepaliveTimer
|
|
669
|
+
// was captured before that transition, so it still holds the live timer handle.
|
|
670
|
+
this._stopKeepalive(state?.keepaliveTimer ?? null);
|
|
671
|
+
}
|
|
672
|
+
}
|
|
673
|
+
/** Returns true when close() has been called. Used after await points to guard
|
|
674
|
+
* against close() firing while the method was suspended. A method call prevents
|
|
675
|
+
* TypeScript from narrowing away 'closed' comparisons after state assignments. */
|
|
676
|
+
_isClosed() {
|
|
677
|
+
return this._state.status === 'closed';
|
|
678
|
+
}
|
|
679
|
+
_extractCloseCode(err) {
|
|
680
|
+
if (err !== null && typeof err === 'object' && 'closeCode' in err) {
|
|
681
|
+
return err.closeCode;
|
|
682
|
+
}
|
|
683
|
+
return null;
|
|
684
|
+
}
|
|
685
|
+
/**
|
|
686
|
+
* Wait for the socket's authoritative 'close' to land after an 'error' woke the read
|
|
687
|
+
* loop early. The 'close' handler sets `_closeError` (carrying the real close code) and
|
|
688
|
+
* makes the reconnect decision, so this resolves as soon as `_closeError` is populated.
|
|
689
|
+
* Bounded by a short timeout in case 'close' never follows (it always does on a
|
|
690
|
+
* connected `ws`, but we must not hang the iterator on a misbehaving socket).
|
|
691
|
+
*/
|
|
692
|
+
async _waitForClose(ws) {
|
|
693
|
+
if (this._closeError !== null || this._isClosed())
|
|
694
|
+
return;
|
|
695
|
+
const ac = new AbortController();
|
|
696
|
+
const timer = globalThis.setTimeout(() => ac.abort(), DEFAULT_METADATA_TIMEOUT);
|
|
697
|
+
try {
|
|
698
|
+
await once(ws, 'close', { signal: ac.signal });
|
|
699
|
+
}
|
|
700
|
+
catch {
|
|
701
|
+
// Timed out or aborted — fall through; caller reconnects with whatever code is set.
|
|
702
|
+
}
|
|
703
|
+
finally {
|
|
704
|
+
globalThis.clearTimeout(timer);
|
|
705
|
+
}
|
|
706
|
+
}
|
|
707
|
+
/**
|
|
708
|
+
* Decide whether a close code is auto-reconnectable, then start-or-join a reconnect.
|
|
709
|
+
*
|
|
710
|
+
* This is the single entry point for triggering reconnection. It is called from
|
|
711
|
+
* wherever a disconnect is observed — the iterator's read error, the `ws.on('close')`
|
|
712
|
+
* handler (dead-detection / silent drop while no one is iterating), or the keepalive
|
|
713
|
+
* pong-timeout — so restoring the *connection* no longer depends on an active `for await`
|
|
714
|
+
* loop. (Receiving the server's replayed output after reattach still requires iterating;
|
|
715
|
+
* see the class docstring.)
|
|
716
|
+
*
|
|
717
|
+
* Returns a promise that resolves to the reconnect outcome (true = recovered).
|
|
718
|
+
* Returns immediately with `false` for terminal close codes (4000 kicked, 1003 text,
|
|
719
|
+
* 1000 normal) or when no `reconnectConfig` is set. Concurrent callers share one
|
|
720
|
+
* in-flight attempt via `_reconnectPromise`.
|
|
721
|
+
*/
|
|
722
|
+
_ensureReconnect(closeCode) {
|
|
723
|
+
// Join the in-flight attempt while it is still running — that spans both 'reconnecting'
|
|
724
|
+
// (backoff sleep) and 'connecting' (a retry's _connectWithUpgrade in progress). A second
|
|
725
|
+
// observer of the SAME disconnect (e.g. the iterator resuming after _waitForClose, while
|
|
726
|
+
// the close handler already armed the attempt and it has advanced to 'connecting') must
|
|
727
|
+
// join, not start a duplicate attempt. We deliberately do NOT join once status is 'open':
|
|
728
|
+
// a disconnect that arrives after a successful reattach (the window where the new socket
|
|
729
|
+
// is promoted to 'open' but _reconnectPromise has not yet been cleared in .finally) must
|
|
730
|
+
// be processed as a NEW event — otherwise a terminal 4000/1003 on the new socket is
|
|
731
|
+
// swallowed (kicked never set) and a fresh drop arms no new reconnect.
|
|
732
|
+
if (this._reconnectPromise && (this._state.status === 'reconnecting' || this._state.status === 'connecting')) {
|
|
733
|
+
return this._reconnectPromise;
|
|
734
|
+
}
|
|
735
|
+
if (this._isClosed())
|
|
736
|
+
return Promise.resolve(false);
|
|
737
|
+
// Terminal close codes (never auto-reconnect) and the no-config case all stop the
|
|
738
|
+
// session. Classify once, emit any code-specific warning, then do a SINGLE shared
|
|
739
|
+
// idle transition rather than repeating it per branch.
|
|
740
|
+
// 4000 kicked · 1003 text-frames-unsupported · 1000 normal close · no reconnectConfig
|
|
741
|
+
const terminal = closeCode === 4000 || closeCode === 1003 || closeCode === 1000 || !this.reconnectConfig;
|
|
742
|
+
if (terminal) {
|
|
743
|
+
if (closeCode === 4000) {
|
|
744
|
+
this._kicked = true;
|
|
745
|
+
this.log.warn(`ShellSession: kicked by new connection (close 4000, shellId=${this.shellId})`);
|
|
746
|
+
}
|
|
747
|
+
else if (closeCode === 1003) {
|
|
748
|
+
this.log.warn(`ShellSession: Server closed with 1003 (text frames not supported). ` +
|
|
749
|
+
`Open a new ShellSession — do not reconnect.`);
|
|
750
|
+
}
|
|
751
|
+
else if (closeCode === 1001 && !this.reconnectConfig) {
|
|
752
|
+
this.log.warn(`ShellSession: Server sent 1001 Going Away but no reconnectConfig — ` +
|
|
753
|
+
`stopping. Reconnect with shellId=${this.shellId}`);
|
|
754
|
+
}
|
|
755
|
+
if (this._state.status === 'open')
|
|
756
|
+
this._state = { status: 'idle' };
|
|
757
|
+
return Promise.resolve(false);
|
|
758
|
+
}
|
|
759
|
+
// Reconnectable disconnect — stop keepalive on the dead connection and enter the loop.
|
|
760
|
+
if (closeCode === 1001) {
|
|
761
|
+
this.log.warn(`ShellSession: Server sent 1001 Going Away — reconnecting (shellId=${this.shellId})`);
|
|
762
|
+
}
|
|
763
|
+
if (this._state.status === 'open')
|
|
764
|
+
this._stopKeepalive(this._state.keepaliveTimer);
|
|
765
|
+
this._state = { status: 'reconnecting' };
|
|
766
|
+
const attempt = this._reconnectWithBackoff(Date.now())
|
|
767
|
+
.then((didReconnect) => {
|
|
768
|
+
if (!didReconnect && !this._isClosed()) {
|
|
769
|
+
this.log.warn(`ShellSession: reconnect exhausted (shellId=${this.shellId})`);
|
|
770
|
+
this._state = { status: 'idle' };
|
|
771
|
+
}
|
|
772
|
+
return didReconnect;
|
|
773
|
+
})
|
|
774
|
+
.finally(() => {
|
|
775
|
+
// Only clear if we are still the current attempt — a newer _ensureReconnect may
|
|
776
|
+
// have replaced _reconnectPromise (e.g. a drop right after this one recovered).
|
|
777
|
+
if (this._reconnectPromise === attempt)
|
|
778
|
+
this._reconnectPromise = null;
|
|
779
|
+
});
|
|
780
|
+
this._reconnectPromise = attempt;
|
|
781
|
+
return this._reconnectPromise;
|
|
782
|
+
}
|
|
783
|
+
// ── Two-loop exponential backoff reconnect (mirrors Python SDK) ────────────
|
|
784
|
+
async _reconnectWithBackoff(startTime) {
|
|
785
|
+
const cfg = this.reconnectConfig;
|
|
786
|
+
// reconnectWindow: null means unlimited; undefined falls back to default.
|
|
787
|
+
const window = cfg.reconnectWindow !== undefined ? cfg.reconnectWindow : DEFAULT_RECONNECT_WINDOW;
|
|
788
|
+
while (true) {
|
|
789
|
+
if (window !== null) {
|
|
790
|
+
const elapsed = Date.now() - startTime;
|
|
791
|
+
if (elapsed >= window) {
|
|
792
|
+
this.log.warn(`ShellSession: Reconnection window of ${window}ms expired after ${elapsed}ms ` + `(shellId=${this.shellId})`);
|
|
793
|
+
return false;
|
|
794
|
+
}
|
|
795
|
+
}
|
|
796
|
+
const success = await this._runInnerRetryLoop(cfg, startTime, window);
|
|
797
|
+
if (success)
|
|
798
|
+
return true;
|
|
799
|
+
let outerDelay = cfg.outerLoopDelay ?? DEFAULT_OUTER_LOOP_DELAY;
|
|
800
|
+
// Cap outer sleep to remaining window so we don't overshoot a short window
|
|
801
|
+
// by up to outerDelay ms (e.g. a 5s window with 30s outerDelay would overshoot
|
|
802
|
+
// by 25s waiting for a retry that the window check will immediately reject).
|
|
803
|
+
if (window !== null) {
|
|
804
|
+
const remaining = window - (Date.now() - startTime);
|
|
805
|
+
outerDelay = Math.min(outerDelay, Math.max(0, remaining));
|
|
806
|
+
}
|
|
807
|
+
this.log.info(`ShellSession: Inner loop exhausted, waiting ${outerDelay}ms before next outer retry ` +
|
|
808
|
+
`(shellId=${this.shellId})`);
|
|
809
|
+
await _sleep(outerDelay, this._sessionController.signal);
|
|
810
|
+
if (this._isClosed())
|
|
811
|
+
return false;
|
|
812
|
+
}
|
|
813
|
+
}
|
|
814
|
+
async _runInnerRetryLoop(cfg, startTime, window) {
|
|
815
|
+
const maxRetries = cfg.maxRetries ?? DEFAULT_MAX_RETRIES;
|
|
816
|
+
const baseDelay = cfg.baseDelay ?? DEFAULT_BASE_DELAY;
|
|
817
|
+
const maxDelay = cfg.maxDelay ?? DEFAULT_MAX_DELAY;
|
|
818
|
+
let attempt = 0;
|
|
819
|
+
while (attempt < maxRetries) {
|
|
820
|
+
attempt++;
|
|
821
|
+
if (window !== null && Date.now() - startTime >= window)
|
|
822
|
+
return false;
|
|
823
|
+
// Attempt first — no pre-attempt sleep. Sleep only after failure so that
|
|
824
|
+
// transient disconnects reconnect immediately on the first try.
|
|
825
|
+
this.log.info(`ShellSession: Reconnect attempt ${attempt}/${maxRetries} (shellId=${this.shellId})`);
|
|
826
|
+
try {
|
|
827
|
+
await this._connectWithUpgrade();
|
|
828
|
+
// close() may have fired while connectFn was awaiting — _connectWithUpgrade returns void
|
|
829
|
+
// rather than throwing in that case, so guard here before claiming a successful reconnect.
|
|
830
|
+
if (this._isClosed())
|
|
831
|
+
return false;
|
|
832
|
+
this.log.info(`ShellSession: Reconnected (reconnected=${this.reconnected}, shellId=${this.shellId})`);
|
|
833
|
+
if (cfg.onReconnect) {
|
|
834
|
+
try {
|
|
835
|
+
await cfg.onReconnect(this.reconnected);
|
|
836
|
+
}
|
|
837
|
+
catch (err) {
|
|
838
|
+
this.log.warn(`ShellSession: onReconnect callback threw (shellId=${this.shellId}): ${String(err)}`);
|
|
839
|
+
}
|
|
840
|
+
}
|
|
841
|
+
return true;
|
|
842
|
+
}
|
|
843
|
+
catch (err) {
|
|
844
|
+
this.log.warn(`ShellSession: Reconnect attempt ${attempt} failed: ${String(err)} (shellId=${this.shellId})`);
|
|
845
|
+
}
|
|
846
|
+
if (this._isClosed())
|
|
847
|
+
return false;
|
|
848
|
+
// Re-check window after a slow _connectWithUpgrade so the backoff sleep
|
|
849
|
+
// doesn't fire after the window has already expired mid-attempt.
|
|
850
|
+
if (window !== null && Date.now() - startTime >= window)
|
|
851
|
+
return false;
|
|
852
|
+
// Exponential backoff with ±25% jitter to avoid thundering herd on
|
|
853
|
+
// simultaneous reconnects from multiple clients.
|
|
854
|
+
// Use _sessionController.signal (only aborted by close()) — not
|
|
855
|
+
// _abortController, which is already aborted by the WS close event.
|
|
856
|
+
const base = Math.min(baseDelay * Math.pow(2, attempt - 1), maxDelay);
|
|
857
|
+
const jitter = base * 0.25 * (Math.random() * 2 - 1);
|
|
858
|
+
const delay = base + jitter;
|
|
859
|
+
this.log.info(`ShellSession: Waiting ${Math.round(delay)}ms before next attempt (shellId=${this.shellId})`);
|
|
860
|
+
await _sleep(delay, this._sessionController.signal);
|
|
861
|
+
if (this._isClosed())
|
|
862
|
+
return false;
|
|
863
|
+
}
|
|
864
|
+
return false;
|
|
865
|
+
}
|
|
866
|
+
}
|
|
867
|
+
/**
 * Resolve after `ms` milliseconds, or earlier if `signal` aborts.
 *
 * The returned promise never rejects: an abort (including a signal that is already
 * aborted on entry) simply resolves it early so callers can re-check their own state.
 *
 * The abort listener is detached when the timer elapses normally. Without that, every
 * sleep on a long-lived signal would leave one listener (and its closure) attached
 * until the signal finally aborts.
 *
 * @param {number} ms Delay in milliseconds.
 * @param {AbortSignal} [signal] Optional signal that cuts the sleep short.
 * @returns {Promise<void>}
 */
function _sleep(ms, signal) {
    return new Promise((resolve) => {
        if (signal?.aborted) {
            resolve();
            return;
        }
        let timer;
        const onAbort = () => {
            globalThis.clearTimeout(timer);
            resolve();
        };
        timer = globalThis.setTimeout(() => {
            // Timer won the race: drop the abort listener so it does not accumulate.
            signal?.removeEventListener('abort', onAbort);
            resolve();
        }, ms);
        signal?.addEventListener('abort', onAbort, { once: true });
    });
}
|
|
880
|
+
//# sourceMappingURL=session.js.map
|