@rivetkit/engine-runner 2.0.24-rc.1 → 2.0.25-rc.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +10 -10
- package/dist/mod.cjs +1460 -812
- package/dist/mod.cjs.map +1 -1
- package/dist/mod.d.cts +263 -17
- package/dist/mod.d.ts +263 -17
- package/dist/mod.js +1454 -806
- package/dist/mod.js.map +1 -1
- package/package.json +2 -2
- package/src/actor.ts +196 -0
- package/src/mod.ts +409 -177
- package/src/stringify.ts +182 -12
- package/src/tunnel.ts +822 -428
- package/src/utils.ts +93 -0
- package/src/websocket-tunnel-adapter.ts +340 -357
- package/tests/utils.test.ts +194 -0
package/src/mod.ts
CHANGED
|
@@ -1,39 +1,28 @@
|
|
|
1
1
|
import * as protocol from "@rivetkit/engine-runner-protocol";
|
|
2
2
|
import type { Logger } from "pino";
|
|
3
3
|
import type WebSocket from "ws";
|
|
4
|
+
import { type ActorConfig, RunnerActor } from "./actor";
|
|
4
5
|
import { logger, setLogger } from "./log.js";
|
|
5
|
-
import {
|
|
6
|
-
import { Tunnel } from "./tunnel";
|
|
6
|
+
import { stringifyToClient, stringifyToServer } from "./stringify";
|
|
7
|
+
import { type HibernatingWebSocketMetadata, Tunnel } from "./tunnel";
|
|
7
8
|
import {
|
|
8
9
|
calculateBackoff,
|
|
9
10
|
parseWebSocketCloseReason,
|
|
10
11
|
unreachable,
|
|
11
12
|
} from "./utils";
|
|
12
13
|
import { importWebSocket } from "./websocket.js";
|
|
13
|
-
|
|
14
|
+
|
|
15
|
+
export type { HibernatingWebSocketMetadata };
|
|
16
|
+
export { RunnerActor, type ActorConfig };
|
|
17
|
+
export { idToStr } from "./utils";
|
|
14
18
|
|
|
15
19
|
const KV_EXPIRE: number = 30_000;
|
|
16
|
-
const PROTOCOL_VERSION: number =
|
|
20
|
+
const PROTOCOL_VERSION: number = 3;
|
|
17
21
|
const RUNNER_PING_INTERVAL = 3_000;
|
|
18
22
|
|
|
19
23
|
/** Warn once the backlog significantly exceeds the server's ack batch size. */
|
|
20
24
|
const EVENT_BACKLOG_WARN_THRESHOLD = 10_000;
|
|
21
|
-
const SIGNAL_HANDLERS: (() => void)[] = [];
|
|
22
|
-
|
|
23
|
-
export interface ActorInstance {
|
|
24
|
-
actorId: string;
|
|
25
|
-
generation: number;
|
|
26
|
-
config: ActorConfig;
|
|
27
|
-
requests: Set<string>; // Track active request IDs
|
|
28
|
-
webSockets: Set<string>; // Track active WebSocket IDs
|
|
29
|
-
}
|
|
30
|
-
|
|
31
|
-
export interface ActorConfig {
|
|
32
|
-
name: string;
|
|
33
|
-
key: string | null;
|
|
34
|
-
createTs: bigint;
|
|
35
|
-
input: Uint8Array | null;
|
|
36
|
-
}
|
|
25
|
+
const SIGNAL_HANDLERS: (() => void | Promise<void>)[] = [];
|
|
37
26
|
|
|
38
27
|
export interface RunnerConfig {
|
|
39
28
|
logger?: Logger;
|
|
@@ -51,38 +40,137 @@ export interface RunnerConfig {
|
|
|
51
40
|
onConnected: () => void;
|
|
52
41
|
onDisconnected: (code: number, reason: string) => void;
|
|
53
42
|
onShutdown: () => void;
|
|
43
|
+
|
|
44
|
+
/** Called when receiving a network request. */
|
|
54
45
|
fetch: (
|
|
55
46
|
runner: Runner,
|
|
56
47
|
actorId: string,
|
|
48
|
+
gatewayId: protocol.GatewayId,
|
|
57
49
|
requestId: protocol.RequestId,
|
|
58
50
|
request: Request,
|
|
59
51
|
) => Promise<Response>;
|
|
60
|
-
|
|
52
|
+
|
|
53
|
+
/**
|
|
54
|
+
* Called when receiving a WebSocket connection.
|
|
55
|
+
*
|
|
56
|
+
* All event listeners must be added synchronously inside this function or
|
|
57
|
+
* else events may be missed. The open event will fire immediately after
|
|
58
|
+
* this function finishes.
|
|
59
|
+
*
|
|
60
|
+
* Any errors thrown here will disconnect the WebSocket immediately.
|
|
61
|
+
*
|
|
62
|
+
* While `path` and `headers` are partially redundant to the data in the
|
|
63
|
+
* `Request`, they may vary slightly from the actual content of `Request`.
|
|
64
|
+
* Prefer to persist the `path` and `headers` properties instead of the
|
|
65
|
+
* `Request` itself.
|
|
66
|
+
*
|
|
67
|
+
* ## Hibernating Web Sockets
|
|
68
|
+
*
|
|
69
|
+
* ### Implementation Requirements
|
|
70
|
+
*
|
|
71
|
+
* **Requirement 1: Persist HWS Immediately**
|
|
72
|
+
*
|
|
73
|
+
* This handler is responsible for persisting hibernatable WebSockets immediately
|
|
74
|
+
* (do not wait for open event). It is not time sensitive to flush the
|
|
75
|
+
* connection state. If this fails to persist the HWS, the client's
|
|
76
|
+
* WebSocket will be disconnected on next wake in the call to
|
|
77
|
+
* `Tunnel::restoreHibernatingRequests` since the connection entry will not
|
|
78
|
+
* exist.
|
|
79
|
+
*
|
|
80
|
+
* **Requirement 2: Persist Message Index On `message`**
|
|
81
|
+
*
|
|
82
|
+
* In the `message` event listener, this handler must persist the message
|
|
83
|
+
* index from the event. The request ID is available at
|
|
84
|
+
* `event.rivetRequestId` and message index at `event.rivetMessageIndex`.
|
|
85
|
+
*
|
|
86
|
+
* The message index should not be flushed immediately. Instead, this
|
|
87
|
+
* should:
|
|
88
|
+
*
|
|
89
|
+
* - Debounce calls to persist the message index
|
|
90
|
+
* - After each persist, call
|
|
91
|
+
* `Runner::sendHibernatableWebSocketMessageAck` to acknowledge the
|
|
92
|
+
* message
|
|
93
|
+
*
|
|
94
|
+
* This mechanism allows us to buffer messages on the gateway so we can
|
|
95
|
+
* batch-persist events on our end on a given interval.
|
|
96
|
+
*
|
|
97
|
+
* If this fails to persist, then the gateway will replay unacked
|
|
98
|
+
* messages when the actor starts again.
|
|
99
|
+
*
|
|
100
|
+
* **Requirement 3: Remove HWS From Storage On `close`**
|
|
101
|
+
*
|
|
102
|
+
* This handler should add an event listener for `close` to remove the
|
|
103
|
+
* connection from storage.
|
|
104
|
+
*
|
|
105
|
+
* If the connection removal fails to persist, the close event will be
|
|
106
|
+
* called again on the next actor start in
|
|
107
|
+
* `Tunnel::restoreHibernatingRequests` since there will be no request for
|
|
108
|
+
* the given connection.
|
|
109
|
+
*
|
|
110
|
+
* ### Restoring Connections
|
|
111
|
+
*
|
|
112
|
+
* The user of this library is responsible for:
|
|
113
|
+
* 1. Loading all persisted hibernatable WebSocket metadata for an actor
|
|
114
|
+
* 2. Calling `Runner::restoreHibernatingRequests` with this metadata at
|
|
115
|
+
* the end of `onActorStart`
|
|
116
|
+
*
|
|
117
|
+
* `restoreHibernatingRequests` will restore all connections and attach
|
|
118
|
+
* the appropriate event listeners.
|
|
119
|
+
*
|
|
120
|
+
* ### No Open Event On Restoration
|
|
121
|
+
*
|
|
122
|
+
* When restoring a HWS, the open event will not be called again. It will
|
|
123
|
+
* go straight to the message or close event.
|
|
124
|
+
*/
|
|
125
|
+
websocket: (
|
|
61
126
|
runner: Runner,
|
|
62
127
|
actorId: string,
|
|
63
128
|
ws: any,
|
|
129
|
+
gatewayId: protocol.GatewayId,
|
|
64
130
|
requestId: protocol.RequestId,
|
|
65
131
|
request: Request,
|
|
132
|
+
path: string,
|
|
133
|
+
headers: Record<string, string>,
|
|
134
|
+
isHibernatable: boolean,
|
|
135
|
+
isRestoringHibernatable: boolean,
|
|
66
136
|
) => Promise<void>;
|
|
137
|
+
|
|
138
|
+
hibernatableWebSocket: {
|
|
139
|
+
/**
|
|
140
|
+
* Determines if a WebSocket can continue to live while an actor goes to
|
|
141
|
+
* sleep.
|
|
142
|
+
*/
|
|
143
|
+
canHibernate: (
|
|
144
|
+
actorId: string,
|
|
145
|
+
gatewayId: ArrayBuffer,
|
|
146
|
+
requestId: ArrayBuffer,
|
|
147
|
+
request: Request,
|
|
148
|
+
) => boolean;
|
|
149
|
+
};
|
|
150
|
+
|
|
151
|
+
/**
|
|
152
|
+
* Called when an actor starts.
|
|
153
|
+
*
|
|
154
|
+
* This callback is responsible for:
|
|
155
|
+
* 1. Initializing the actor instance
|
|
156
|
+
* 2. Loading all persisted hibernatable WebSocket metadata for this actor
|
|
157
|
+
* 3. Calling `Runner::restoreHibernatingRequests` with the loaded metadata
|
|
158
|
+
* to restore hibernatable WebSocket connections
|
|
159
|
+
*
|
|
160
|
+
* The actor should not be marked as "ready" until after
|
|
161
|
+
* `restoreHibernatingRequests` completes to ensure all hibernatable
|
|
162
|
+
* connections are fully restored before the actor processes new requests.
|
|
163
|
+
*/
|
|
67
164
|
onActorStart: (
|
|
68
165
|
actorId: string,
|
|
69
166
|
generation: number,
|
|
70
167
|
config: ActorConfig,
|
|
71
168
|
) => Promise<void>;
|
|
169
|
+
|
|
72
170
|
onActorStop: (actorId: string, generation: number) => Promise<void>;
|
|
73
|
-
getActorHibernationConfig: (
|
|
74
|
-
actorId: string,
|
|
75
|
-
requestId: ArrayBuffer,
|
|
76
|
-
request: Request,
|
|
77
|
-
) => HibernationConfig;
|
|
78
171
|
noAutoShutdown?: boolean;
|
|
79
172
|
}
|
|
80
173
|
|
|
81
|
-
export interface HibernationConfig {
|
|
82
|
-
enabled: boolean;
|
|
83
|
-
lastMsgIndex: number | undefined;
|
|
84
|
-
}
|
|
85
|
-
|
|
86
174
|
export interface KvListOptions {
|
|
87
175
|
reverse?: boolean;
|
|
88
176
|
limit?: number;
|
|
@@ -104,17 +192,17 @@ export class Runner {
|
|
|
104
192
|
return this.#config;
|
|
105
193
|
}
|
|
106
194
|
|
|
107
|
-
#actors: Map<string,
|
|
108
|
-
#actorWebSockets: Map<string, Set<WebSocketTunnelAdapter>> = new Map();
|
|
195
|
+
#actors: Map<string, RunnerActor> = new Map();
|
|
109
196
|
|
|
110
197
|
// WebSocket
|
|
111
|
-
|
|
198
|
+
__pegboardWebSocket?: WebSocket;
|
|
112
199
|
runnerId?: string;
|
|
113
200
|
#lastCommandIdx: number = -1;
|
|
114
201
|
#pingLoop?: NodeJS.Timeout;
|
|
115
202
|
#nextEventIdx: bigint = 0n;
|
|
116
203
|
#started: boolean = false;
|
|
117
204
|
#shutdown: boolean = false;
|
|
205
|
+
#shuttingDown: boolean = false;
|
|
118
206
|
#reconnectAttempt: number = 0;
|
|
119
207
|
#reconnectTimeout?: NodeJS.Timeout;
|
|
120
208
|
|
|
@@ -130,7 +218,7 @@ export class Runner {
|
|
|
130
218
|
#ackInterval?: NodeJS.Timeout;
|
|
131
219
|
|
|
132
220
|
// KV operations
|
|
133
|
-
#
|
|
221
|
+
#nextKvRequestId: number = 0;
|
|
134
222
|
#kvRequests: Map<number, KvRequestEntry> = new Map();
|
|
135
223
|
#kvCleanupInterval?: NodeJS.Timeout;
|
|
136
224
|
|
|
@@ -173,13 +261,6 @@ export class Runner {
|
|
|
173
261
|
|
|
174
262
|
// MARK: Manage actors
|
|
175
263
|
sleepActor(actorId: string, generation?: number) {
|
|
176
|
-
if (this.#shutdown) {
|
|
177
|
-
this.log?.warn({
|
|
178
|
-
msg: "runner is shut down, cannot sleep actor",
|
|
179
|
-
});
|
|
180
|
-
return;
|
|
181
|
-
}
|
|
182
|
-
|
|
183
264
|
const actor = this.getActor(actorId, generation);
|
|
184
265
|
if (!actor) return;
|
|
185
266
|
|
|
@@ -201,7 +282,7 @@ export class Runner {
|
|
|
201
282
|
}
|
|
202
283
|
|
|
203
284
|
async forceStopActor(actorId: string, generation?: number) {
|
|
204
|
-
const actor = this
|
|
285
|
+
const actor = this.getActor(actorId, generation);
|
|
205
286
|
if (!actor) return;
|
|
206
287
|
|
|
207
288
|
// If onActorStop times out, Pegboard will handle this timeout with ACTOR_STOP_THRESHOLD_DURATION_MS
|
|
@@ -218,6 +299,11 @@ export class Runner {
|
|
|
218
299
|
// Close requests after onActorStop so you can send messages over the tunnel
|
|
219
300
|
this.#tunnel?.closeActiveRequests(actor);
|
|
220
301
|
|
|
302
|
+
// Remove actor after stopping in order to ensure that we can still
|
|
303
|
+
// call actions on the runner. Do this before sending stopped update in
|
|
304
|
+
// order to ensure we don't have duplicate actors.
|
|
305
|
+
this.#removeActor(actorId, generation);
|
|
306
|
+
|
|
221
307
|
this.#sendActorStateUpdate(actorId, actor.generation, "stopped");
|
|
222
308
|
}
|
|
223
309
|
|
|
@@ -232,17 +318,17 @@ export class Runner {
|
|
|
232
318
|
}
|
|
233
319
|
}
|
|
234
320
|
|
|
235
|
-
getActor(actorId: string, generation?: number):
|
|
321
|
+
getActor(actorId: string, generation?: number): RunnerActor | undefined {
|
|
236
322
|
const actor = this.#actors.get(actorId);
|
|
237
323
|
if (!actor) {
|
|
238
|
-
this.log?.
|
|
324
|
+
this.log?.warn({
|
|
239
325
|
msg: "actor not found",
|
|
240
326
|
actorId,
|
|
241
327
|
});
|
|
242
328
|
return undefined;
|
|
243
329
|
}
|
|
244
330
|
if (generation !== undefined && actor.generation !== generation) {
|
|
245
|
-
this.log?.
|
|
331
|
+
this.log?.warn({
|
|
246
332
|
msg: "actor generation mismatch",
|
|
247
333
|
actorId,
|
|
248
334
|
generation,
|
|
@@ -253,6 +339,16 @@ export class Runner {
|
|
|
253
339
|
return actor;
|
|
254
340
|
}
|
|
255
341
|
|
|
342
|
+
async getAndWaitForActor(
|
|
343
|
+
actorId: string,
|
|
344
|
+
generation?: number,
|
|
345
|
+
): Promise<RunnerActor | undefined> {
|
|
346
|
+
const actor = this.getActor(actorId, generation);
|
|
347
|
+
if (!actor) return;
|
|
348
|
+
await actor.actorStartPromise.promise;
|
|
349
|
+
return actor;
|
|
350
|
+
}
|
|
351
|
+
|
|
256
352
|
hasActor(actorId: string, generation?: number): boolean {
|
|
257
353
|
const actor = this.#actors.get(actorId);
|
|
258
354
|
|
|
@@ -262,11 +358,15 @@ export class Runner {
|
|
|
262
358
|
);
|
|
263
359
|
}
|
|
264
360
|
|
|
361
|
+
get actors() {
|
|
362
|
+
return this.#actors;
|
|
363
|
+
}
|
|
364
|
+
|
|
265
365
|
// IMPORTANT: Make sure to call stopActiveRequests if calling #removeActor
|
|
266
366
|
#removeActor(
|
|
267
367
|
actorId: string,
|
|
268
368
|
generation?: number,
|
|
269
|
-
):
|
|
369
|
+
): RunnerActor | undefined {
|
|
270
370
|
const actor = this.#actors.get(actorId);
|
|
271
371
|
if (!actor) {
|
|
272
372
|
this.log?.error({
|
|
@@ -286,6 +386,12 @@ export class Runner {
|
|
|
286
386
|
|
|
287
387
|
this.#actors.delete(actorId);
|
|
288
388
|
|
|
389
|
+
this.log?.info({
|
|
390
|
+
msg: "removed actor",
|
|
391
|
+
actorId,
|
|
392
|
+
actors: this.#actors.size,
|
|
393
|
+
});
|
|
394
|
+
|
|
289
395
|
return actor;
|
|
290
396
|
}
|
|
291
397
|
|
|
@@ -308,23 +414,25 @@ export class Runner {
|
|
|
308
414
|
|
|
309
415
|
if (!this.#config.noAutoShutdown) {
|
|
310
416
|
if (!SIGNAL_HANDLERS.length) {
|
|
311
|
-
process.on("SIGTERM", () => {
|
|
417
|
+
process.on("SIGTERM", async () => {
|
|
312
418
|
this.log?.debug("received SIGTERM");
|
|
313
419
|
|
|
314
420
|
for (const handler of SIGNAL_HANDLERS) {
|
|
315
|
-
handler();
|
|
421
|
+
await handler();
|
|
316
422
|
}
|
|
317
423
|
|
|
318
|
-
|
|
424
|
+
// TODO: Add back
|
|
425
|
+
// process.exit(0);
|
|
319
426
|
});
|
|
320
|
-
process.on("SIGINT", () => {
|
|
427
|
+
process.on("SIGINT", async () => {
|
|
321
428
|
this.log?.debug("received SIGINT");
|
|
322
429
|
|
|
323
430
|
for (const handler of SIGNAL_HANDLERS) {
|
|
324
|
-
handler();
|
|
431
|
+
await handler();
|
|
325
432
|
}
|
|
326
433
|
|
|
327
|
-
|
|
434
|
+
// TODO: Add back
|
|
435
|
+
// process.exit(0);
|
|
328
436
|
});
|
|
329
437
|
|
|
330
438
|
this.log?.debug({
|
|
@@ -332,15 +440,24 @@ export class Runner {
|
|
|
332
440
|
});
|
|
333
441
|
}
|
|
334
442
|
|
|
335
|
-
SIGNAL_HANDLERS.push(() => {
|
|
443
|
+
SIGNAL_HANDLERS.push(async () => {
|
|
336
444
|
const weak = new WeakRef(this);
|
|
337
|
-
weak.deref()?.shutdown(false, false);
|
|
445
|
+
await weak.deref()?.shutdown(false, false);
|
|
338
446
|
});
|
|
339
447
|
}
|
|
340
448
|
}
|
|
341
449
|
|
|
342
450
|
// MARK: Shutdown
|
|
343
451
|
async shutdown(immediate: boolean, exit: boolean = false) {
|
|
452
|
+
// Prevent concurrent shutdowns
|
|
453
|
+
if (this.#shuttingDown) {
|
|
454
|
+
this.log?.debug({
|
|
455
|
+
msg: "shutdown already in progress, ignoring",
|
|
456
|
+
});
|
|
457
|
+
return;
|
|
458
|
+
}
|
|
459
|
+
this.#shuttingDown = true;
|
|
460
|
+
|
|
344
461
|
this.log?.info({
|
|
345
462
|
msg: "starting shutdown",
|
|
346
463
|
immediate,
|
|
@@ -387,11 +504,8 @@ export class Runner {
|
|
|
387
504
|
this.#kvRequests.clear();
|
|
388
505
|
|
|
389
506
|
// Close WebSocket
|
|
390
|
-
if (
|
|
391
|
-
|
|
392
|
-
this.#pegboardWebSocket.readyState === 1
|
|
393
|
-
) {
|
|
394
|
-
const pegboardWebSocket = this.#pegboardWebSocket;
|
|
507
|
+
if (this.__webSocketReady()) {
|
|
508
|
+
const pegboardWebSocket = this.__pegboardWebSocket;
|
|
395
509
|
if (immediate) {
|
|
396
510
|
// Stop immediately
|
|
397
511
|
pegboardWebSocket.close(1000, "pegboard.runner_shutdown");
|
|
@@ -403,22 +517,14 @@ export class Runner {
|
|
|
403
517
|
readyState: pegboardWebSocket.readyState,
|
|
404
518
|
});
|
|
405
519
|
|
|
406
|
-
//
|
|
407
|
-
//
|
|
408
|
-
|
|
520
|
+
// Start stopping
|
|
521
|
+
//
|
|
522
|
+
// The runner workflow will send StopActor commands for all
|
|
523
|
+
// actors
|
|
524
|
+
this.__sendToServer({
|
|
409
525
|
tag: "ToServerStopping",
|
|
410
526
|
val: null,
|
|
411
527
|
});
|
|
412
|
-
if (
|
|
413
|
-
this.#pegboardWebSocket &&
|
|
414
|
-
this.#pegboardWebSocket.readyState === 1
|
|
415
|
-
) {
|
|
416
|
-
this.#pegboardWebSocket.send(encoded);
|
|
417
|
-
} else {
|
|
418
|
-
this.log?.error(
|
|
419
|
-
"WebSocket not available or not open for sending data",
|
|
420
|
-
);
|
|
421
|
-
}
|
|
422
528
|
|
|
423
529
|
const closePromise = new Promise<void>((resolve) => {
|
|
424
530
|
if (!pegboardWebSocket)
|
|
@@ -434,7 +540,8 @@ export class Runner {
|
|
|
434
540
|
});
|
|
435
541
|
});
|
|
436
542
|
|
|
437
|
-
//
|
|
543
|
+
// Wait for all actors to stop before closing ws
|
|
544
|
+
await this.#waitForActorsToStop(pegboardWebSocket);
|
|
438
545
|
|
|
439
546
|
this.log?.info({
|
|
440
547
|
msg: "closing WebSocket",
|
|
@@ -459,7 +566,7 @@ export class Runner {
|
|
|
459
566
|
// the runner has already shut down
|
|
460
567
|
this.log?.debug({
|
|
461
568
|
msg: "no runner WebSocket to shutdown or already closed",
|
|
462
|
-
readyState: this
|
|
569
|
+
readyState: this.__pegboardWebSocket?.readyState,
|
|
463
570
|
});
|
|
464
571
|
}
|
|
465
572
|
|
|
@@ -469,9 +576,96 @@ export class Runner {
|
|
|
469
576
|
this.#tunnel = undefined;
|
|
470
577
|
}
|
|
471
578
|
|
|
579
|
+
this.#config.onShutdown();
|
|
580
|
+
|
|
472
581
|
if (exit) process.exit(0);
|
|
582
|
+
}
|
|
473
583
|
|
|
474
|
-
|
|
584
|
+
/**
|
|
585
|
+
* Wait for all actors to stop before proceeding with shutdown.
|
|
586
|
+
*
|
|
587
|
+
* This method polls every 100ms to check if all actors have been stopped.
|
|
588
|
+
*
|
|
589
|
+
* It will resolve early if:
|
|
590
|
+
* - All actors are stopped
|
|
591
|
+
* - The WebSocket connection is closed
|
|
592
|
+
* - The shutdown timeout is reached (120 seconds)
|
|
593
|
+
*/
|
|
594
|
+
async #waitForActorsToStop(ws: WebSocket): Promise<void> {
|
|
595
|
+
const shutdownTimeout = 120_000; // 120 seconds
|
|
596
|
+
const shutdownCheckInterval = 100; // Check every 100ms
|
|
597
|
+
const progressLogInterval = 5_000; // Log progress every 5 seconds
|
|
598
|
+
const shutdownStartTs = Date.now();
|
|
599
|
+
let lastProgressLogTs = 0; // Ensure first log happens immediately
|
|
600
|
+
|
|
601
|
+
return new Promise<void>((resolve) => {
|
|
602
|
+
const checkActors = () => {
|
|
603
|
+
const now = Date.now();
|
|
604
|
+
const elapsed = now - shutdownStartTs;
|
|
605
|
+
const wsIsClosed = ws.readyState === 2 || ws.readyState === 3;
|
|
606
|
+
|
|
607
|
+
if (this.#actors.size === 0) {
|
|
608
|
+
this.log?.info({
|
|
609
|
+
msg: "all actors stopped",
|
|
610
|
+
elapsed,
|
|
611
|
+
});
|
|
612
|
+
return true;
|
|
613
|
+
} else if (wsIsClosed) {
|
|
614
|
+
this.log?.warn({
|
|
615
|
+
msg: "websocket closed before all actors stopped",
|
|
616
|
+
remainingActors: this.#actors.size,
|
|
617
|
+
elapsed,
|
|
618
|
+
});
|
|
619
|
+
return true;
|
|
620
|
+
} else if (elapsed >= shutdownTimeout) {
|
|
621
|
+
this.log?.warn({
|
|
622
|
+
msg: "shutdown timeout reached, forcing close",
|
|
623
|
+
remainingActors: this.#actors.size,
|
|
624
|
+
elapsed,
|
|
625
|
+
});
|
|
626
|
+
return true;
|
|
627
|
+
} else {
|
|
628
|
+
// Log progress every 5 seconds
|
|
629
|
+
if (now - lastProgressLogTs >= progressLogInterval) {
|
|
630
|
+
this.log?.info({
|
|
631
|
+
msg: "waiting for actors to stop",
|
|
632
|
+
remainingActors: this.#actors.size,
|
|
633
|
+
elapsed,
|
|
634
|
+
});
|
|
635
|
+
lastProgressLogTs = now;
|
|
636
|
+
}
|
|
637
|
+
return false;
|
|
638
|
+
}
|
|
639
|
+
};
|
|
640
|
+
|
|
641
|
+
// Check immediately first
|
|
642
|
+
if (checkActors()) {
|
|
643
|
+
this.log?.debug({
|
|
644
|
+
msg: "actors check completed immediately",
|
|
645
|
+
});
|
|
646
|
+
resolve();
|
|
647
|
+
return;
|
|
648
|
+
}
|
|
649
|
+
|
|
650
|
+
this.log?.debug({
|
|
651
|
+
msg: "starting actor wait interval",
|
|
652
|
+
checkInterval: shutdownCheckInterval,
|
|
653
|
+
});
|
|
654
|
+
|
|
655
|
+
const interval = setInterval(() => {
|
|
656
|
+
this.log?.debug({
|
|
657
|
+
msg: "actor wait interval tick",
|
|
658
|
+
actorCount: this.#actors.size,
|
|
659
|
+
});
|
|
660
|
+
if (checkActors()) {
|
|
661
|
+
this.log?.debug({
|
|
662
|
+
msg: "actors check completed, clearing interval",
|
|
663
|
+
});
|
|
664
|
+
clearInterval(interval);
|
|
665
|
+
resolve();
|
|
666
|
+
}
|
|
667
|
+
}, shutdownCheckInterval);
|
|
668
|
+
});
|
|
475
669
|
}
|
|
476
670
|
|
|
477
671
|
// MARK: Networking
|
|
@@ -498,7 +692,7 @@ export class Runner {
|
|
|
498
692
|
|
|
499
693
|
const WS = await importWebSocket();
|
|
500
694
|
const ws = new WS(this.pegboardUrl, protocols) as any as WebSocket;
|
|
501
|
-
this
|
|
695
|
+
this.__pegboardWebSocket = ws;
|
|
502
696
|
|
|
503
697
|
this.log?.info({
|
|
504
698
|
msg: "connecting",
|
|
@@ -564,9 +758,6 @@ export class Runner {
|
|
|
564
758
|
val: init,
|
|
565
759
|
});
|
|
566
760
|
|
|
567
|
-
// Process unsent KV requests
|
|
568
|
-
this.#processUnsentKvRequests();
|
|
569
|
-
|
|
570
761
|
// Start ping interval
|
|
571
762
|
const pingLoop = setInterval(() => {
|
|
572
763
|
if (ws.readyState === 1) {
|
|
@@ -612,6 +803,10 @@ export class Runner {
|
|
|
612
803
|
|
|
613
804
|
// Parse message
|
|
614
805
|
const message = protocol.decodeToClient(buf);
|
|
806
|
+
this.log?.debug({
|
|
807
|
+
msg: "received runner message",
|
|
808
|
+
data: stringifyToClient(message),
|
|
809
|
+
});
|
|
615
810
|
|
|
616
811
|
// Handle message
|
|
617
812
|
if (message.tag === "ToClientInit") {
|
|
@@ -635,8 +830,10 @@ export class Runner {
|
|
|
635
830
|
runnerLostThreshold: this.#runnerLostThreshold,
|
|
636
831
|
});
|
|
637
832
|
|
|
638
|
-
// Resend events
|
|
833
|
+
// Resend pending events
|
|
834
|
+
this.#processUnsentKvRequests();
|
|
639
835
|
this.#resendUnacknowledgedEvents(init.lastEventIdx);
|
|
836
|
+
this.#tunnel?.resendBufferedEvents();
|
|
640
837
|
|
|
641
838
|
this.#config.onConnected();
|
|
642
839
|
} else if (message.tag === "ToClientCommands") {
|
|
@@ -753,13 +950,11 @@ export class Runner {
|
|
|
753
950
|
});
|
|
754
951
|
|
|
755
952
|
for (const commandWrapper of commands) {
|
|
756
|
-
this.log?.info({
|
|
757
|
-
msg: "received command",
|
|
758
|
-
command: stringifyCommandWrapper(commandWrapper),
|
|
759
|
-
});
|
|
760
953
|
if (commandWrapper.inner.tag === "CommandStartActor") {
|
|
954
|
+
// Spawn background promise
|
|
761
955
|
this.#handleCommandStartActor(commandWrapper);
|
|
762
956
|
} else if (commandWrapper.inner.tag === "CommandStopActor") {
|
|
957
|
+
// Spawn background promise
|
|
763
958
|
this.#handleCommandStopActor(commandWrapper);
|
|
764
959
|
} else {
|
|
765
960
|
unreachable(commandWrapper.inner);
|
|
@@ -808,7 +1003,13 @@ export class Runner {
|
|
|
808
1003
|
}
|
|
809
1004
|
}
|
|
810
1005
|
|
|
811
|
-
#handleCommandStartActor(commandWrapper: protocol.CommandWrapper) {
|
|
1006
|
+
async #handleCommandStartActor(commandWrapper: protocol.CommandWrapper) {
|
|
1007
|
+
// IMPORTANT: Make sure no async code runs before inserting #actors and
|
|
1008
|
+
// calling addRequestToActor in order to prevent race conditions with
|
|
1009
|
+
// subsequent commands
|
|
1010
|
+
|
|
1011
|
+
if (!this.#tunnel) throw new Error("missing tunnel on actor start");
|
|
1012
|
+
|
|
812
1013
|
const startCommand = commandWrapper.inner
|
|
813
1014
|
.val as protocol.CommandStartActor;
|
|
814
1015
|
|
|
@@ -823,43 +1024,80 @@ export class Runner {
|
|
|
823
1024
|
input: config.input ? new Uint8Array(config.input) : null,
|
|
824
1025
|
};
|
|
825
1026
|
|
|
826
|
-
const instance
|
|
1027
|
+
const instance = new RunnerActor(
|
|
827
1028
|
actorId,
|
|
828
1029
|
generation,
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
1030
|
+
actorConfig,
|
|
1031
|
+
startCommand.hibernatingRequests,
|
|
1032
|
+
);
|
|
1033
|
+
|
|
1034
|
+
const existingActor = this.#actors.get(actorId);
|
|
1035
|
+
if (existingActor) {
|
|
1036
|
+
this.log?.warn({
|
|
1037
|
+
msg: "replacing existing actor in actors map",
|
|
1038
|
+
actorId,
|
|
1039
|
+
existingGeneration: existingActor.generation,
|
|
1040
|
+
newGeneration: generation,
|
|
1041
|
+
existingPendingRequests: existingActor.pendingRequests.length,
|
|
1042
|
+
});
|
|
1043
|
+
}
|
|
833
1044
|
|
|
834
1045
|
this.#actors.set(actorId, instance);
|
|
835
1046
|
|
|
1047
|
+
// NOTE: We have to populate the requestToActor map BEFORE running any
|
|
1048
|
+
// async code in order for incoming tunnel messages to wait for
|
|
1049
|
+
// instance.actorStartPromise before processing messages
|
|
1050
|
+
// TODO: Where is this GC'd if something fails?
|
|
1051
|
+
for (const hr of startCommand.hibernatingRequests) {
|
|
1052
|
+
this.#tunnel.addRequestToActor(hr.gatewayId, hr.requestId, actorId);
|
|
1053
|
+
}
|
|
1054
|
+
|
|
1055
|
+
this.log?.info({
|
|
1056
|
+
msg: "created actor",
|
|
1057
|
+
actors: this.#actors.size,
|
|
1058
|
+
actorId,
|
|
1059
|
+
name: config.name,
|
|
1060
|
+
key: config.key,
|
|
1061
|
+
generation,
|
|
1062
|
+
hibernatingRequests: startCommand.hibernatingRequests.length,
|
|
1063
|
+
});
|
|
1064
|
+
|
|
836
1065
|
this.#sendActorStateUpdate(actorId, generation, "running");
|
|
837
1066
|
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
.
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
});
|
|
1067
|
+
try {
|
|
1068
|
+
// TODO: Add timeout to onActorStart
|
|
1069
|
+
// Call onActorStart asynchronously and handle errors
|
|
1070
|
+
this.log?.debug({
|
|
1071
|
+
msg: "calling onActorStart",
|
|
1072
|
+
actorId,
|
|
1073
|
+
generation,
|
|
1074
|
+
});
|
|
1075
|
+
await this.#config.onActorStart(actorId, generation, actorConfig);
|
|
848
1076
|
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
1077
|
+
instance.actorStartPromise.resolve();
|
|
1078
|
+
} catch (err) {
|
|
1079
|
+
this.log?.error({
|
|
1080
|
+
msg: "error starting runner actor",
|
|
1081
|
+
actorId,
|
|
1082
|
+
err,
|
|
852
1083
|
});
|
|
1084
|
+
|
|
1085
|
+
instance.actorStartPromise.reject(err);
|
|
1086
|
+
|
|
1087
|
+
// TODO: Mark as crashed
|
|
1088
|
+
// Send stopped state update if start failed
|
|
1089
|
+
await this.forceStopActor(actorId, generation);
|
|
1090
|
+
}
|
|
853
1091
|
}
|
|
854
1092
|
|
|
855
|
-
#handleCommandStopActor(commandWrapper: protocol.CommandWrapper) {
|
|
1093
|
+
async #handleCommandStopActor(commandWrapper: protocol.CommandWrapper) {
|
|
856
1094
|
const stopCommand = commandWrapper.inner
|
|
857
1095
|
.val as protocol.CommandStopActor;
|
|
858
1096
|
|
|
859
1097
|
const actorId = stopCommand.actorId;
|
|
860
1098
|
const generation = stopCommand.generation;
|
|
861
1099
|
|
|
862
|
-
this.forceStopActor(actorId, generation);
|
|
1100
|
+
await this.forceStopActor(actorId, generation);
|
|
863
1101
|
}
|
|
864
1102
|
|
|
865
1103
|
#sendActorIntent(
|
|
@@ -867,13 +1105,6 @@ export class Runner {
|
|
|
867
1105
|
generation: number,
|
|
868
1106
|
intentType: "sleep" | "stop",
|
|
869
1107
|
) {
|
|
870
|
-
if (this.#shutdown) {
|
|
871
|
-
console.trace("send actor intent", actorId, intentType);
|
|
872
|
-
this.log?.warn({
|
|
873
|
-
msg: "Runner is shut down, cannot send actor intent",
|
|
874
|
-
});
|
|
875
|
-
return;
|
|
876
|
-
}
|
|
877
1108
|
let actorIntent: protocol.ActorIntent;
|
|
878
1109
|
|
|
879
1110
|
if (intentType === "sleep") {
|
|
@@ -904,12 +1135,6 @@ export class Runner {
|
|
|
904
1135
|
|
|
905
1136
|
this.#recordEvent(eventWrapper);
|
|
906
1137
|
|
|
907
|
-
this.log?.info({
|
|
908
|
-
msg: "sending event to server",
|
|
909
|
-
event: stringifyEvent(eventWrapper.inner),
|
|
910
|
-
index: eventWrapper.index.toString(),
|
|
911
|
-
});
|
|
912
|
-
|
|
913
1138
|
this.__sendToServer({
|
|
914
1139
|
tag: "ToServerEvents",
|
|
915
1140
|
val: [eventWrapper],
|
|
@@ -921,12 +1146,6 @@ export class Runner {
|
|
|
921
1146
|
generation: number,
|
|
922
1147
|
stateType: "running" | "stopped",
|
|
923
1148
|
) {
|
|
924
|
-
if (this.#shutdown) {
|
|
925
|
-
this.log?.warn({
|
|
926
|
-
msg: "Runner is shut down, cannot send actor state update",
|
|
927
|
-
});
|
|
928
|
-
return;
|
|
929
|
-
}
|
|
930
1149
|
let actorState: protocol.ActorState;
|
|
931
1150
|
|
|
932
1151
|
if (stateType === "running") {
|
|
@@ -960,12 +1179,6 @@ export class Runner {
|
|
|
960
1179
|
|
|
961
1180
|
this.#recordEvent(eventWrapper);
|
|
962
1181
|
|
|
963
|
-
this.log?.info({
|
|
964
|
-
msg: "sending event to server",
|
|
965
|
-
event: stringifyEvent(eventWrapper.inner),
|
|
966
|
-
index: eventWrapper.index.toString(),
|
|
967
|
-
});
|
|
968
|
-
|
|
969
1182
|
this.__sendToServer({
|
|
970
1183
|
tag: "ToServerEvents",
|
|
971
1184
|
val: [eventWrapper],
|
|
@@ -973,13 +1186,6 @@ export class Runner {
|
|
|
973
1186
|
}
|
|
974
1187
|
|
|
975
1188
|
#sendCommandAcknowledgment() {
|
|
976
|
-
if (this.#shutdown) {
|
|
977
|
-
this.log?.warn({
|
|
978
|
-
msg: "Runner is shut down, cannot send command acknowledgment",
|
|
979
|
-
});
|
|
980
|
-
return;
|
|
981
|
-
}
|
|
982
|
-
|
|
983
1189
|
if (this.#lastCommandIdx < 0) {
|
|
984
1190
|
// No commands received yet, nothing to acknowledge
|
|
985
1191
|
return;
|
|
@@ -1288,11 +1494,6 @@ export class Runner {
|
|
|
1288
1494
|
const actor = this.getActor(actorId, generation);
|
|
1289
1495
|
if (!actor) return;
|
|
1290
1496
|
|
|
1291
|
-
if (this.#shutdown) {
|
|
1292
|
-
console.warn("Runner is shut down, cannot set alarm");
|
|
1293
|
-
return;
|
|
1294
|
-
}
|
|
1295
|
-
|
|
1296
1497
|
const alarmEvent: protocol.EventActorSetAlarm = {
|
|
1297
1498
|
actorId,
|
|
1298
1499
|
generation: actor.generation,
|
|
@@ -1325,15 +1526,7 @@ export class Runner {
|
|
|
1325
1526
|
requestData: protocol.KvRequestData,
|
|
1326
1527
|
): Promise<any> {
|
|
1327
1528
|
return new Promise((resolve, reject) => {
|
|
1328
|
-
|
|
1329
|
-
reject(new Error("Runner is shut down"));
|
|
1330
|
-
return;
|
|
1331
|
-
}
|
|
1332
|
-
|
|
1333
|
-
const requestId = this.#nextRequestId++;
|
|
1334
|
-
const isConnected =
|
|
1335
|
-
this.#pegboardWebSocket &&
|
|
1336
|
-
this.#pegboardWebSocket.readyState === 1;
|
|
1529
|
+
const requestId = this.#nextKvRequestId++;
|
|
1337
1530
|
|
|
1338
1531
|
// Store the request
|
|
1339
1532
|
const requestEntry = {
|
|
@@ -1347,7 +1540,7 @@ export class Runner {
|
|
|
1347
1540
|
|
|
1348
1541
|
this.#kvRequests.set(requestId, requestEntry);
|
|
1349
1542
|
|
|
1350
|
-
if (
|
|
1543
|
+
if (this.__webSocketReady()) {
|
|
1351
1544
|
// Send immediately
|
|
1352
1545
|
this.#sendSingleKvRequest(requestId);
|
|
1353
1546
|
}
|
|
@@ -1380,10 +1573,7 @@ export class Runner {
|
|
|
1380
1573
|
}
|
|
1381
1574
|
|
|
1382
1575
|
#processUnsentKvRequests() {
|
|
1383
|
-
if (
|
|
1384
|
-
!this.#pegboardWebSocket ||
|
|
1385
|
-
this.#pegboardWebSocket.readyState !== 1
|
|
1386
|
-
) {
|
|
1576
|
+
if (!this.__webSocketReady()) {
|
|
1387
1577
|
return;
|
|
1388
1578
|
}
|
|
1389
1579
|
|
|
@@ -1400,26 +1590,25 @@ export class Runner {
|
|
|
1400
1590
|
}
|
|
1401
1591
|
}
|
|
1402
1592
|
|
|
1403
|
-
|
|
1404
|
-
|
|
1405
|
-
|
|
1406
|
-
|
|
1593
|
+
/** Type guard: returns true if the WebSocket exists and is open (readyState 1). */
|
|
1594
|
+
__webSocketReady(): this is this & {
|
|
1595
|
+
__pegboardWebSocket: NonNullable<Runner["__pegboardWebSocket"]>;
|
|
1596
|
+
} {
|
|
1597
|
+
return (
|
|
1598
|
+
!!this.__pegboardWebSocket &&
|
|
1599
|
+
this.__pegboardWebSocket.readyState === 1
|
|
1600
|
+
);
|
|
1407
1601
|
}
|
|
1408
1602
|
|
|
1409
1603
|
__sendToServer(message: protocol.ToServer) {
|
|
1410
|
-
|
|
1411
|
-
|
|
1412
|
-
|
|
1413
|
-
|
|
1414
|
-
return;
|
|
1415
|
-
}
|
|
1604
|
+
this.log?.debug({
|
|
1605
|
+
msg: "sending runner message",
|
|
1606
|
+
data: stringifyToServer(message),
|
|
1607
|
+
});
|
|
1416
1608
|
|
|
1417
1609
|
const encoded = protocol.encodeToServer(message);
|
|
1418
|
-
if (
|
|
1419
|
-
this
|
|
1420
|
-
this.#pegboardWebSocket.readyState === 1
|
|
1421
|
-
) {
|
|
1422
|
-
this.#pegboardWebSocket.send(encoded);
|
|
1610
|
+
if (this.__webSocketReady()) {
|
|
1611
|
+
this.__pegboardWebSocket.send(encoded);
|
|
1423
1612
|
} else {
|
|
1424
1613
|
this.log?.error({
|
|
1425
1614
|
msg: "WebSocket not available or not open for sending data",
|
|
@@ -1427,8 +1616,50 @@ export class Runner {
|
|
|
1427
1616
|
}
|
|
1428
1617
|
}
|
|
1429
1618
|
|
|
1430
|
-
|
|
1431
|
-
|
|
1619
|
+
sendHibernatableWebSocketMessageAck(
|
|
1620
|
+
gatewayId: ArrayBuffer,
|
|
1621
|
+
requestId: ArrayBuffer,
|
|
1622
|
+
index: number,
|
|
1623
|
+
) {
|
|
1624
|
+
if (!this.#tunnel)
|
|
1625
|
+
throw new Error("missing tunnel to send message ack");
|
|
1626
|
+
this.#tunnel.sendHibernatableWebSocketMessageAck(
|
|
1627
|
+
gatewayId,
|
|
1628
|
+
requestId,
|
|
1629
|
+
index,
|
|
1630
|
+
);
|
|
1631
|
+
}
|
|
1632
|
+
|
|
1633
|
+
/**
|
|
1634
|
+
* Restores hibernatable WebSocket connections for an actor.
|
|
1635
|
+
*
|
|
1636
|
+
* This method should be called at the end of `onActorStart` after the
|
|
1637
|
+
* actor instance is fully initialized.
|
|
1638
|
+
*
|
|
1639
|
+
* This method will:
|
|
1640
|
+
* - Restore all provided hibernatable WebSocket connections
|
|
1641
|
+
* - Attach event listeners to the restored WebSockets
|
|
1642
|
+
* - Close any WebSocket connections that failed to restore
|
|
1643
|
+
*
|
|
1644
|
+
* The provided metadata list should include all hibernatable WebSockets
|
|
1645
|
+
* that were persisted for this actor. The gateway will automatically
|
|
1646
|
+
* close any connections that are not restored (i.e., not included in
|
|
1647
|
+
* this list).
|
|
1648
|
+
*
|
|
1649
|
+
* **Important:** This method must be called before `onActorStart` completes
|
|
1650
|
+
* and before marking the actor as "ready" to ensure all hibernatable
|
|
1651
|
+
* connections are fully restored.
|
|
1652
|
+
*
|
|
1653
|
+
* @param actorId - The ID of the actor to restore connections for
|
|
1654
|
+
* @param metaEntries - Array of hibernatable WebSocket metadata to restore
|
|
1655
|
+
*/
|
|
1656
|
+
async restoreHibernatingRequests(
|
|
1657
|
+
actorId: string,
|
|
1658
|
+
metaEntries: HibernatingWebSocketMetadata[],
|
|
1659
|
+
) {
|
|
1660
|
+
if (!this.#tunnel)
|
|
1661
|
+
throw new Error("missing tunnel to restore hibernating requests");
|
|
1662
|
+
await this.#tunnel.restoreHibernatingRequests(actorId, metaEntries);
|
|
1432
1663
|
}
|
|
1433
1664
|
|
|
1434
1665
|
getServerlessInitPacket(): string | undefined {
|
|
@@ -1486,9 +1717,10 @@ export class Runner {
|
|
|
1486
1717
|
|
|
1487
1718
|
if (eventsToResend.length === 0) return;
|
|
1488
1719
|
|
|
1489
|
-
|
|
1490
|
-
|
|
1491
|
-
|
|
1720
|
+
this.log?.info({
|
|
1721
|
+
msg: "resending unacknowledged events",
|
|
1722
|
+
fromIndex: lastEventIdx + 1n,
|
|
1723
|
+
});
|
|
1492
1724
|
|
|
1493
1725
|
// Resend events in batches
|
|
1494
1726
|
this.__sendToServer({
|