@rivetkit/engine-runner 2.0.26 → 2.0.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@rivetkit/engine-runner",
3
- "version": "2.0.26",
3
+ "version": "2.0.27",
4
4
  "type": "module",
5
5
  "exports": {
6
6
  "import": {
@@ -16,7 +16,7 @@
16
16
  "uuid": "^12.0.0",
17
17
  "pino": "^9.9.5",
18
18
  "ws": "^8.18.3",
19
- "@rivetkit/engine-runner-protocol": "2.0.26"
19
+ "@rivetkit/engine-runner-protocol": "2.0.27"
20
20
  },
21
21
  "devDependencies": {
22
22
  "@types/node": "^22.18.1",
package/src/actor.ts CHANGED
@@ -27,10 +27,6 @@ export class RunnerActor {
27
27
  }> = [];
28
28
  actorStartPromise: ReturnType<typeof promiseWithResolvers<void>>;
29
29
 
30
- lastCommandIdx: bigint = -1n;
31
- nextEventIdx: bigint = 0n;
32
- eventHistory: protocol.EventWrapper[] = [];
33
-
34
30
  /**
35
31
  * If restoreHibernatingRequests has been called. This is used to assert
36
32
  * that the caller is implemented correctly.
@@ -85,8 +81,8 @@ export class RunnerActor {
85
81
  gatewayId,
86
82
  requestId,
87
83
  request: {
88
- resolve: () => { },
89
- reject: () => { },
84
+ resolve: () => {},
85
+ reject: () => {},
90
86
  actorId: this.actorId,
91
87
  gatewayId: gatewayId,
92
88
  requestId: requestId,
@@ -122,8 +118,8 @@ export class RunnerActor {
122
118
  gatewayId,
123
119
  requestId,
124
120
  request: {
125
- resolve: () => { },
126
- reject: () => { },
121
+ resolve: () => {},
122
+ reject: () => {},
127
123
  actorId: this.actorId,
128
124
  gatewayId: gatewayId,
129
125
  requestId: requestId,
@@ -197,14 +193,4 @@ export class RunnerActor {
197
193
  this.webSockets.splice(index, 1);
198
194
  }
199
195
  }
200
-
201
- handleAckEvents(lastEventIdx: bigint) {
202
- this.eventHistory = this.eventHistory.filter(
203
- (event) => event.checkpoint.index > lastEventIdx,
204
- );
205
- }
206
-
207
- recordEvent(eventWrapper: protocol.EventWrapper) {
208
- this.eventHistory.push(eventWrapper);
209
- }
210
196
  }
package/src/mod.ts CHANGED
@@ -8,7 +8,6 @@ import { type HibernatingWebSocketMetadata, Tunnel } from "./tunnel";
8
8
  import {
9
9
  calculateBackoff,
10
10
  parseWebSocketCloseReason,
11
- stringifyError,
12
11
  unreachable,
13
12
  } from "./utils";
14
13
  import { importWebSocket } from "./websocket.js";
@@ -18,18 +17,13 @@ export { RunnerActor, type ActorConfig };
18
17
  export { idToStr } from "./utils";
19
18
 
20
19
  const KV_EXPIRE: number = 30_000;
21
- const PROTOCOL_VERSION: number = 4;
20
+ const PROTOCOL_VERSION: number = 3;
21
+ const RUNNER_PING_INTERVAL = 3_000;
22
22
 
23
23
  /** Warn once the backlog significantly exceeds the server's ack batch size. */
24
24
  const EVENT_BACKLOG_WARN_THRESHOLD = 10_000;
25
25
  const SIGNAL_HANDLERS: (() => void | Promise<void>)[] = [];
26
26
 
27
- export class RunnerShutdownError extends Error {
28
- constructor() {
29
- super("Runner shut down");
30
- }
31
- }
32
-
33
27
  export interface RunnerConfig {
34
28
  logger?: Logger;
35
29
  version: number;
@@ -203,6 +197,9 @@ export class Runner {
203
197
  // WebSocket
204
198
  __pegboardWebSocket?: WebSocket;
205
199
  runnerId?: string;
200
+ #lastCommandIdx: number = -1;
201
+ #pingLoop?: NodeJS.Timeout;
202
+ #nextEventIdx: bigint = 0n;
206
203
  #started: boolean = false;
207
204
  #shutdown: boolean = false;
208
205
  #shuttingDown: boolean = false;
@@ -214,6 +211,7 @@ export class Runner {
214
211
  #runnerLostTimeout?: NodeJS.Timeout;
215
212
 
216
213
  // Event storage for resending
214
+ #eventHistory: protocol.EventWrapper[] = [];
217
215
  #eventBacklogWarned: boolean = false;
218
216
 
219
217
  // Command acknowledgment
@@ -257,14 +255,7 @@ export class Runner {
257
255
 
258
256
  // Start cleaning up old unsent KV requests every 15 seconds
259
257
  this.#kvCleanupInterval = setInterval(() => {
260
- try {
261
- this.#cleanupOldKvRequests();
262
- } catch (err) {
263
- this.log?.error({
264
- msg: "error cleaning up kv requests",
265
- error: stringifyError(err),
266
- });
267
- }
258
+ this.#cleanupOldKvRequests();
268
259
  }, 15000); // Run every 15 seconds
269
260
  }
270
261
 
@@ -316,31 +307,14 @@ export class Runner {
316
307
  this.#sendActorStateUpdate(actorId, actor.generation, "stopped");
317
308
  }
318
309
 
319
- #handleLost() {
310
+ #stopAllActors() {
320
311
  this.log?.info({
321
- msg: "stopping all actors due to runner lost threshold",
312
+ msg: "stopping all actors due to runner lost threshold exceeded",
322
313
  });
323
314
 
324
- // Remove all remaining kv requests
325
- for (const [_, request] of this.#kvRequests.entries()) {
326
- request.reject(new RunnerShutdownError());
327
- }
328
-
329
- this.#kvRequests.clear();
330
-
331
- this.#stopAllActors();
332
- }
333
-
334
- #stopAllActors() {
335
315
  const actorIds = Array.from(this.#actors.keys());
336
316
  for (const actorId of actorIds) {
337
- this.forceStopActor(actorId).catch((err) => {
338
- this.log?.error({
339
- msg: "error stopping actor",
340
- actorId,
341
- error: stringifyError(err),
342
- });
343
- });
317
+ this.forceStopActor(actorId);
344
318
  }
345
319
  }
346
320
 
@@ -503,6 +477,12 @@ export class Runner {
503
477
  this.#runnerLostTimeout = undefined;
504
478
  }
505
479
 
480
+ // Clear ping loop
481
+ if (this.#pingLoop) {
482
+ clearInterval(this.#pingLoop);
483
+ this.#pingLoop = undefined;
484
+ }
485
+
506
486
  // Clear ack interval
507
487
  if (this.#ackInterval) {
508
488
  clearInterval(this.#ackInterval);
@@ -758,6 +738,10 @@ export class Runner {
758
738
  name: this.#config.runnerName,
759
739
  version: this.#config.version,
760
740
  totalSlots: this.#config.totalSlots,
741
+ lastCommandIdx:
742
+ this.#lastCommandIdx >= 0
743
+ ? BigInt(this.#lastCommandIdx)
744
+ : null,
761
745
  prepopulateActorNames: new Map(
762
746
  Object.entries(this.#config.prepopulateActorNames).map(
763
747
  ([name, data]) => [
@@ -774,22 +758,33 @@ export class Runner {
774
758
  val: init,
775
759
  });
776
760
 
761
+ // Start ping interval
762
+ const pingLoop = setInterval(() => {
763
+ if (ws.readyState === 1) {
764
+ this.__sendToServer({
765
+ tag: "ToServerPing",
766
+ val: {
767
+ ts: BigInt(Date.now()),
768
+ },
769
+ });
770
+ } else {
771
+ clearInterval(pingLoop);
772
+ this.log?.info({
773
+ msg: "WebSocket not open, stopping ping loop",
774
+ });
775
+ }
776
+ }, RUNNER_PING_INTERVAL);
777
+ this.#pingLoop = pingLoop;
778
+
777
779
  // Start command acknowledgment interval (5 minutes)
778
780
  const ackInterval = 5 * 60 * 1000; // 5 minutes in milliseconds
779
781
  const ackLoop = setInterval(() => {
780
- try {
781
- if (ws.readyState === 1) {
782
- this.#sendCommandAcknowledgment();
783
- } else {
784
- clearInterval(ackLoop);
785
- this.log?.info({
786
- msg: "WebSocket not open, stopping ack loop",
787
- });
788
- }
789
- } catch (err) {
790
- this.log?.error({
791
- msg: "error in command acknowledgment loop",
792
- error: stringifyError(err),
782
+ if (ws.readyState === 1) {
783
+ this.#sendCommandAcknowledgment();
784
+ } else {
785
+ clearInterval(ackLoop);
786
+ this.log?.info({
787
+ msg: "WebSocket not open, stopping ack loop",
793
788
  });
794
789
  }
795
790
  }, ackInterval);
@@ -820,8 +815,8 @@ export class Runner {
820
815
  if (this.runnerId !== init.runnerId) {
821
816
  this.runnerId = init.runnerId;
822
817
 
823
- // Clear actors if runner id changed
824
- this.#stopAllActors();
818
+ // Clear history if runner id changed
819
+ this.#eventHistory.length = 0;
825
820
  }
826
821
 
827
822
  // Store the runner lost threshold from metadata
@@ -831,12 +826,13 @@ export class Runner {
831
826
 
832
827
  this.log?.info({
833
828
  msg: "received init",
829
+ lastEventIdx: init.lastEventIdx,
834
830
  runnerLostThreshold: this.#runnerLostThreshold,
835
831
  });
836
832
 
837
833
  // Resend pending events
838
834
  this.#processUnsentKvRequests();
839
- this.#resendUnacknowledgedEvents();
835
+ this.#resendUnacknowledgedEvents(init.lastEventIdx);
840
836
  this.#tunnel?.resendBufferedEvents();
841
837
 
842
838
  this.#config.onConnected();
@@ -849,19 +845,10 @@ export class Runner {
849
845
  const kvResponse = message.val;
850
846
  this.#handleKvResponse(kvResponse);
851
847
  } else if (message.tag === "ToClientTunnelMessage") {
852
- this.#tunnel?.handleTunnelMessage(message.val).catch((err) => {
853
- this.log?.error({
854
- msg: "error handling tunnel message",
855
- error: stringifyError(err),
856
- });
857
- });
858
- } else if (message.tag === "ToClientPing") {
859
- this.__sendToServer({
860
- tag: "ToServerPong",
861
- val: {
862
- ts: message.val.ts,
863
- },
864
- });
848
+ this.#tunnel?.handleTunnelMessage(message.val);
849
+ } else if (message.tag === "ToClientClose") {
850
+ this.#tunnel?.shutdown();
851
+ ws.close(1000, "manual closure");
865
852
  } else {
866
853
  unreachable(message);
867
854
  }
@@ -884,14 +871,7 @@ export class Runner {
884
871
  seconds: this.#runnerLostThreshold / 1000,
885
872
  });
886
873
  this.#runnerLostTimeout = setTimeout(() => {
887
- try {
888
- this.#handleLost();
889
- } catch (err) {
890
- this.log?.error({
891
- msg: "error handling runner lost",
892
- error: stringifyError(err),
893
- });
894
- }
874
+ this.#stopAllActors();
895
875
  }, this.#runnerLostThreshold);
896
876
  }
897
877
 
@@ -929,6 +909,12 @@ export class Runner {
929
909
  this.#config.onDisconnected(ev.code, ev.reason);
930
910
  }
931
911
 
912
+ // Clear ping loop on close
913
+ if (this.#pingLoop) {
914
+ clearInterval(this.#pingLoop);
915
+ this.#pingLoop = undefined;
916
+ }
917
+
932
918
  // Clear ack interval on close
933
919
  if (this.#ackInterval) {
934
920
  clearInterval(this.#ackInterval);
@@ -947,14 +933,7 @@ export class Runner {
947
933
  seconds: this.#runnerLostThreshold / 1000,
948
934
  });
949
935
  this.#runnerLostTimeout = setTimeout(() => {
950
- try {
951
- this.#handleLost();
952
- } catch (err) {
953
- this.log?.error({
954
- msg: "error handling runner lost",
955
- error: stringifyError(err),
956
- });
957
- }
936
+ this.#stopAllActors();
958
937
  }, this.#runnerLostThreshold);
959
938
  }
960
939
 
@@ -973,86 +952,52 @@ export class Runner {
973
952
  for (const commandWrapper of commands) {
974
953
  if (commandWrapper.inner.tag === "CommandStartActor") {
975
954
  // Spawn background promise
976
- this.#handleCommandStartActor(commandWrapper).catch((err) => {
977
- this.log?.error({
978
- msg: "error handling start actor command",
979
- actorId: commandWrapper.checkpoint.actorId,
980
- error: stringifyError(err),
981
- });
982
- });
955
+ this.#handleCommandStartActor(commandWrapper);
983
956
  } else if (commandWrapper.inner.tag === "CommandStopActor") {
984
957
  // Spawn background promise
985
- this.#handleCommandStopActor(commandWrapper).catch((err) => {
986
- this.log?.error({
987
- msg: "error handling stop actor command",
988
- actorId: commandWrapper.checkpoint.actorId,
989
- error: stringifyError(err),
990
- });
991
- });
958
+ this.#handleCommandStopActor(commandWrapper);
992
959
  } else {
993
960
  unreachable(commandWrapper.inner);
994
961
  }
995
962
 
996
- const actor = this.getActor(
997
- commandWrapper.checkpoint.actorId,
998
- commandWrapper.inner.val.generation,
999
- );
1000
- if (actor) actor.lastCommandIdx = commandWrapper.checkpoint.index;
963
+ this.#lastCommandIdx = Number(commandWrapper.index);
1001
964
  }
1002
965
  }
1003
966
 
1004
967
  #handleAckEvents(ack: protocol.ToClientAckEvents) {
1005
- let originalTotalEvents = Array.from(this.#actors).reduce(
1006
- (s, [_, actor]) => s + actor.eventHistory.length,
1007
- 0,
1008
- );
1009
-
1010
- for (const [_, actor] of this.#actors) {
1011
- let checkpoint = ack.lastEventCheckpoints.find(
1012
- (x) => x.actorId == actor.actorId,
1013
- );
1014
-
1015
- if (checkpoint) actor.handleAckEvents(checkpoint.index);
1016
- }
968
+ const lastAckedIdx = ack.lastEventIdx;
1017
969
 
1018
- const totalEvents = Array.from(this.#actors).reduce(
1019
- (s, [_, actor]) => s + actor.eventHistory.length,
1020
- 0,
970
+ const originalLength = this.#eventHistory.length;
971
+ this.#eventHistory = this.#eventHistory.filter(
972
+ (event) => event.index > lastAckedIdx,
1021
973
  );
1022
- const prunedCount = originalTotalEvents - totalEvents;
1023
974
 
975
+ const prunedCount = originalLength - this.#eventHistory.length;
1024
976
  if (prunedCount > 0) {
1025
977
  this.log?.info({
1026
978
  msg: "pruned acknowledged events",
979
+ lastAckedIdx: lastAckedIdx.toString(),
1027
980
  prunedCount,
1028
981
  });
1029
982
  }
1030
983
 
1031
- if (totalEvents <= EVENT_BACKLOG_WARN_THRESHOLD) {
984
+ if (this.#eventHistory.length <= EVENT_BACKLOG_WARN_THRESHOLD) {
1032
985
  this.#eventBacklogWarned = false;
1033
986
  }
1034
987
  }
1035
988
 
1036
989
  /** Track events to send to the server in case we need to resend it on disconnect. */
1037
990
  #recordEvent(eventWrapper: protocol.EventWrapper) {
1038
- const actor = this.getActor(eventWrapper.checkpoint.actorId);
1039
- if (!actor) return;
1040
-
1041
- actor.recordEvent(eventWrapper);
1042
-
1043
- let totalEvents = Array.from(this.#actors).reduce(
1044
- (s, [_, actor]) => s + actor.eventHistory.length,
1045
- 0,
1046
- );
991
+ this.#eventHistory.push(eventWrapper);
1047
992
 
1048
993
  if (
1049
- totalEvents > EVENT_BACKLOG_WARN_THRESHOLD &&
994
+ this.#eventHistory.length > EVENT_BACKLOG_WARN_THRESHOLD &&
1050
995
  !this.#eventBacklogWarned
1051
996
  ) {
1052
997
  this.#eventBacklogWarned = true;
1053
998
  this.log?.warn({
1054
999
  msg: "unacknowledged event backlog exceeds threshold",
1055
- backlogSize: totalEvents,
1000
+ backlogSize: this.#eventHistory.length,
1056
1001
  threshold: EVENT_BACKLOG_WARN_THRESHOLD,
1057
1002
  });
1058
1003
  }
@@ -1068,7 +1013,7 @@ export class Runner {
1068
1013
  const startCommand = commandWrapper.inner
1069
1014
  .val as protocol.CommandStartActor;
1070
1015
 
1071
- const actorId = commandWrapper.checkpoint.actorId;
1016
+ const actorId = startCommand.actorId;
1072
1017
  const generation = startCommand.generation;
1073
1018
  const config = startCommand.config;
1074
1019
 
@@ -1149,7 +1094,7 @@ export class Runner {
1149
1094
  const stopCommand = commandWrapper.inner
1150
1095
  .val as protocol.CommandStopActor;
1151
1096
 
1152
- const actorId = commandWrapper.checkpoint.actorId;
1097
+ const actorId = stopCommand.actorId;
1153
1098
  const generation = stopCommand.generation;
1154
1099
 
1155
1100
  await this.forceStopActor(actorId, generation);
@@ -1160,9 +1105,6 @@ export class Runner {
1160
1105
  generation: number,
1161
1106
  intentType: "sleep" | "stop",
1162
1107
  ) {
1163
- const actor = this.getActor(actorId, generation);
1164
- if (!actor) return;
1165
-
1166
1108
  let actorIntent: protocol.ActorIntent;
1167
1109
 
1168
1110
  if (intentType === "sleep") {
@@ -1182,11 +1124,9 @@ export class Runner {
1182
1124
  intent: actorIntent,
1183
1125
  };
1184
1126
 
1127
+ const eventIndex = this.#nextEventIdx++;
1185
1128
  const eventWrapper: protocol.EventWrapper = {
1186
- checkpoint: {
1187
- actorId,
1188
- index: actor.nextEventIdx++,
1189
- },
1129
+ index: eventIndex,
1190
1130
  inner: {
1191
1131
  tag: "EventActorIntent",
1192
1132
  val: intentEvent,
@@ -1206,9 +1146,6 @@ export class Runner {
1206
1146
  generation: number,
1207
1147
  stateType: "running" | "stopped",
1208
1148
  ) {
1209
- const actor = this.getActor(actorId, generation);
1210
- if (!actor) return;
1211
-
1212
1149
  let actorState: protocol.ActorState;
1213
1150
 
1214
1151
  if (stateType === "running") {
@@ -1231,11 +1168,9 @@ export class Runner {
1231
1168
  state: actorState,
1232
1169
  };
1233
1170
 
1171
+ const eventIndex = this.#nextEventIdx++;
1234
1172
  const eventWrapper: protocol.EventWrapper = {
1235
- checkpoint: {
1236
- actorId,
1237
- index: actor.nextEventIdx++,
1238
- },
1173
+ index: eventIndex,
1239
1174
  inner: {
1240
1175
  tag: "EventActorStateUpdate",
1241
1176
  val: stateUpdateEvent,
@@ -1251,18 +1186,9 @@ export class Runner {
1251
1186
  }
1252
1187
 
1253
1188
  #sendCommandAcknowledgment() {
1254
- const lastCommandCheckpoints = [];
1255
-
1256
- for (const [_, actor] of this.#actors) {
1257
- if (actor.lastCommandIdx < 0) {
1258
- // No commands received yet, nothing to acknowledge
1259
- continue;
1260
- }
1261
-
1262
- lastCommandCheckpoints.push({
1263
- actorId: actor.actorId,
1264
- index: actor.lastCommandIdx,
1265
- });
1189
+ if (this.#lastCommandIdx < 0) {
1190
+ // No commands received yet, nothing to acknowledge
1191
+ return;
1266
1192
  }
1267
1193
 
1268
1194
  //this.#log?.log("Sending command acknowledgment", this.#lastCommandIdx);
@@ -1270,7 +1196,7 @@ export class Runner {
1270
1196
  this.__sendToServer({
1271
1197
  tag: "ToServerAckCommands",
1272
1198
  val: {
1273
- lastCommandCheckpoints,
1199
+ lastCommandIdx: BigInt(this.#lastCommandIdx),
1274
1200
  },
1275
1201
  });
1276
1202
  }
@@ -1574,11 +1500,9 @@ export class Runner {
1574
1500
  alarmTs: alarmTs !== null ? BigInt(alarmTs) : null,
1575
1501
  };
1576
1502
 
1503
+ const eventIndex = this.#nextEventIdx++;
1577
1504
  const eventWrapper: protocol.EventWrapper = {
1578
- checkpoint: {
1579
- actorId,
1580
- index: actor.nextEventIdx++,
1581
- },
1505
+ index: eventIndex,
1582
1506
  inner: {
1583
1507
  tag: "EventActorSetAlarm",
1584
1508
  val: alarmEvent,
@@ -1745,7 +1669,6 @@ export class Runner {
1745
1669
  tag: "ToServerlessServerInit",
1746
1670
  val: {
1747
1671
  runnerId: this.runnerId,
1748
- runnerProtocolVersion: PROTOCOL_VERSION,
1749
1672
  },
1750
1673
  });
1751
1674
 
@@ -1776,34 +1699,27 @@ export class Runner {
1776
1699
  msg: `Scheduling reconnect attempt ${this.#reconnectAttempt + 1} in ${delay}ms`,
1777
1700
  });
1778
1701
 
1779
- this.#reconnectTimeout = setTimeout(() => {
1702
+ this.#reconnectTimeout = setTimeout(async () => {
1780
1703
  if (!this.#shutdown) {
1781
1704
  this.#reconnectAttempt++;
1782
1705
  this.log?.debug({
1783
1706
  msg: `Attempting to reconnect (attempt ${this.#reconnectAttempt})...`,
1784
1707
  });
1785
- this.#openPegboardWebSocket().catch((err) => {
1786
- this.log?.error({
1787
- msg: "error during websocket reconnection",
1788
- error: stringifyError(err),
1789
- });
1790
- });
1708
+ await this.#openPegboardWebSocket();
1791
1709
  }
1792
1710
  }, delay);
1793
1711
  }
1794
1712
 
1795
- #resendUnacknowledgedEvents() {
1796
- const eventsToResend = [];
1797
-
1798
- for (const [_, actor] of this.#actors) {
1799
- eventsToResend.push(...actor.eventHistory);
1800
- }
1713
+ #resendUnacknowledgedEvents(lastEventIdx: bigint) {
1714
+ const eventsToResend = this.#eventHistory.filter(
1715
+ (event) => event.index > lastEventIdx,
1716
+ );
1801
1717
 
1802
1718
  if (eventsToResend.length === 0) return;
1803
1719
 
1804
1720
  this.log?.info({
1805
1721
  msg: "resending unacknowledged events",
1806
- count: eventsToResend.length,
1722
+ fromIndex: lastEventIdx + 1n,
1807
1723
  });
1808
1724
 
1809
1725
  // Resend events in batches