@dxos/messaging 0.5.8 → 0.5.9-main.0a0e87d

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. package/dist/lib/browser/index.mjs +812 -559
  2. package/dist/lib/browser/index.mjs.map +4 -4
  3. package/dist/lib/browser/meta.json +1 -1
  4. package/dist/lib/node/index.cjs +778 -545
  5. package/dist/lib/node/index.cjs.map +4 -4
  6. package/dist/lib/node/meta.json +1 -1
  7. package/dist/types/src/messenger-monitor.d.ts +8 -0
  8. package/dist/types/src/messenger-monitor.d.ts.map +1 -0
  9. package/dist/types/src/messenger.d.ts +1 -0
  10. package/dist/types/src/messenger.d.ts.map +1 -1
  11. package/dist/types/src/signal-client/signal-client-monitor.d.ts +30 -0
  12. package/dist/types/src/signal-client/signal-client-monitor.d.ts.map +1 -0
  13. package/dist/types/src/signal-client/signal-client.d.ts +25 -50
  14. package/dist/types/src/signal-client/signal-client.d.ts.map +1 -1
  15. package/dist/types/src/signal-client/signal-local-state.d.ts +46 -0
  16. package/dist/types/src/signal-client/signal-local-state.d.ts.map +1 -0
  17. package/dist/types/src/signal-client/signal-rpc-client-monitor.d.ts +6 -0
  18. package/dist/types/src/signal-client/signal-rpc-client-monitor.d.ts.map +1 -0
  19. package/dist/types/src/signal-client/signal-rpc-client.d.ts +4 -2
  20. package/dist/types/src/signal-client/signal-rpc-client.d.ts.map +1 -1
  21. package/dist/types/src/signal-manager/memory-signal-manager.d.ts +0 -2
  22. package/dist/types/src/signal-manager/memory-signal-manager.d.ts.map +1 -1
  23. package/dist/types/src/signal-manager/signal-manager.d.ts +0 -2
  24. package/dist/types/src/signal-manager/signal-manager.d.ts.map +1 -1
  25. package/dist/types/src/signal-manager/websocket-signal-manager-monitor.d.ts +8 -0
  26. package/dist/types/src/signal-manager/websocket-signal-manager-monitor.d.ts.map +1 -0
  27. package/dist/types/src/signal-manager/websocket-signal-manager.d.ts +7 -3
  28. package/dist/types/src/signal-manager/websocket-signal-manager.d.ts.map +1 -1
  29. package/dist/types/src/signal-methods.d.ts +6 -4
  30. package/dist/types/src/signal-methods.d.ts.map +1 -1
  31. package/package.json +13 -12
  32. package/src/messenger-monitor.ts +20 -0
  33. package/src/messenger.ts +16 -5
  34. package/src/signal-client/signal-client-monitor.ts +111 -0
  35. package/src/signal-client/signal-client.test.ts +111 -259
  36. package/src/signal-client/signal-client.ts +141 -252
  37. package/src/signal-client/signal-local-state.ts +156 -0
  38. package/src/signal-client/signal-rpc-client-monitor.ts +15 -0
  39. package/src/signal-client/signal-rpc-client.ts +38 -21
  40. package/src/signal-manager/memory-signal-manager.ts +0 -2
  41. package/src/signal-manager/signal-manager.ts +0 -3
  42. package/src/signal-manager/websocket-signal-manager-monitor.ts +20 -0
  43. package/src/signal-manager/websocket-signal-manager.ts +48 -26
  44. package/src/signal-methods.ts +7 -4
@@ -0,0 +1,156 @@
1
+ //
2
+ // Copyright 2024 DXOS.org
3
+ //
4
+
5
+ import { asyncTimeout, Event } from '@dxos/async';
6
+ import type { Any, Stream } from '@dxos/codec-protobuf';
7
+ import { cancelWithContext, type Context } from '@dxos/context';
8
+ import { PublicKey } from '@dxos/keys';
9
+ import { log } from '@dxos/log';
10
+ import { type Message as SignalMessage, type SwarmEvent } from '@dxos/protocols/proto/dxos/mesh/signal';
11
+ import { ComplexMap, ComplexSet, safeAwaitAll } from '@dxos/util';
12
+
13
+ import { type SignalRPCClient } from './signal-rpc-client';
14
+ import type { Message } from '../signal-methods';
15
+
16
+ export class SignalLocalState { // Holds desired and actually open signal subscriptions; reconcile() aligns the two.
17
+ /**
18
+ * Swarm event streams. Keys are the { topic, peerId } pairs that are actually joined.
19
+ */
20
+ private readonly _swarmStreams = new ComplexMap<{ topic: PublicKey; peerId: PublicKey }, Stream<SwarmEvent>>(
21
+ ({ topic, peerId }) => topic.toHex() + peerId.toHex(),
22
+ );
23
+
24
+ /**
25
+ * Desired { topic, peerId } pairs, i.e. the swarms that should be joined.
26
+ */
27
+ private readonly _joinedTopics = new ComplexSet<{ topic: PublicKey; peerId: PublicKey }>(
28
+ ({ topic, peerId }) => topic.toHex() + peerId.toHex(),
29
+ );
30
+
31
+ /**
32
+ * Desired message subscriptions, one entry per peerId.
33
+ */
34
+ private readonly _subscribedMessages = new ComplexSet<{ peerId: PublicKey }>(({ peerId }) => peerId.toHex());
35
+
36
+ /**
37
+ * Message streams. Keys are the peers that are actually subscribed.
38
+ * @internal
39
+ */
40
+ readonly messageStreams = new ComplexMap<PublicKey, Stream<SignalMessage>>((key) => key.toHex());
41
+
42
+ /**
43
+ * Emitted at the end of every reconcile() call. Used in tests to wait until subscriptions are established.
44
+ * @internal
45
+ */
46
+ readonly reconciled = new Event();
47
+
48
+ constructor(
49
+ private readonly _onMessage: (params: { author: PublicKey; recipient: PublicKey; payload: Any }) => Promise<void>, // Called for each received message whose peer is still in _subscribedMessages.
50
+ private readonly _onSwarmEvent: (params: { topic: PublicKey; swarmEvent: SwarmEvent }) => Promise<void>, // Called for each swarm event whose { topic, peerId } is still in _joinedTopics.
51
+ ) {}
52
+
53
+ async safeCloseStreams(): Promise<{ failureCount: number }> { // Clears both stream maps, then closes every stream that was in them.
54
+ const streams = ([...this._swarmStreams.values()] as Stream<any>[]).concat([...this.messageStreams.values()]);
55
+ this._swarmStreams.clear();
56
+ this.messageStreams.clear();
57
+ const failureCount = (await safeAwaitAll(streams, (s) => s.close())).length; // NOTE(review): assumes safeAwaitAll returns the collected failures - confirm.
58
+ return { failureCount };
59
+ }
60
+
61
+ join({ topic, peerId }: { topic: PublicKey; peerId: PublicKey }) { // Only records the desired pair; reconcile() opens the stream.
62
+ this._joinedTopics.add({ topic, peerId });
63
+ }
64
+
65
+ leave({ topic, peerId }: { topic: PublicKey; peerId: PublicKey }) { // Drops both the open stream (if any) and the desired entry.
66
+ void this._swarmStreams.get({ topic, peerId })?.close(); // Close is started but not awaited.
67
+ this._swarmStreams.delete({ topic, peerId });
68
+ this._joinedTopics.delete({ topic, peerId });
69
+ }
70
+
71
+ subscribeMessages(peerId: PublicKey) { // Only records the desired peer; reconcile() opens the stream.
72
+ this._subscribedMessages.add({ peerId });
73
+ }
74
+
75
+ unsubscribeMessages(peerId: PublicKey) { // Drops both the desired entry and the open stream (if any).
76
+ log('unsubscribing from messages', { peerId });
77
+ this._subscribedMessages.delete({ peerId });
78
+ void this.messageStreams.get(peerId)?.close(); // Close is started but not awaited.
79
+ this.messageStreams.delete(peerId);
80
+ }
81
+
82
+ public async reconcile(ctx: Context, client: SignalRPCClient) { // Swarm subscriptions first, then message subscriptions, then emits reconciled.
83
+ await this._reconcileSwarmSubscriptions(ctx, client);
84
+ await this._reconcileMessageSubscriptions(ctx, client);
85
+ this.reconciled.emit();
86
+ }
87
+
88
+ private async _reconcileSwarmSubscriptions(ctx: Context, client: SignalRPCClient): Promise<void> {
89
+ // Unsubscribe from topics that are no longer needed.
90
+ for (const { topic, peerId } of this._swarmStreams.keys()) {
91
+ // Keep streams whose { topic, peerId } is still desired.
92
+ if (this._joinedTopics.has({ topic, peerId })) {
93
+ continue;
94
+ }
95
+
96
+ void this._swarmStreams.get({ topic, peerId })?.close();
97
+ this._swarmStreams.delete({ topic, peerId });
98
+ }
99
+
100
+ // Subscribe to topics that are needed.
101
+ for (const { topic, peerId } of this._joinedTopics.values()) {
102
+ // Skip desired topics that already have an open stream.
103
+ if (this._swarmStreams.has({ topic, peerId })) {
104
+ continue;
105
+ }
106
+
107
+ const swarmStream = await asyncTimeout(cancelWithContext(ctx, client.join({ topic, peerId })), 5_000); // NOTE(review): 5_000 is presumably a timeout in ms - confirm against asyncTimeout.
108
+ // Subscribing to swarm events. Events for a pair that is no longer desired are dropped.
109
+ // TODO(mykola): What happens when the swarm stream is closed? Maybe send leave event for each peer?
110
+ swarmStream.subscribe(async (swarmEvent: SwarmEvent) => {
111
+ if (this._joinedTopics.has({ topic, peerId })) {
112
+ log('swarm event', { swarmEvent });
113
+ await this._onSwarmEvent({ topic, swarmEvent });
114
+ }
115
+ });
116
+
117
+ // Saving swarm stream so that later reconcile() calls skip this pair.
118
+ this._swarmStreams.set({ topic, peerId }, swarmStream);
119
+ }
120
+ }
121
+
122
+ private async _reconcileMessageSubscriptions(ctx: Context, client: SignalRPCClient): Promise<void> {
123
+ // Unsubscribe from messages that are no longer needed.
124
+ for (const peerId of this.messageStreams.keys()) {
125
+ // Keep streams whose peer is still subscribed.
126
+ if (this._subscribedMessages.has({ peerId })) {
127
+ continue;
128
+ }
129
+
130
+ void this.messageStreams.get(peerId)?.close();
131
+ this.messageStreams.delete(peerId);
132
+ }
133
+
134
+ // Subscribe to messages that are needed.
135
+ for (const { peerId } of this._subscribedMessages.values()) {
136
+ if (this.messageStreams.has(peerId)) {
137
+ continue;
138
+ }
139
+
140
+ const messageStream = await asyncTimeout(cancelWithContext(ctx, client.receiveMessages(peerId)), 5_000); // NOTE(review): 5_000 is presumably a timeout in ms - confirm against asyncTimeout.
141
+ messageStream.subscribe(async (signalMessage: SignalMessage) => {
142
+ if (this._subscribedMessages.has({ peerId })) {
143
+ const message: Message = {
144
+ author: PublicKey.from(signalMessage.author),
145
+ recipient: PublicKey.from(signalMessage.recipient),
146
+ payload: signalMessage.payload,
147
+ };
148
+ await this._onMessage(message);
149
+ }
150
+ });
151
+
152
+ // Saving message stream so that later reconcile() calls skip this peer.
153
+ this.messageStreams.set(peerId, messageStream);
154
+ }
155
+ }
156
+ }
@@ -0,0 +1,15 @@
1
+ //
2
+ // Copyright 2024 DXOS.org
3
+ //
4
+
5
+ import { trace } from '@dxos/tracing';
6
+
7
+ export class SignalRpcClientMonitor { // Metrics helper for the signal RPC client.
8
+ public recordClientCloseFailure(params: { failureReason: string }) { // Adds 1 to the close-failure counter, tagged with the given reason.
9
+ trace.metrics.increment('mesh.signal.signal-rpc-client.close-failure', 1, {
10
+ tags: {
11
+ reason: params.failureReason,
12
+ },
13
+ });
14
+ }
15
+ }
@@ -4,7 +4,7 @@
4
4
 
5
5
  import WebSocket from 'isomorphic-ws';
6
6
 
7
- import { scheduleTaskInterval, Trigger } from '@dxos/async';
7
+ import { scheduleTaskInterval, TimeoutError, Trigger } from '@dxos/async';
8
8
  import { type Any, type Stream } from '@dxos/codec-protobuf';
9
9
  import { Context } from '@dxos/context';
10
10
  import { invariant } from '@dxos/invariant';
@@ -14,6 +14,8 @@ import { schema, trace } from '@dxos/protocols';
14
14
  import { type Message as SignalMessage, type Signal } from '@dxos/protocols/proto/dxos/mesh/signal';
15
15
  import { createProtoRpcPeer, type ProtoRpcPeer } from '@dxos/rpc';
16
16
 
17
+ import { SignalRpcClientMonitor } from './signal-rpc-client-monitor';
18
+
17
19
  const SIGNAL_KEEPALIVE_INTERVAL = 10000;
18
20
 
19
21
  interface Services {
@@ -39,9 +41,10 @@ export type SignalRPCClientParams = {
39
41
  };
40
42
 
41
43
  export class SignalRPCClient {
42
- private _socket?: WebSocket;
43
- private _rpc?: ProtoRpcPeer<Services>;
44
+ private readonly _socket: WebSocket;
45
+ private readonly _rpc: ProtoRpcPeer<Services>;
44
46
  private readonly _connectTrigger = new Trigger();
47
+
45
48
  private _keepaliveCtx?: Context;
46
49
 
47
50
  private _closed = false;
@@ -50,6 +53,8 @@ export class SignalRPCClient {
50
53
  private readonly _callbacks: SignalCallbacks;
51
54
  private readonly _closeComplete = new Trigger();
52
55
 
56
+ private readonly _monitor = new SignalRpcClientMonitor();
57
+
53
58
  constructor({ url, callbacks = {} }: SignalRPCClientParams) {
54
59
  const traceId = PublicKey.random().toHex();
55
60
  log.trace('dxos.mesh.signal-rpc-client.constructor', trace.begin({ id: traceId }));
@@ -92,6 +97,10 @@ export class SignalRPCClient {
92
97
  this._socket.onopen = async () => {
93
98
  try {
94
99
  await this._rpc!.open();
100
+ if (this._closed) {
101
+ await this._safeCloseRpc();
102
+ return;
103
+ }
95
104
  log(`RPC open ${this._url}`);
96
105
  this._callbacks.onConnected?.();
97
106
  this._connectTrigger.wake();
@@ -109,6 +118,8 @@ export class SignalRPCClient {
109
118
  );
110
119
  } catch (err: any) {
111
120
  this._callbacks.onError?.(err);
121
+ this._socket.close();
122
+ this._closed = true;
112
123
  }
113
124
  };
114
125
 
@@ -121,47 +132,46 @@ export class SignalRPCClient {
121
132
 
122
133
  this._socket.onerror = async (event: WebSocket.ErrorEvent) => {
123
134
  if (this._closed) {
124
- // Ignore errors after close.
135
+ this._socket.close();
125
136
  return;
126
137
  }
138
+ this._closed = true;
127
139
 
128
140
  this._callbacks.onError?.(event.error ?? new Error(event.message));
129
- this._connectTrigger.reset();
130
-
131
- try {
132
- await this._rpc?.close();
133
- } catch (err) {
134
- log.catch(err);
135
- }
136
- this._closed = true;
141
+ await this._safeCloseRpc();
137
142
 
138
- log.warn(event.message ?? 'Socket error', { url: this._url });
143
+ log.warn(`Socket ${event.type ?? 'unknown'} error`, { message: event.message, url: this._url });
139
144
  };
140
145
 
141
146
  log.trace('dxos.mesh.signal-rpc-client.constructor', trace.end({ id: traceId }));
142
147
  }
143
148
 
144
149
  async close() {
145
- await this._keepaliveCtx?.dispose();
150
+ if (this._closed) {
151
+ return;
152
+ }
146
153
  this._closed = true;
154
+
155
+ await this._keepaliveCtx?.dispose();
147
156
  try {
148
- await this._rpc?.close();
157
+ await this._safeCloseRpc();
149
158
 
150
- if (this._socket?.readyState === WebSocket.OPEN || this._socket?.readyState === WebSocket.CONNECTING) {
159
+ if (this._socket.readyState === WebSocket.OPEN || this._socket.readyState === WebSocket.CONNECTING) {
151
160
  // close() only starts the closing handshake.
152
161
  this._socket.close();
153
162
  }
163
+
154
164
  await this._closeComplete.wait({ timeout: 1_000 });
155
165
  } catch (err) {
156
- log.warn('close error', err);
166
+ const failureReason = err instanceof TimeoutError ? 'timeout' : err?.constructor?.name ?? 'unknown';
167
+ this._monitor.recordClientCloseFailure({ failureReason });
157
168
  }
158
169
  }
159
170
 
160
171
  async join({ topic, peerId }: { topic: PublicKey; peerId: PublicKey }) {
161
172
  log('join', { topic, peerId, metadata: this._callbacks?.getMetadata?.() });
162
- await this._connectTrigger.wait();
163
173
  invariant(!this._closed, 'SignalRPCClient is closed');
164
- invariant(this._rpc, 'Rpc is not initialized');
174
+ await this._connectTrigger.wait();
165
175
  const swarmStream = this._rpc.rpc.Signal.join({
166
176
  swarm: topic.asUint8Array(),
167
177
  peer: peerId.asUint8Array(),
@@ -175,7 +185,6 @@ export class SignalRPCClient {
175
185
  log('receiveMessages', { peerId });
176
186
  invariant(!this._closed, 'SignalRPCClient is closed');
177
187
  await this._connectTrigger.wait();
178
- invariant(this._rpc, 'Rpc is not initialized');
179
188
  const messageStream = this._rpc.rpc.Signal.receiveMessages({
180
189
  peer: peerId.asUint8Array(),
181
190
  });
@@ -187,7 +196,6 @@ export class SignalRPCClient {
187
196
  log('sendMessage', { author, recipient, payload, metadata: this._callbacks?.getMetadata?.() });
188
197
  invariant(!this._closed, 'SignalRPCClient is closed');
189
198
  await this._connectTrigger.wait();
190
- invariant(this._rpc, 'Rpc is not initialized');
191
199
  await this._rpc.rpc.Signal.sendMessage({
192
200
  author: author.asUint8Array(),
193
201
  recipient: recipient.asUint8Array(),
@@ -195,4 +203,13 @@ export class SignalRPCClient {
195
203
  metadata: this._callbacks?.getMetadata?.(),
196
204
  });
197
205
  }
206
+
207
+ private async _safeCloseRpc() { // Resets the connect trigger and closes the RPC peer without ever throwing.
208
+ try {
209
+ this._connectTrigger.reset();
210
+ await this._rpc.close();
211
+ } catch (err) {
212
+ log.catch(err); // Close errors are only logged, so callers need no try/catch of their own.
213
+ }
214
+ }
198
215
  }
@@ -13,7 +13,6 @@ import { type SwarmEvent } from '@dxos/protocols/proto/dxos/mesh/signal';
13
13
  import { ComplexMap, ComplexSet } from '@dxos/util';
14
14
 
15
15
  import { type SignalManager } from './signal-manager';
16
- import { type CommandTrace } from '../signal-client';
17
16
  import { type SignalStatus } from '../signal-methods';
18
17
 
19
18
  /**
@@ -38,7 +37,6 @@ export class MemorySignalManagerContext {
38
37
  */
39
38
  export class MemorySignalManager implements SignalManager {
40
39
  readonly statusChanged = new Event<SignalStatus[]>();
41
- readonly commandTrace = new Event<CommandTrace>();
42
40
  readonly swarmEvent = new Event<{
43
41
  topic: PublicKey;
44
42
  swarmEvent: SwarmEvent;
@@ -6,7 +6,6 @@ import { type Event } from '@dxos/async';
6
6
  import { type PublicKey } from '@dxos/keys';
7
7
  import { type SwarmEvent } from '@dxos/protocols/proto/dxos/mesh/signal';
8
8
 
9
- import { type CommandTrace } from '../signal-client';
10
9
  import { type Message, type SignalMethods, type SignalStatus } from '../signal-methods';
11
10
 
12
11
  /**
@@ -15,11 +14,9 @@ import { type Message, type SignalMethods, type SignalStatus } from '../signal-m
15
14
  export interface SignalManager extends SignalMethods {
16
15
  open(): Promise<void>;
17
16
  close(): Promise<void>;
18
-
19
17
  getStatus(): SignalStatus[];
20
18
 
21
19
  statusChanged: Event<SignalStatus[]>;
22
- commandTrace: Event<CommandTrace>;
23
20
  swarmEvent: Event<{ topic: PublicKey; swarmEvent: SwarmEvent }>;
24
21
  onMessage: Event<Message>;
25
22
  }
@@ -0,0 +1,20 @@
1
+ //
2
+ // Copyright 2024 DXOS.org
3
+ //
4
+
5
+ import { trace } from '@dxos/tracing';
6
+
7
+ export class WebsocketSignalManagerMonitor { // Metrics helper for the websocket signal manager.
8
+ public recordRateLimitExceeded() { // Adds 1 to the rate-limit-hit counter.
9
+ trace.metrics.increment('mesh.signal.signal-manager.rate-limit-hit', 1);
10
+ }
11
+
12
+ public recordServerFailure(params: { serverName: string; willRestart: boolean }) { // Adds 1 to the server-failure counter, tagged with server and restart decision.
13
+ trace.metrics.increment('mesh.signal.signal-manager.server-failure', 1, {
14
+ tags: {
15
+ server: params.serverName,
16
+ restarted: params.willRestart, // The tag is named restarted but carries the willRestart value.
17
+ },
18
+ });
19
+ }
20
+ }
@@ -11,9 +11,11 @@ import { log } from '@dxos/log';
11
11
  import { RateLimitExceededError, TimeoutError, trace } from '@dxos/protocols';
12
12
  import { type Runtime } from '@dxos/protocols/proto/dxos/config';
13
13
  import { type SwarmEvent } from '@dxos/protocols/proto/dxos/mesh/signal';
14
+ import { BitField, safeAwaitAll } from '@dxos/util';
14
15
 
15
16
  import { type SignalManager } from './signal-manager';
16
- import { type CommandTrace, SignalClient } from '../signal-client';
17
+ import { WebsocketSignalManagerMonitor } from './websocket-signal-manager-monitor';
18
+ import { SignalClient } from '../signal-client';
17
19
  import { type SignalClientMethods, type SignalMethods, type SignalStatus } from '../signal-methods';
18
20
 
19
21
  const MAX_SERVER_FAILURES = 5;
@@ -24,13 +26,18 @@ const WSS_SIGNAL_SERVER_REBOOT_DELAY = 3_000;
24
26
  */
25
27
  export class WebsocketSignalManager implements SignalManager {
26
28
  private readonly _servers = new Map<string, SignalClientMethods>();
29
+ private readonly _monitor = new WebsocketSignalManagerMonitor();
30
+
31
+ /**
32
+ * Used to avoid logging failed server restarts more than once until the server actually recovers.
33
+ */
34
+ private readonly _failedServersBitfield: Uint8Array;
27
35
 
28
36
  private _ctx!: Context;
29
37
  private _opened = false;
30
38
 
31
39
  readonly failureCount = new Map<string, number>();
32
40
  readonly statusChanged = new Event<SignalStatus[]>();
33
- readonly commandTrace = new Event<CommandTrace>();
34
41
  readonly swarmEvent = new Event<{
35
42
  topic: PublicKey;
36
43
  swarmEvent: SwarmEvent;
@@ -66,8 +73,8 @@ export class WebsocketSignalManager implements SignalManager {
66
73
 
67
74
  this._servers.set(host.server, server);
68
75
  this.failureCount.set(host.server, 0);
69
- server.commandTrace.on((trace) => this.commandTrace.emit(trace));
70
76
  }
77
+ this._failedServersBitfield = BitField.zeros(this._hosts.length);
71
78
  }
72
79
 
73
80
  @synchronized
@@ -80,8 +87,7 @@ export class WebsocketSignalManager implements SignalManager {
80
87
 
81
88
  this._initContext();
82
89
 
83
- // TODO(burdon): Await.
84
- [...this._servers.values()].forEach((server) => server.open());
90
+ await safeAwaitAll(this._servers.values(), (server) => server.open());
85
91
 
86
92
  this._opened = true;
87
93
  log.trace('dxos.mesh.websocket-signal-manager.open', trace.end({ id: this._instanceId }));
@@ -93,10 +99,8 @@ export class WebsocketSignalManager implements SignalManager {
93
99
  return;
94
100
  }
95
101
  this._opened = false;
96
-
97
102
  await this._ctx.dispose();
98
-
99
- await Promise.all(Array.from(this._servers.values()).map((server) => server.close()));
103
+ await safeAwaitAll(this._servers.values(), (server) => server.close());
100
104
  }
101
105
 
102
106
  async restartServer(serverName: string) {
@@ -126,7 +130,6 @@ export class WebsocketSignalManager implements SignalManager {
126
130
  async leave({ topic, peerId }: { topic: PublicKey; peerId: PublicKey }) {
127
131
  log('leaving', { topic, peerId });
128
132
  invariant(this._opened, 'Closed');
129
-
130
133
  await this._forEachServer((server) => server.leave({ topic, peerId }));
131
134
  }
132
135
 
@@ -142,26 +145,35 @@ export class WebsocketSignalManager implements SignalManager {
142
145
  log('signal', { recipient });
143
146
  invariant(this._opened, 'Closed');
144
147
 
145
- void this._forEachServer(async (server, serverName) => {
146
- void server.sendMessage({ author, recipient, payload }).catch((err) => {
147
- if (err instanceof RateLimitExceededError) {
148
- log.info('WSS rate limit exceeded', { err });
149
- } else if (err instanceof TimeoutError || err.constructor.name === 'TimeoutError') {
150
- log.info('WSS sendMessage timeout', { err });
151
- void this.checkServerFailure(serverName);
152
- } else {
153
- log.info(`error sending to ${serverName}`, { err });
154
- void this.checkServerFailure(serverName);
155
- }
156
- });
148
+ void this._forEachServer(async (server, serverName, index) => {
149
+ void server
150
+ .sendMessage({ author, recipient, payload })
151
+ .then(() => this._clearServerFailedFlag(serverName, index))
152
+ .catch((err) => {
153
+ if (err instanceof RateLimitExceededError) {
154
+ log.info('WSS rate limit exceeded', { err });
155
+ this._monitor.recordRateLimitExceeded();
156
+ } else if (err instanceof TimeoutError || err.constructor.name === 'TimeoutError') {
157
+ log.info('WSS sendMessage timeout', { err });
158
+ void this.checkServerFailure(serverName, index);
159
+ } else {
160
+ log.warn(`error sending to ${serverName}`, { err });
161
+ void this.checkServerFailure(serverName, index);
162
+ }
163
+ });
157
164
  });
158
165
  }
159
166
 
160
167
  @synchronized
161
- async checkServerFailure(serverName: string) {
168
+ async checkServerFailure(serverName: string, index: number) {
162
169
  const failureCount = this.failureCount.get(serverName!) ?? 0;
163
- if (failureCount > MAX_SERVER_FAILURES) {
164
- log.warn(`too many failures sending to ${serverName} (${failureCount} > ${MAX_SERVER_FAILURES}), restarting`);
170
+ const isRestartRequired = failureCount > MAX_SERVER_FAILURES;
171
+ this._monitor.recordServerFailure({ serverName, willRestart: isRestartRequired });
172
+ if (isRestartRequired) {
173
+ if (!BitField.get(this._failedServersBitfield, index)) {
174
+ log.warn('too many failures for ws-server, restarting', { serverName, failureCount });
175
+ BitField.set(this._failedServersBitfield, index, true);
176
+ }
165
177
  await this.restartServer(serverName!);
166
178
  this.failureCount.set(serverName!, 0);
167
179
  return;
@@ -170,6 +182,14 @@ export class WebsocketSignalManager implements SignalManager {
170
182
  this.failureCount.set(serverName!, (this.failureCount.get(serverName!) ?? 0) + 1);
171
183
  }
172
184
 
185
+ private _clearServerFailedFlag(serverName: string, index: number) { // Invoked after a successful sendMessage; index is the position of the server in _forEachServer.
186
+ if (BitField.get(this._failedServersBitfield, index)) { // Does nothing unless the server was previously flagged as failed.
187
+ log.info('server connection restored', { serverName });
188
+ BitField.set(this._failedServersBitfield, index, false);
189
+ this.failureCount.set(serverName!, 0); // The failure counter is reset only on this flagged-to-restored transition.
190
+ }
191
+ }
192
+
173
193
  async subscribeMessages(peerId: PublicKey) {
174
194
  log('subscribed for message stream', { peerId });
175
195
  invariant(this._opened, 'Closed');
@@ -191,8 +211,10 @@ export class WebsocketSignalManager implements SignalManager {
191
211
  }
192
212
 
193
213
  private async _forEachServer<ReturnType>(
194
- fn: (server: SignalMethods, serverName: string) => Promise<ReturnType>,
214
+ fn: (server: SignalMethods, serverName: string, index: number) => Promise<ReturnType>,
195
215
  ): Promise<ReturnType[]> {
196
- return Promise.all(Array.from(this._servers.entries()).map(([serverName, server]) => fn(server, serverName)));
216
+ return Promise.all(
217
+ Array.from(this._servers.entries()).map(([serverName, server], idx) => fn(server, serverName, idx)),
218
+ );
197
219
  }
198
220
  }
@@ -2,14 +2,16 @@
2
2
  // Copyright 2022 DXOS.org
3
3
  //
4
4
 
5
- import { type Any } from '@dxos/codec-protobuf';
6
5
  import { type PublicKey } from '@dxos/keys';
7
6
  import { type SignalState } from '@dxos/protocols/proto/dxos/mesh/signal';
8
7
 
9
8
  export interface Message {
10
9
  author: PublicKey;
11
10
  recipient: PublicKey;
12
- payload: Any;
11
+ payload: {
12
+ type_url: string;
13
+ value: Uint8Array;
14
+ };
13
15
  }
14
16
 
15
17
  export type SignalStatus = {
@@ -43,6 +45,7 @@ export interface SignalMethods {
43
45
  /**
44
46
  * Start receiving messages from peer.
45
47
  */
48
+ // TODO(burdon): Return unsubscribe function. Encapsulate callback/routing here.
46
49
  subscribeMessages: (peerId: PublicKey) => Promise<void>;
47
50
 
48
51
  /**
@@ -55,7 +58,7 @@ export interface SignalMethods {
55
58
  * Signaling client.
56
59
  */
57
60
  export interface SignalClientMethods extends SignalMethods {
58
- open(): Promise<void>;
59
- close(): Promise<void>;
61
+ open(): Promise<this>;
62
+ close(): Promise<this>;
60
63
  getStatus(): SignalStatus;
61
64
  }