@dxos/messaging 0.5.8 → 0.5.9-main.0a0e87d
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/lib/browser/index.mjs +812 -559
- package/dist/lib/browser/index.mjs.map +4 -4
- package/dist/lib/browser/meta.json +1 -1
- package/dist/lib/node/index.cjs +778 -545
- package/dist/lib/node/index.cjs.map +4 -4
- package/dist/lib/node/meta.json +1 -1
- package/dist/types/src/messenger-monitor.d.ts +8 -0
- package/dist/types/src/messenger-monitor.d.ts.map +1 -0
- package/dist/types/src/messenger.d.ts +1 -0
- package/dist/types/src/messenger.d.ts.map +1 -1
- package/dist/types/src/signal-client/signal-client-monitor.d.ts +30 -0
- package/dist/types/src/signal-client/signal-client-monitor.d.ts.map +1 -0
- package/dist/types/src/signal-client/signal-client.d.ts +25 -50
- package/dist/types/src/signal-client/signal-client.d.ts.map +1 -1
- package/dist/types/src/signal-client/signal-local-state.d.ts +46 -0
- package/dist/types/src/signal-client/signal-local-state.d.ts.map +1 -0
- package/dist/types/src/signal-client/signal-rpc-client-monitor.d.ts +6 -0
- package/dist/types/src/signal-client/signal-rpc-client-monitor.d.ts.map +1 -0
- package/dist/types/src/signal-client/signal-rpc-client.d.ts +4 -2
- package/dist/types/src/signal-client/signal-rpc-client.d.ts.map +1 -1
- package/dist/types/src/signal-manager/memory-signal-manager.d.ts +0 -2
- package/dist/types/src/signal-manager/memory-signal-manager.d.ts.map +1 -1
- package/dist/types/src/signal-manager/signal-manager.d.ts +0 -2
- package/dist/types/src/signal-manager/signal-manager.d.ts.map +1 -1
- package/dist/types/src/signal-manager/websocket-signal-manager-monitor.d.ts +8 -0
- package/dist/types/src/signal-manager/websocket-signal-manager-monitor.d.ts.map +1 -0
- package/dist/types/src/signal-manager/websocket-signal-manager.d.ts +7 -3
- package/dist/types/src/signal-manager/websocket-signal-manager.d.ts.map +1 -1
- package/dist/types/src/signal-methods.d.ts +6 -4
- package/dist/types/src/signal-methods.d.ts.map +1 -1
- package/package.json +13 -12
- package/src/messenger-monitor.ts +20 -0
- package/src/messenger.ts +16 -5
- package/src/signal-client/signal-client-monitor.ts +111 -0
- package/src/signal-client/signal-client.test.ts +111 -259
- package/src/signal-client/signal-client.ts +141 -252
- package/src/signal-client/signal-local-state.ts +156 -0
- package/src/signal-client/signal-rpc-client-monitor.ts +15 -0
- package/src/signal-client/signal-rpc-client.ts +38 -21
- package/src/signal-manager/memory-signal-manager.ts +0 -2
- package/src/signal-manager/signal-manager.ts +0 -3
- package/src/signal-manager/websocket-signal-manager-monitor.ts +20 -0
- package/src/signal-manager/websocket-signal-manager.ts +48 -26
- package/src/signal-methods.ts +7 -4
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
//
|
|
2
|
+
// Copyright 2024 DXOS.org
|
|
3
|
+
//
|
|
4
|
+
|
|
5
|
+
import { asyncTimeout, Event } from '@dxos/async';
|
|
6
|
+
import type { Any, Stream } from '@dxos/codec-protobuf';
|
|
7
|
+
import { cancelWithContext, type Context } from '@dxos/context';
|
|
8
|
+
import { PublicKey } from '@dxos/keys';
|
|
9
|
+
import { log } from '@dxos/log';
|
|
10
|
+
import { type Message as SignalMessage, type SwarmEvent } from '@dxos/protocols/proto/dxos/mesh/signal';
|
|
11
|
+
import { ComplexMap, ComplexSet, safeAwaitAll } from '@dxos/util';
|
|
12
|
+
|
|
13
|
+
import { type SignalRPCClient } from './signal-rpc-client';
|
|
14
|
+
import type { Message } from '../signal-methods';
|
|
15
|
+
|
|
16
|
+
/**
 * Holds the desired signal subscriptions (joined swarms and message subscriptions) next to the
 * streams that are actually open, and brings the two in line via {@link SignalLocalState.reconcile}.
 */
export class SignalLocalState {
  /**
   * Swarm event streams. Keys represent the actually joined topic and peerId.
   */
  private readonly _swarmStreams = new ComplexMap<{ topic: PublicKey; peerId: PublicKey }, Stream<SwarmEvent>>(
    ({ topic, peerId }) => topic.toHex() + peerId.toHex(),
  );

  /**
   * Represents the desired joined topic and peerId pairs.
   */
  private readonly _joinedTopics = new ComplexSet<{ topic: PublicKey; peerId: PublicKey }>(
    ({ topic, peerId }) => topic.toHex() + peerId.toHex(),
  );

  /**
   * Represents the desired message subscriptions.
   */
  private readonly _subscribedMessages = new ComplexSet<{ peerId: PublicKey }>(({ peerId }) => peerId.toHex());

  /**
   * Message streams. Keys represent the actually subscribed peers.
   * @internal
   */
  readonly messageStreams = new ComplexMap<PublicKey, Stream<SignalMessage>>((key) => key.toHex());

  /**
   * Event to use in tests to wait till subscription is successfully established.
   * Emitted at the end of every successful `reconcile` call.
   * @internal
   */
  readonly reconciled = new Event();

  /**
   * @param _onMessage Invoked for every message received on a stream whose peer is still subscribed.
   * @param _onSwarmEvent Invoked for every swarm event received on a stream whose topic is still joined.
   */
  constructor(
    private readonly _onMessage: (params: { author: PublicKey; recipient: PublicKey; payload: Any }) => Promise<void>,
    private readonly _onSwarmEvent: (params: { topic: PublicKey; swarmEvent: SwarmEvent }) => Promise<void>,
  ) {}

  /**
   * Forgets all open swarm and message streams and closes them.
   * The desired state (`join` / `subscribeMessages`) is left untouched.
   *
   * @returns `failureCount`: the number of entries returned by `safeAwaitAll` for the close calls.
   */
  async safeCloseStreams(): Promise<{ failureCount: number }> {
    const streams = ([...this._swarmStreams.values()] as Stream<any>[]).concat([...this.messageStreams.values()]);
    // Clear the maps before awaiting so the streams are already forgotten while they close.
    this._swarmStreams.clear();
    this.messageStreams.clear();
    const failureCount = (await safeAwaitAll(streams, (s) => s.close())).length;
    return { failureCount };
  }

  /**
   * Marks the topic/peer pair as desired. The stream itself is opened by the next `reconcile`.
   */
  join({ topic, peerId }: { topic: PublicKey; peerId: PublicKey }) {
    this._joinedTopics.add({ topic, peerId });
  }

  /**
   * Closes the swarm stream for the topic/peer pair (if one is open) and removes the pair
   * from both the actual and the desired state.
   */
  leave({ topic, peerId }: { topic: PublicKey; peerId: PublicKey }) {
    this._closeInBackground(this._swarmStreams.get({ topic, peerId }));
    this._swarmStreams.delete({ topic, peerId });
    this._joinedTopics.delete({ topic, peerId });
  }

  /**
   * Marks messages for the peer as desired. The stream itself is opened by the next `reconcile`.
   */
  subscribeMessages(peerId: PublicKey) {
    this._subscribedMessages.add({ peerId });
  }

  /**
   * Removes the peer from the desired message subscriptions and closes its message stream (if one is open).
   */
  unsubscribeMessages(peerId: PublicKey) {
    log('unsubscribing from messages', { peerId });
    this._subscribedMessages.delete({ peerId });
    this._closeInBackground(this.messageStreams.get(peerId));
    this.messageStreams.delete(peerId);
  }

  /**
   * Brings the open streams in line with the desired state: swarm subscriptions first, then
   * message subscriptions. Emits `reconciled` once both steps have completed without throwing.
   *
   * @param ctx Context used to cancel pending `join` / `receiveMessages` calls.
   * @param client RPC client the new streams are opened on.
   */
  public async reconcile(ctx: Context, client: SignalRPCClient) {
    await this._reconcileSwarmSubscriptions(ctx, client);
    await this._reconcileMessageSubscriptions(ctx, client);
    this.reconciled.emit();
  }

  private async _reconcileSwarmSubscriptions(ctx: Context, client: SignalRPCClient): Promise<void> {
    // Unsubscribe from topics that are no longer needed.
    for (const { topic, peerId } of this._swarmStreams.keys()) {
      // Keep streams whose topic is still desired.
      if (this._joinedTopics.has({ topic, peerId })) {
        continue;
      }

      this._closeInBackground(this._swarmStreams.get({ topic, peerId }));
      this._swarmStreams.delete({ topic, peerId });
    }

    // Subscribe to topics that are needed.
    for (const { topic, peerId } of this._joinedTopics.values()) {
      // Skip topics that already have an open stream.
      if (this._swarmStreams.has({ topic, peerId })) {
        continue;
      }

      const swarmStream = await asyncTimeout(cancelWithContext(ctx, client.join({ topic, peerId })), 5_000);
      // Subscribing to swarm events.
      // TODO(mykola): What happens when the swarm stream is closed? Maybe send leave event for each peer?
      swarmStream.subscribe(async (swarmEvent: SwarmEvent) => {
        // Drop events that arrive after the topic has been left.
        if (this._joinedTopics.has({ topic, peerId })) {
          log('swarm event', { swarmEvent });
          await this._onSwarmEvent({ topic, swarmEvent });
        }
      });

      // Saving swarm stream.
      this._swarmStreams.set({ topic, peerId }, swarmStream);
    }
  }

  private async _reconcileMessageSubscriptions(ctx: Context, client: SignalRPCClient): Promise<void> {
    // Unsubscribe from messages that are no longer needed.
    for (const peerId of this.messageStreams.keys()) {
      // Keep streams whose peer is still subscribed.
      if (this._subscribedMessages.has({ peerId })) {
        continue;
      }

      this._closeInBackground(this.messageStreams.get(peerId));
      this.messageStreams.delete(peerId);
    }

    // Subscribe to messages that are needed.
    for (const { peerId } of this._subscribedMessages.values()) {
      // Skip peers that already have an open stream.
      if (this.messageStreams.has(peerId)) {
        continue;
      }

      const messageStream = await asyncTimeout(cancelWithContext(ctx, client.receiveMessages(peerId)), 5_000);
      messageStream.subscribe(async (signalMessage: SignalMessage) => {
        // Drop messages that arrive after the peer has been unsubscribed.
        if (this._subscribedMessages.has({ peerId })) {
          const message: Message = {
            author: PublicKey.from(signalMessage.author),
            recipient: PublicKey.from(signalMessage.recipient),
            payload: signalMessage.payload,
          };
          await this._onMessage(message);
        }
      });

      // Saving message stream.
      this.messageStreams.set(peerId, messageStream);
    }
  }

  /**
   * Closes a stream without waiting for it. A failed close is logged rather than left as an
   * unhandled promise rejection. Does nothing when no stream is given.
   */
  private _closeInBackground(stream: Stream<any> | undefined): void {
    stream?.close().catch((err) => log.catch(err));
  }
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
//
|
|
2
|
+
// Copyright 2024 DXOS.org
|
|
3
|
+
//
|
|
4
|
+
|
|
5
|
+
import { trace } from '@dxos/tracing';
|
|
6
|
+
|
|
7
|
+
/**
 * Metrics recorder for the signal RPC client.
 */
export class SignalRpcClientMonitor {
  /**
   * Increments the `mesh.signal.signal-rpc-client.close-failure` counter by one,
   * tagging the sample with the given failure reason.
   */
  public recordClientCloseFailure(params: { failureReason: string }) {
    const { failureReason } = params;
    const tags = { reason: failureReason };
    trace.metrics.increment('mesh.signal.signal-rpc-client.close-failure', 1, { tags });
  }
}
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
import WebSocket from 'isomorphic-ws';
|
|
6
6
|
|
|
7
|
-
import { scheduleTaskInterval, Trigger } from '@dxos/async';
|
|
7
|
+
import { scheduleTaskInterval, TimeoutError, Trigger } from '@dxos/async';
|
|
8
8
|
import { type Any, type Stream } from '@dxos/codec-protobuf';
|
|
9
9
|
import { Context } from '@dxos/context';
|
|
10
10
|
import { invariant } from '@dxos/invariant';
|
|
@@ -14,6 +14,8 @@ import { schema, trace } from '@dxos/protocols';
|
|
|
14
14
|
import { type Message as SignalMessage, type Signal } from '@dxos/protocols/proto/dxos/mesh/signal';
|
|
15
15
|
import { createProtoRpcPeer, type ProtoRpcPeer } from '@dxos/rpc';
|
|
16
16
|
|
|
17
|
+
import { SignalRpcClientMonitor } from './signal-rpc-client-monitor';
|
|
18
|
+
|
|
17
19
|
const SIGNAL_KEEPALIVE_INTERVAL = 10000;
|
|
18
20
|
|
|
19
21
|
interface Services {
|
|
@@ -39,9 +41,10 @@ export type SignalRPCClientParams = {
|
|
|
39
41
|
};
|
|
40
42
|
|
|
41
43
|
export class SignalRPCClient {
|
|
42
|
-
private _socket
|
|
43
|
-
private _rpc
|
|
44
|
+
private readonly _socket: WebSocket;
|
|
45
|
+
private readonly _rpc: ProtoRpcPeer<Services>;
|
|
44
46
|
private readonly _connectTrigger = new Trigger();
|
|
47
|
+
|
|
45
48
|
private _keepaliveCtx?: Context;
|
|
46
49
|
|
|
47
50
|
private _closed = false;
|
|
@@ -50,6 +53,8 @@ export class SignalRPCClient {
|
|
|
50
53
|
private readonly _callbacks: SignalCallbacks;
|
|
51
54
|
private readonly _closeComplete = new Trigger();
|
|
52
55
|
|
|
56
|
+
private readonly _monitor = new SignalRpcClientMonitor();
|
|
57
|
+
|
|
53
58
|
constructor({ url, callbacks = {} }: SignalRPCClientParams) {
|
|
54
59
|
const traceId = PublicKey.random().toHex();
|
|
55
60
|
log.trace('dxos.mesh.signal-rpc-client.constructor', trace.begin({ id: traceId }));
|
|
@@ -92,6 +97,10 @@ export class SignalRPCClient {
|
|
|
92
97
|
this._socket.onopen = async () => {
|
|
93
98
|
try {
|
|
94
99
|
await this._rpc!.open();
|
|
100
|
+
if (this._closed) {
|
|
101
|
+
await this._safeCloseRpc();
|
|
102
|
+
return;
|
|
103
|
+
}
|
|
95
104
|
log(`RPC open ${this._url}`);
|
|
96
105
|
this._callbacks.onConnected?.();
|
|
97
106
|
this._connectTrigger.wake();
|
|
@@ -109,6 +118,8 @@ export class SignalRPCClient {
|
|
|
109
118
|
);
|
|
110
119
|
} catch (err: any) {
|
|
111
120
|
this._callbacks.onError?.(err);
|
|
121
|
+
this._socket.close();
|
|
122
|
+
this._closed = true;
|
|
112
123
|
}
|
|
113
124
|
};
|
|
114
125
|
|
|
@@ -121,47 +132,46 @@ export class SignalRPCClient {
|
|
|
121
132
|
|
|
122
133
|
this._socket.onerror = async (event: WebSocket.ErrorEvent) => {
|
|
123
134
|
if (this._closed) {
|
|
124
|
-
|
|
135
|
+
this._socket.close();
|
|
125
136
|
return;
|
|
126
137
|
}
|
|
138
|
+
this._closed = true;
|
|
127
139
|
|
|
128
140
|
this._callbacks.onError?.(event.error ?? new Error(event.message));
|
|
129
|
-
this.
|
|
130
|
-
|
|
131
|
-
try {
|
|
132
|
-
await this._rpc?.close();
|
|
133
|
-
} catch (err) {
|
|
134
|
-
log.catch(err);
|
|
135
|
-
}
|
|
136
|
-
this._closed = true;
|
|
141
|
+
await this._safeCloseRpc();
|
|
137
142
|
|
|
138
|
-
log.warn(event.
|
|
143
|
+
log.warn(`Socket ${event.type ?? 'unknown'} error`, { message: event.message, url: this._url });
|
|
139
144
|
};
|
|
140
145
|
|
|
141
146
|
log.trace('dxos.mesh.signal-rpc-client.constructor', trace.end({ id: traceId }));
|
|
142
147
|
}
|
|
143
148
|
|
|
144
149
|
async close() {
|
|
145
|
-
|
|
150
|
+
if (this._closed) {
|
|
151
|
+
return;
|
|
152
|
+
}
|
|
146
153
|
this._closed = true;
|
|
154
|
+
|
|
155
|
+
await this._keepaliveCtx?.dispose();
|
|
147
156
|
try {
|
|
148
|
-
await this.
|
|
157
|
+
await this._safeCloseRpc();
|
|
149
158
|
|
|
150
|
-
if (this._socket
|
|
159
|
+
if (this._socket.readyState === WebSocket.OPEN || this._socket.readyState === WebSocket.CONNECTING) {
|
|
151
160
|
// close() only starts the closing handshake.
|
|
152
161
|
this._socket.close();
|
|
153
162
|
}
|
|
163
|
+
|
|
154
164
|
await this._closeComplete.wait({ timeout: 1_000 });
|
|
155
165
|
} catch (err) {
|
|
156
|
-
|
|
166
|
+
const failureReason = err instanceof TimeoutError ? 'timeout' : err?.constructor?.name ?? 'unknown';
|
|
167
|
+
this._monitor.recordClientCloseFailure({ failureReason });
|
|
157
168
|
}
|
|
158
169
|
}
|
|
159
170
|
|
|
160
171
|
async join({ topic, peerId }: { topic: PublicKey; peerId: PublicKey }) {
|
|
161
172
|
log('join', { topic, peerId, metadata: this._callbacks?.getMetadata?.() });
|
|
162
|
-
await this._connectTrigger.wait();
|
|
163
173
|
invariant(!this._closed, 'SignalRPCClient is closed');
|
|
164
|
-
|
|
174
|
+
await this._connectTrigger.wait();
|
|
165
175
|
const swarmStream = this._rpc.rpc.Signal.join({
|
|
166
176
|
swarm: topic.asUint8Array(),
|
|
167
177
|
peer: peerId.asUint8Array(),
|
|
@@ -175,7 +185,6 @@ export class SignalRPCClient {
|
|
|
175
185
|
log('receiveMessages', { peerId });
|
|
176
186
|
invariant(!this._closed, 'SignalRPCClient is closed');
|
|
177
187
|
await this._connectTrigger.wait();
|
|
178
|
-
invariant(this._rpc, 'Rpc is not initialized');
|
|
179
188
|
const messageStream = this._rpc.rpc.Signal.receiveMessages({
|
|
180
189
|
peer: peerId.asUint8Array(),
|
|
181
190
|
});
|
|
@@ -187,7 +196,6 @@ export class SignalRPCClient {
|
|
|
187
196
|
log('sendMessage', { author, recipient, payload, metadata: this._callbacks?.getMetadata?.() });
|
|
188
197
|
invariant(!this._closed, 'SignalRPCClient is closed');
|
|
189
198
|
await this._connectTrigger.wait();
|
|
190
|
-
invariant(this._rpc, 'Rpc is not initialized');
|
|
191
199
|
await this._rpc.rpc.Signal.sendMessage({
|
|
192
200
|
author: author.asUint8Array(),
|
|
193
201
|
recipient: recipient.asUint8Array(),
|
|
@@ -195,4 +203,13 @@ export class SignalRPCClient {
|
|
|
195
203
|
metadata: this._callbacks?.getMetadata?.(),
|
|
196
204
|
});
|
|
197
205
|
}
|
|
206
|
+
|
|
207
|
+
/**
 * Resets the connect trigger and then closes the RPC peer.
 * Any error raised by either step is passed to `log.catch` instead of being propagated.
 */
private async _safeCloseRpc() {
  try {
    // The reset stays inside the `try` so that an error from it is logged as well.
    this._connectTrigger.reset();
    await this._rpc.close();
  } catch (closeError) {
    log.catch(closeError);
  }
}
|
|
198
215
|
}
|
|
@@ -13,7 +13,6 @@ import { type SwarmEvent } from '@dxos/protocols/proto/dxos/mesh/signal';
|
|
|
13
13
|
import { ComplexMap, ComplexSet } from '@dxos/util';
|
|
14
14
|
|
|
15
15
|
import { type SignalManager } from './signal-manager';
|
|
16
|
-
import { type CommandTrace } from '../signal-client';
|
|
17
16
|
import { type SignalStatus } from '../signal-methods';
|
|
18
17
|
|
|
19
18
|
/**
|
|
@@ -38,7 +37,6 @@ export class MemorySignalManagerContext {
|
|
|
38
37
|
*/
|
|
39
38
|
export class MemorySignalManager implements SignalManager {
|
|
40
39
|
readonly statusChanged = new Event<SignalStatus[]>();
|
|
41
|
-
readonly commandTrace = new Event<CommandTrace>();
|
|
42
40
|
readonly swarmEvent = new Event<{
|
|
43
41
|
topic: PublicKey;
|
|
44
42
|
swarmEvent: SwarmEvent;
|
|
@@ -6,7 +6,6 @@ import { type Event } from '@dxos/async';
|
|
|
6
6
|
import { type PublicKey } from '@dxos/keys';
|
|
7
7
|
import { type SwarmEvent } from '@dxos/protocols/proto/dxos/mesh/signal';
|
|
8
8
|
|
|
9
|
-
import { type CommandTrace } from '../signal-client';
|
|
10
9
|
import { type Message, type SignalMethods, type SignalStatus } from '../signal-methods';
|
|
11
10
|
|
|
12
11
|
/**
|
|
@@ -15,11 +14,9 @@ import { type Message, type SignalMethods, type SignalStatus } from '../signal-m
|
|
|
15
14
|
export interface SignalManager extends SignalMethods {
|
|
16
15
|
open(): Promise<void>;
|
|
17
16
|
close(): Promise<void>;
|
|
18
|
-
|
|
19
17
|
getStatus(): SignalStatus[];
|
|
20
18
|
|
|
21
19
|
statusChanged: Event<SignalStatus[]>;
|
|
22
|
-
commandTrace: Event<CommandTrace>;
|
|
23
20
|
swarmEvent: Event<{ topic: PublicKey; swarmEvent: SwarmEvent }>;
|
|
24
21
|
onMessage: Event<Message>;
|
|
25
22
|
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
//
|
|
2
|
+
// Copyright 2024 DXOS.org
|
|
3
|
+
//
|
|
4
|
+
|
|
5
|
+
import { trace } from '@dxos/tracing';
|
|
6
|
+
|
|
7
|
+
/**
 * Metrics recorder for the websocket signal manager.
 */
export class WebsocketSignalManagerMonitor {
  /**
   * Increments the `mesh.signal.signal-manager.rate-limit-hit` counter by one.
   */
  public recordRateLimitExceeded() {
    trace.metrics.increment('mesh.signal.signal-manager.rate-limit-hit', 1);
  }

  /**
   * Increments the `mesh.signal.signal-manager.server-failure` counter by one,
   * tagged with the server name and whether the failure leads to a restart.
   */
  public recordServerFailure(params: { serverName: string; willRestart: boolean }) {
    const { serverName, willRestart } = params;
    const tags = { server: serverName, restarted: willRestart };
    trace.metrics.increment('mesh.signal.signal-manager.server-failure', 1, { tags });
  }
}
|
|
@@ -11,9 +11,11 @@ import { log } from '@dxos/log';
|
|
|
11
11
|
import { RateLimitExceededError, TimeoutError, trace } from '@dxos/protocols';
|
|
12
12
|
import { type Runtime } from '@dxos/protocols/proto/dxos/config';
|
|
13
13
|
import { type SwarmEvent } from '@dxos/protocols/proto/dxos/mesh/signal';
|
|
14
|
+
import { BitField, safeAwaitAll } from '@dxos/util';
|
|
14
15
|
|
|
15
16
|
import { type SignalManager } from './signal-manager';
|
|
16
|
-
import {
|
|
17
|
+
import { WebsocketSignalManagerMonitor } from './websocket-signal-manager-monitor';
|
|
18
|
+
import { SignalClient } from '../signal-client';
|
|
17
19
|
import { type SignalClientMethods, type SignalMethods, type SignalStatus } from '../signal-methods';
|
|
18
20
|
|
|
19
21
|
const MAX_SERVER_FAILURES = 5;
|
|
@@ -24,13 +26,18 @@ const WSS_SIGNAL_SERVER_REBOOT_DELAY = 3_000;
|
|
|
24
26
|
*/
|
|
25
27
|
export class WebsocketSignalManager implements SignalManager {
|
|
26
28
|
private readonly _servers = new Map<string, SignalClientMethods>();
|
|
29
|
+
private readonly _monitor = new WebsocketSignalManagerMonitor();
|
|
30
|
+
|
|
31
|
+
/**
|
|
32
|
+
* Used to avoid logging failed server restarts more than once until the server actually recovers.
|
|
33
|
+
*/
|
|
34
|
+
private readonly _failedServersBitfield: Uint8Array;
|
|
27
35
|
|
|
28
36
|
private _ctx!: Context;
|
|
29
37
|
private _opened = false;
|
|
30
38
|
|
|
31
39
|
readonly failureCount = new Map<string, number>();
|
|
32
40
|
readonly statusChanged = new Event<SignalStatus[]>();
|
|
33
|
-
readonly commandTrace = new Event<CommandTrace>();
|
|
34
41
|
readonly swarmEvent = new Event<{
|
|
35
42
|
topic: PublicKey;
|
|
36
43
|
swarmEvent: SwarmEvent;
|
|
@@ -66,8 +73,8 @@ export class WebsocketSignalManager implements SignalManager {
|
|
|
66
73
|
|
|
67
74
|
this._servers.set(host.server, server);
|
|
68
75
|
this.failureCount.set(host.server, 0);
|
|
69
|
-
server.commandTrace.on((trace) => this.commandTrace.emit(trace));
|
|
70
76
|
}
|
|
77
|
+
this._failedServersBitfield = BitField.zeros(this._hosts.length);
|
|
71
78
|
}
|
|
72
79
|
|
|
73
80
|
@synchronized
|
|
@@ -80,8 +87,7 @@ export class WebsocketSignalManager implements SignalManager {
|
|
|
80
87
|
|
|
81
88
|
this._initContext();
|
|
82
89
|
|
|
83
|
-
|
|
84
|
-
[...this._servers.values()].forEach((server) => server.open());
|
|
90
|
+
await safeAwaitAll(this._servers.values(), (server) => server.open());
|
|
85
91
|
|
|
86
92
|
this._opened = true;
|
|
87
93
|
log.trace('dxos.mesh.websocket-signal-manager.open', trace.end({ id: this._instanceId }));
|
|
@@ -93,10 +99,8 @@ export class WebsocketSignalManager implements SignalManager {
|
|
|
93
99
|
return;
|
|
94
100
|
}
|
|
95
101
|
this._opened = false;
|
|
96
|
-
|
|
97
102
|
await this._ctx.dispose();
|
|
98
|
-
|
|
99
|
-
await Promise.all(Array.from(this._servers.values()).map((server) => server.close()));
|
|
103
|
+
await safeAwaitAll(this._servers.values(), (server) => server.close());
|
|
100
104
|
}
|
|
101
105
|
|
|
102
106
|
async restartServer(serverName: string) {
|
|
@@ -126,7 +130,6 @@ export class WebsocketSignalManager implements SignalManager {
|
|
|
126
130
|
async leave({ topic, peerId }: { topic: PublicKey; peerId: PublicKey }) {
|
|
127
131
|
log('leaving', { topic, peerId });
|
|
128
132
|
invariant(this._opened, 'Closed');
|
|
129
|
-
|
|
130
133
|
await this._forEachServer((server) => server.leave({ topic, peerId }));
|
|
131
134
|
}
|
|
132
135
|
|
|
@@ -142,26 +145,35 @@ export class WebsocketSignalManager implements SignalManager {
|
|
|
142
145
|
log('signal', { recipient });
|
|
143
146
|
invariant(this._opened, 'Closed');
|
|
144
147
|
|
|
145
|
-
void this._forEachServer(async (server, serverName) => {
|
|
146
|
-
void server
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
148
|
+
void this._forEachServer(async (server, serverName, index) => {
|
|
149
|
+
void server
|
|
150
|
+
.sendMessage({ author, recipient, payload })
|
|
151
|
+
.then(() => this._clearServerFailedFlag(serverName, index))
|
|
152
|
+
.catch((err) => {
|
|
153
|
+
if (err instanceof RateLimitExceededError) {
|
|
154
|
+
log.info('WSS rate limit exceeded', { err });
|
|
155
|
+
this._monitor.recordRateLimitExceeded();
|
|
156
|
+
} else if (err instanceof TimeoutError || err.constructor.name === 'TimeoutError') {
|
|
157
|
+
log.info('WSS sendMessage timeout', { err });
|
|
158
|
+
void this.checkServerFailure(serverName, index);
|
|
159
|
+
} else {
|
|
160
|
+
log.warn(`error sending to ${serverName}`, { err });
|
|
161
|
+
void this.checkServerFailure(serverName, index);
|
|
162
|
+
}
|
|
163
|
+
});
|
|
157
164
|
});
|
|
158
165
|
}
|
|
159
166
|
|
|
160
167
|
@synchronized
|
|
161
|
-
async checkServerFailure(serverName: string) {
|
|
168
|
+
async checkServerFailure(serverName: string, index: number) {
|
|
162
169
|
const failureCount = this.failureCount.get(serverName!) ?? 0;
|
|
163
|
-
|
|
164
|
-
|
|
170
|
+
const isRestartRequired = failureCount > MAX_SERVER_FAILURES;
|
|
171
|
+
this._monitor.recordServerFailure({ serverName, willRestart: isRestartRequired });
|
|
172
|
+
if (isRestartRequired) {
|
|
173
|
+
if (!BitField.get(this._failedServersBitfield, index)) {
|
|
174
|
+
log.warn('too many failures for ws-server, restarting', { serverName, failureCount });
|
|
175
|
+
BitField.set(this._failedServersBitfield, index, true);
|
|
176
|
+
}
|
|
165
177
|
await this.restartServer(serverName!);
|
|
166
178
|
this.failureCount.set(serverName!, 0);
|
|
167
179
|
return;
|
|
@@ -170,6 +182,14 @@ export class WebsocketSignalManager implements SignalManager {
|
|
|
170
182
|
this.failureCount.set(serverName!, (this.failureCount.get(serverName!) ?? 0) + 1);
|
|
171
183
|
}
|
|
172
184
|
|
|
185
|
+
/**
 * If the server at `index` is currently flagged as failed, logs that its connection was restored,
 * clears the flag and resets the server's failure counter. Does nothing when the flag is not set.
 */
private _clearServerFailedFlag(serverName: string, index: number) {
  const isFlaggedAsFailed = BitField.get(this._failedServersBitfield, index);
  if (!isFlaggedAsFailed) {
    return;
  }

  log.info('server connection restored', { serverName });
  BitField.set(this._failedServersBitfield, index, false);
  this.failureCount.set(serverName, 0);
}
|
|
192
|
+
|
|
173
193
|
async subscribeMessages(peerId: PublicKey) {
|
|
174
194
|
log('subscribed for message stream', { peerId });
|
|
175
195
|
invariant(this._opened, 'Closed');
|
|
@@ -191,8 +211,10 @@ export class WebsocketSignalManager implements SignalManager {
|
|
|
191
211
|
}
|
|
192
212
|
|
|
193
213
|
private async _forEachServer<ReturnType>(
|
|
194
|
-
fn: (server: SignalMethods, serverName: string) => Promise<ReturnType>,
|
|
214
|
+
fn: (server: SignalMethods, serverName: string, index: number) => Promise<ReturnType>,
|
|
195
215
|
): Promise<ReturnType[]> {
|
|
196
|
-
return Promise.all(
|
|
216
|
+
return Promise.all(
|
|
217
|
+
Array.from(this._servers.entries()).map(([serverName, server], idx) => fn(server, serverName, idx)),
|
|
218
|
+
);
|
|
197
219
|
}
|
|
198
220
|
}
|
package/src/signal-methods.ts
CHANGED
|
@@ -2,14 +2,16 @@
|
|
|
2
2
|
// Copyright 2022 DXOS.org
|
|
3
3
|
//
|
|
4
4
|
|
|
5
|
-
import { type Any } from '@dxos/codec-protobuf';
|
|
6
5
|
import { type PublicKey } from '@dxos/keys';
|
|
7
6
|
import { type SignalState } from '@dxos/protocols/proto/dxos/mesh/signal';
|
|
8
7
|
|
|
9
8
|
export interface Message {
|
|
10
9
|
author: PublicKey;
|
|
11
10
|
recipient: PublicKey;
|
|
12
|
-
payload:
|
|
11
|
+
payload: {
|
|
12
|
+
type_url: string;
|
|
13
|
+
value: Uint8Array;
|
|
14
|
+
};
|
|
13
15
|
}
|
|
14
16
|
|
|
15
17
|
export type SignalStatus = {
|
|
@@ -43,6 +45,7 @@ export interface SignalMethods {
|
|
|
43
45
|
/**
|
|
44
46
|
* Start receiving messages from peer.
|
|
45
47
|
*/
|
|
48
|
+
// TODO(burdon): Return unsubscribe function. Encapsulate callback/routing here.
|
|
46
49
|
subscribeMessages: (peerId: PublicKey) => Promise<void>;
|
|
47
50
|
|
|
48
51
|
/**
|
|
@@ -55,7 +58,7 @@ export interface SignalMethods {
|
|
|
55
58
|
* Signaling client.
|
|
56
59
|
*/
|
|
57
60
|
export interface SignalClientMethods extends SignalMethods {
|
|
58
|
-
open(): Promise<
|
|
59
|
-
close(): Promise<
|
|
61
|
+
open(): Promise<this>;
|
|
62
|
+
close(): Promise<this>;
|
|
60
63
|
getStatus(): SignalStatus;
|
|
61
64
|
}
|