@rljson/server 0.0.13 → 0.0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.architecture.md +3 -1
- package/README.blog.md +8 -0
- package/README.trouble.md +33 -0
- package/dist/README.architecture.md +3 -1
- package/dist/README.blog.md +8 -0
- package/dist/README.trouble.md +33 -0
- package/dist/node.d.ts +7 -0
- package/dist/server.js +35 -0
- package/package.json +1 -1
package/README.architecture.md
CHANGED
|
@@ -86,7 +86,9 @@ The `Node` class sits above `Server` and `Client`, bridging `@rljson/network` to
|
|
|
86
86
|
3. **Manages transport**: Uses injectable factories (`CreateHubTransport`/`CreateClientTransport`) to create the transport layer, keeping the Node class transport-agnostic.
|
|
87
87
|
4. **Agent lifecycle**: An optional `createAgent` factory in `NodeDeps` is called on every `ready` event. The returned `AgentHandle.stop()` is called before the next role transition. This enables application-level wiring (e.g. FsAgent) without circular dependencies.
|
|
88
88
|
5. **Serialized transitions**: Role transitions are queued — a new `role-changed` event waits for the previous transition to complete before starting. This prevents race conditions between teardown and setup.
|
|
89
|
-
6. **
|
|
89
|
+
6. **Hub-changed reconnect** (v0.0.14): Subscribes to `NetworkManager`'s `hub-changed` event in addition to `role-changed`. When the hub changes but the node's role stays `client`, the node tears down its connection and reconnects to the new hub. This prevents split-brain scenarios where clients remain attached to a stale hub.
|
|
90
|
+
7. **Clean socket teardown** (v0.0.14): `_tearDownCurrentRole()` explicitly calls `disconnect()` on client sockets before clearing the reference. This prevents orphaned Socket.IO connections from auto-reconnecting to the old hub.
|
|
91
|
+
8. **Error resilience**: Errors in user-provided code (agent factories, transport factories) are caught and logged. The node continues functioning — a failed transport degrades connectivity but doesn't crash, a failed agent leaves the node's core intact.
|
|
90
92
|
|
|
91
93
|
```text
|
|
92
94
|
┌─────────────────────────────────────────┐
|
package/README.blog.md
CHANGED
|
@@ -17,3 +17,11 @@ Add posts as Markdown entries in this file (newest last). Keep each post small a
|
|
|
17
17
|
- Why it matters
|
|
18
18
|
- Links: PRs, docs, demos
|
|
19
19
|
```
|
|
20
|
+
|
|
21
|
+
## 2026-03-20 — v0.0.14: Split-brain fix and hub-changed reconnect
|
|
22
|
+
|
|
23
|
+
- Node class now listens to `hub-changed` events from NetworkManager — clients reconnect when hub changes but role stays `client`
|
|
24
|
+
- `_tearDownCurrentRole()` explicitly disconnects sockets before clearing references — prevents orphaned connections
|
|
25
|
+
- Validated on 4-node Windows test lab: E2E Reports 18 & 19 both score **38/41 passed, 0 failures**
|
|
26
|
+
- Previous Report 17 showed split-brain (two simultaneous hubs, 23/41 passed) — now fully resolved
|
|
27
|
+
- PR: https://github.com/rljson/server/pull/14
|
package/README.trouble.md
CHANGED
|
@@ -10,9 +10,42 @@ found in the LICENSE file in the root of this package.
|
|
|
10
10
|
|
|
11
11
|
## Table of contents <!-- omit in toc -->
|
|
12
12
|
|
|
13
|
+
- [Split-Brain: Clients not reconnecting on hub change (fixed in v0.0.14)](#split-brain-clients-not-reconnecting-on-hub-change-fixed-in-v0014)
|
|
13
14
|
- [Vscode Windows: Debugging is not working](#vscode-windows-debugging-is-not-working)
|
|
14
15
|
- [Test Isolation: Socket.IO event listener accumulation](#test-isolation-socketio-event-listener-accumulation)
|
|
15
16
|
|
|
17
|
+
## Split-Brain: Clients not reconnecting on hub change (fixed in v0.0.14)
|
|
18
|
+
|
|
19
|
+
Date: 2026-03-20
|
|
20
|
+
|
|
21
|
+
**Problem:**
|
|
22
|
+
|
|
23
|
+
In a 4-node deployment, two nodes simultaneously acted as hub (split-brain). Clients stayed connected to the old hub while a new hub was elected. File sync stopped working because the newly elected hub had no real clients.
|
|
24
|
+
|
|
25
|
+
**Symptoms:**
|
|
26
|
+
|
|
27
|
+
- E2E Report 17: 23/41 passed, 18 failed
|
|
28
|
+
- Two nodes reporting `role=hub` simultaneously
|
|
29
|
+
- Files written by one hub never appearing on clients
|
|
30
|
+
- File counts diverging between nodes (hub accumulating files, clients stuck)
|
|
31
|
+
|
|
32
|
+
**Root Cause:**
|
|
33
|
+
|
|
34
|
+
Two bugs in the `Node` class:
|
|
35
|
+
|
|
36
|
+
1. **Missing `hub-changed` listener**: Node only subscribed to `role-changed` from NetworkManager. When the hub changed but the node's role stayed `client`, no `role-changed` event was emitted (the role was unchanged), so the handler never ran and clients never reconnected to the new hub.
|
|
37
|
+
|
|
38
|
+
2. **No socket disconnect on teardown**: `_tearDownCurrentRole()` set `_clientSocket = undefined` without calling `disconnect()`. The orphaned Socket.IO connection kept auto-reconnecting to the old hub (especially with the `socket.connect()` reconnect fix from v0.0.13).
|
|
39
|
+
|
|
40
|
+
**Solution (v0.0.14):**
|
|
41
|
+
|
|
42
|
+
1. Added `_onHubChanged` listener that tears down and reconnects when hub changes while role stays `client`
|
|
43
|
+
2. Added explicit `socket.disconnect()` call in `_tearDownCurrentRole()` before clearing the reference
|
|
44
|
+
|
|
45
|
+
**Validation:**
|
|
46
|
+
|
|
47
|
+
- E2E Reports 18 & 19: **38/41 passed, 0 failures** on 4-node test lab
|
|
48
|
+
|
|
16
49
|
## Vscode Windows: Debugging is not working
|
|
17
50
|
|
|
18
51
|
Date: 2025-03-08
|
|
@@ -86,7 +86,9 @@ The `Node` class sits above `Server` and `Client`, bridging `@rljson/network` to
|
|
|
86
86
|
3. **Manages transport**: Uses injectable factories (`CreateHubTransport`/`CreateClientTransport`) to create the transport layer, keeping the Node class transport-agnostic.
|
|
87
87
|
4. **Agent lifecycle**: An optional `createAgent` factory in `NodeDeps` is called on every `ready` event. The returned `AgentHandle.stop()` is called before the next role transition. This enables application-level wiring (e.g. FsAgent) without circular dependencies.
|
|
88
88
|
5. **Serialized transitions**: Role transitions are queued — a new `role-changed` event waits for the previous transition to complete before starting. This prevents race conditions between teardown and setup.
|
|
89
|
-
6. **
|
|
89
|
+
6. **Hub-changed reconnect** (v0.0.14): Subscribes to `NetworkManager`'s `hub-changed` event in addition to `role-changed`. When the hub changes but the node's role stays `client`, the node tears down its connection and reconnects to the new hub. This prevents split-brain scenarios where clients remain attached to a stale hub.
|
|
90
|
+
7. **Clean socket teardown** (v0.0.14): `_tearDownCurrentRole()` explicitly calls `disconnect()` on client sockets before clearing the reference. This prevents orphaned Socket.IO connections from auto-reconnecting to the old hub.
|
|
91
|
+
8. **Error resilience**: Errors in user-provided code (agent factories, transport factories) are caught and logged. The node continues functioning — a failed transport degrades connectivity but doesn't crash, a failed agent leaves the node's core intact.
|
|
90
92
|
|
|
91
93
|
```text
|
|
92
94
|
┌─────────────────────────────────────────┐
|
package/dist/README.blog.md
CHANGED
|
@@ -17,3 +17,11 @@ Add posts as Markdown entries in this file (newest last). Keep each post small a
|
|
|
17
17
|
- Why it matters
|
|
18
18
|
- Links: PRs, docs, demos
|
|
19
19
|
```
|
|
20
|
+
|
|
21
|
+
## 2026-03-20 — v0.0.14: Split-brain fix and hub-changed reconnect
|
|
22
|
+
|
|
23
|
+
- Node class now listens to `hub-changed` events from NetworkManager — clients reconnect when hub changes but role stays `client`
|
|
24
|
+
- `_tearDownCurrentRole()` explicitly disconnects sockets before clearing references — prevents orphaned connections
|
|
25
|
+
- Validated on 4-node Windows test lab: E2E Reports 18 & 19 both score **38/41 passed, 0 failures**
|
|
26
|
+
- Previous Report 17 showed split-brain (two simultaneous hubs, 23/41 passed) — now fully resolved
|
|
27
|
+
- PR: https://github.com/rljson/server/pull/14
|
package/dist/README.trouble.md
CHANGED
|
@@ -10,9 +10,42 @@ found in the LICENSE file in the root of this package.
|
|
|
10
10
|
|
|
11
11
|
## Table of contents <!-- omit in toc -->
|
|
12
12
|
|
|
13
|
+
- [Split-Brain: Clients not reconnecting on hub change (fixed in v0.0.14)](#split-brain-clients-not-reconnecting-on-hub-change-fixed-in-v0014)
|
|
13
14
|
- [Vscode Windows: Debugging is not working](#vscode-windows-debugging-is-not-working)
|
|
14
15
|
- [Test Isolation: Socket.IO event listener accumulation](#test-isolation-socketio-event-listener-accumulation)
|
|
15
16
|
|
|
17
|
+
## Split-Brain: Clients not reconnecting on hub change (fixed in v0.0.14)
|
|
18
|
+
|
|
19
|
+
Date: 2026-03-20
|
|
20
|
+
|
|
21
|
+
**Problem:**
|
|
22
|
+
|
|
23
|
+
In a 4-node deployment, two nodes simultaneously acted as hub (split-brain). Clients stayed connected to the old hub while a new hub was elected. File sync stopped working because the newly elected hub had no real clients.
|
|
24
|
+
|
|
25
|
+
**Symptoms:**
|
|
26
|
+
|
|
27
|
+
- E2E Report 17: 23/41 passed, 18 failed
|
|
28
|
+
- Two nodes reporting `role=hub` simultaneously
|
|
29
|
+
- Files written by one hub never appearing on clients
|
|
30
|
+
- File counts diverging between nodes (hub accumulating files, clients stuck)
|
|
31
|
+
|
|
32
|
+
**Root Cause:**
|
|
33
|
+
|
|
34
|
+
Two bugs in the `Node` class:
|
|
35
|
+
|
|
36
|
+
1. **Missing `hub-changed` listener**: Node only subscribed to `role-changed` from NetworkManager. When the hub changed but the node's role stayed `client`, no `role-changed` event was emitted (the role was unchanged), so the handler never ran and clients never reconnected to the new hub.
|
|
37
|
+
|
|
38
|
+
2. **No socket disconnect on teardown**: `_tearDownCurrentRole()` set `_clientSocket = undefined` without calling `disconnect()`. The orphaned Socket.IO connection kept auto-reconnecting to the old hub (especially with the `socket.connect()` reconnect fix from v0.0.13).
|
|
39
|
+
|
|
40
|
+
**Solution (v0.0.14):**
|
|
41
|
+
|
|
42
|
+
1. Added `_onHubChanged` listener that tears down and reconnects when hub changes while role stays `client`
|
|
43
|
+
2. Added explicit `socket.disconnect()` call in `_tearDownCurrentRole()` before clearing the reference
|
|
44
|
+
|
|
45
|
+
**Validation:**
|
|
46
|
+
|
|
47
|
+
- E2E Reports 18 & 19: **38/41 passed, 0 failures** on 4-node test lab
|
|
48
|
+
|
|
16
49
|
## Vscode Windows: Debugging is not working
|
|
17
50
|
|
|
18
51
|
Date: 2025-03-08
|
package/dist/node.d.ts
CHANGED
|
@@ -163,6 +163,13 @@ export declare class Node {
|
|
|
163
163
|
* @param cb - Callback
|
|
164
164
|
*/
|
|
165
165
|
off<E extends NodeEventName>(event: E, cb: NodeEvents[E]): void;
|
|
166
|
+
/**
|
|
167
|
+
* Handle hub-changed while role stays 'client'.
|
|
168
|
+
* When NetworkManager emits hub-changed but NOT role-changed (because
|
|
169
|
+
* old role === new role === 'client'), the Node must tear down the old
|
|
170
|
+
* client connection and reconnect to the new hub.
|
|
171
|
+
*/
|
|
172
|
+
private _onHubChanged;
|
|
166
173
|
private _onRoleChanged;
|
|
167
174
|
private _performTransition;
|
|
168
175
|
private _becomeHub;
|
package/dist/server.js
CHANGED
|
@@ -1440,6 +1440,7 @@ class Node {
|
|
|
1440
1440
|
this._bsMem = new BsMem();
|
|
1441
1441
|
this._running = true;
|
|
1442
1442
|
this._networkManager.on("role-changed", this._onRoleChanged);
|
|
1443
|
+
this._networkManager.on("hub-changed", this._onHubChanged);
|
|
1443
1444
|
await this._networkManager.start();
|
|
1444
1445
|
}
|
|
1445
1446
|
/**
|
|
@@ -1449,6 +1450,7 @@ class Node {
|
|
|
1449
1450
|
if (!this._running) return;
|
|
1450
1451
|
this._running = false;
|
|
1451
1452
|
this._networkManager.off("role-changed", this._onRoleChanged);
|
|
1453
|
+
this._networkManager.off("hub-changed", this._onHubChanged);
|
|
1452
1454
|
if (this._transitioning) {
|
|
1453
1455
|
await this._transitioning;
|
|
1454
1456
|
this._transitioning = void 0;
|
|
@@ -1526,6 +1528,24 @@ class Node {
|
|
|
1526
1528
|
// .........................................................................
|
|
1527
1529
|
// Role transitions
|
|
1528
1530
|
// .........................................................................
|
|
1531
|
+
/**
|
|
1532
|
+
* Handle hub-changed while role stays 'client'.
|
|
1533
|
+
* When NetworkManager emits hub-changed but NOT role-changed (because
|
|
1534
|
+
* old role === new role === 'client'), the Node must tear down the old
|
|
1535
|
+
* client connection and reconnect to the new hub.
|
|
1536
|
+
*/
|
|
1537
|
+
_onHubChanged = () => { // arrow-function class field, so `this` stays bound when passed to on()/off()
|
|
1538
|
+
if (!this._running || this._role !== "client") return; // ignore unless this node is running and currently a client
|
|
1539
|
+
const topology = this._networkManager.getTopology(); // ask the network layer for its current view
|
|
1540
|
+
if (topology.myRole !== "client") return; // network reports a non-client role for this node: do not reconnect from here
|
|
1541
|
+
this._logger.info("Node", "Hub changed while client — reconnecting");
|
|
1542
|
+
const prev = this._transitioning ?? Promise.resolve(); // chain behind any pending transition, else start from a resolved promise
|
|
1543
|
+
this._transitioning = prev.then(async () => { // the stored promise now represents this queued reconnect
|
|
1544
|
+
if (!this._running || this._role !== "client") return; // re-check: state may have changed while this step was queued
|
|
1545
|
+
await this._tearDownCurrentRole(); // drop the existing client-side state first
|
|
1546
|
+
await this._becomeClient(); // then set the client role up again
|
|
1547
|
+
}); // NOTE(review): no rejection handler is attached here — confirm a throw from teardown/reconnect cannot block later chained transitions
|
|
1548
|
+
};
|
|
1529
1549
|
_onRoleChanged = (event) => {
|
|
1530
1550
|
if (!this._running) return;
|
|
1531
1551
|
const { current } = event;
|
|
@@ -1549,6 +1569,18 @@ class Node {
|
|
|
1549
1569
|
await this._becomeClient();
|
|
1550
1570
|
break;
|
|
1551
1571
|
}
|
|
1572
|
+
if (!this._running) return;
|
|
1573
|
+
const networkRole = this._networkManager.getTopology().myRole;
|
|
1574
|
+
if (networkRole !== this._role && networkRole !== "unassigned") {
|
|
1575
|
+
this._logger.info(
|
|
1576
|
+
"Node",
|
|
1577
|
+
`Reconciling stale role: node=${this._role} → network=${networkRole}`
|
|
1578
|
+
);
|
|
1579
|
+
await this._performTransition({
|
|
1580
|
+
previous: this._role,
|
|
1581
|
+
current: networkRole
|
|
1582
|
+
});
|
|
1583
|
+
}
|
|
1552
1584
|
}
|
|
1553
1585
|
async _becomeHub() {
|
|
1554
1586
|
await this._ioMem.init();
|
|
@@ -1635,6 +1667,9 @@ class Node {
|
|
|
1635
1667
|
await this._client.tearDown();
|
|
1636
1668
|
this._client = void 0;
|
|
1637
1669
|
}
|
|
1670
|
+
if (this._clientSocket && "disconnect" in this._clientSocket) {
|
|
1671
|
+
this._clientSocket.disconnect();
|
|
1672
|
+
}
|
|
1638
1673
|
this._clientSocket = void 0;
|
|
1639
1674
|
}
|
|
1640
1675
|
async _startAgent(ctx) {
|