@topgunbuild/server 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +910 -9
- package/dist/index.d.ts +910 -9
- package/dist/index.js +2472 -170
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +3323 -1016
- package/dist/index.mjs.map +1 -1
- package/package.json +3 -3
package/dist/index.mjs
CHANGED
@@ -10,7 +10,7 @@ import { createServer as createHttpServer } from "http";
 import { createServer as createHttpsServer } from "https";
 import { readFileSync as readFileSync2 } from "fs";
 import { WebSocketServer as WebSocketServer2, WebSocket as WebSocket3 } from "ws";
-import { HLC, LWWMap as LWWMap2, ORMap as ORMap2, serialize as
+import { HLC, LWWMap as LWWMap2, ORMap as ORMap2, serialize as serialize4, deserialize, MessageSchema, WriteConcern as WriteConcern2, ConsistencyLevel as ConsistencyLevel2, DEFAULT_REPLICATION_CONFIG as DEFAULT_REPLICATION_CONFIG2 } from "@topgunbuild/core";
 import * as jwt from "jsonwebtoken";
 import * as crypto from "crypto";

@@ -570,11 +570,268 @@ var TopicManager = class {

 // src/cluster/ClusterManager.ts
 import { WebSocket, WebSocketServer } from "ws";
-import { EventEmitter } from "events";
+import { EventEmitter as EventEmitter2 } from "events";
 import * as dns from "dns";
 import { readFileSync } from "fs";
 import * as https from "https";
-
+
+// src/cluster/FailureDetector.ts
+import { EventEmitter } from "events";
+var DEFAULT_FAILURE_DETECTOR_CONFIG = {
+heartbeatIntervalMs: 1e3,
+suspicionTimeoutMs: 5e3,
+confirmationTimeoutMs: 1e4,
+phiThreshold: 8,
+minSamples: 10,
+maxSamples: 100,
+initialHeartbeatIntervalMs: 1e3
+};
+var FailureDetector = class extends EventEmitter {
+constructor(config = {}) {
+super();
+this.nodeStates = /* @__PURE__ */ new Map();
+this.monitoringNodes = /* @__PURE__ */ new Set();
+this.confirmationTimers = /* @__PURE__ */ new Map();
+this.started = false;
+this.config = { ...DEFAULT_FAILURE_DETECTOR_CONFIG, ...config };
+}
+/**
+* Start the failure detector monitoring loop.
+*/
+start() {
+if (this.started) return;
+this.started = true;
+this.checkTimer = setInterval(() => {
+this.checkAllNodes();
+}, this.config.heartbeatIntervalMs);
+logger.info({ config: this.config }, "FailureDetector started");
+}
+/**
+* Stop the failure detector and clean up.
+*/
+stop() {
+if (!this.started) return;
+this.started = false;
+if (this.checkTimer) {
+clearInterval(this.checkTimer);
+this.checkTimer = void 0;
+}
+for (const timer of this.confirmationTimers.values()) {
+clearTimeout(timer);
+}
+this.confirmationTimers.clear();
+logger.info("FailureDetector stopped");
+}
+/**
+* Start monitoring a node.
+*/
+startMonitoring(nodeId) {
+if (this.monitoringNodes.has(nodeId)) return;
+this.monitoringNodes.add(nodeId);
+this.nodeStates.set(nodeId, {
+lastHeartbeat: Date.now(),
+intervalHistory: [],
+isSuspected: false,
+isConfirmedFailed: false
+});
+logger.debug({ nodeId }, "Started monitoring node");
+}
+/**
+* Stop monitoring a node.
+*/
+stopMonitoring(nodeId) {
+this.monitoringNodes.delete(nodeId);
+this.nodeStates.delete(nodeId);
+const timer = this.confirmationTimers.get(nodeId);
+if (timer) {
+clearTimeout(timer);
+this.confirmationTimers.delete(nodeId);
+}
+logger.debug({ nodeId }, "Stopped monitoring node");
+}
+/**
+* Record a heartbeat from a node.
+* This updates the node's state and clears any suspicion.
+*/
+recordHeartbeat(nodeId) {
+const state = this.nodeStates.get(nodeId);
+if (!state) {
+this.startMonitoring(nodeId);
+return;
+}
+const now = Date.now();
+const interval = now - state.lastHeartbeat;
+state.intervalHistory.push(interval);
+if (state.intervalHistory.length > this.config.maxSamples) {
+state.intervalHistory.shift();
+}
+state.lastHeartbeat = now;
+if (state.isSuspected) {
+state.isSuspected = false;
+state.suspicionStartTime = void 0;
+state.isConfirmedFailed = false;
+const timer = this.confirmationTimers.get(nodeId);
+if (timer) {
+clearTimeout(timer);
+this.confirmationTimers.delete(nodeId);
+}
+this.emit("nodeRecovered", { nodeId });
+logger.info({ nodeId }, "Node recovered");
+}
+}
+/**
+* Check all monitored nodes for failure.
+*/
+checkAllNodes() {
+for (const nodeId of this.monitoringNodes) {
+const phi = this.calculatePhi(nodeId);
+const state = this.nodeStates.get(nodeId);
+if (!state) continue;
+if (phi > this.config.phiThreshold) {
+if (!state.isSuspected) {
+state.isSuspected = true;
+state.suspicionStartTime = Date.now();
+this.emit("nodeSuspected", { nodeId, phi });
+logger.warn({ nodeId, phi }, "Node suspected");
+this.scheduleConfirmation(nodeId);
+}
+}
+}
+}
+/**
+* Schedule failure confirmation after suspicion timeout.
+*/
+scheduleConfirmation(nodeId) {
+const existingTimer = this.confirmationTimers.get(nodeId);
+if (existingTimer) {
+clearTimeout(existingTimer);
+}
+const timer = setTimeout(() => {
+this.confirmFailure(nodeId);
+}, this.config.confirmationTimeoutMs);
+this.confirmationTimers.set(nodeId, timer);
+}
+/**
+* Confirm node failure after confirmation timeout.
+*/
+confirmFailure(nodeId) {
+const state = this.nodeStates.get(nodeId);
+if (!state) return;
+if (state.isSuspected && !state.isConfirmedFailed) {
+state.isConfirmedFailed = true;
+this.emit("nodeConfirmedFailed", { nodeId });
+logger.error({ nodeId }, "Node failure confirmed");
+}
+this.confirmationTimers.delete(nodeId);
+}
+/**
+* Calculate the phi value for a node using the Phi Accrual algorithm.
+*
+* Phi = -log10(P_later(t_now - t_last))
+*
+* where P_later is the probability that a heartbeat will arrive later than expected.
+*/
+calculatePhi(nodeId) {
+const state = this.nodeStates.get(nodeId);
+if (!state) return 0;
+const now = Date.now();
+const timeSinceLastHeartbeat = now - state.lastHeartbeat;
+if (state.intervalHistory.length < this.config.minSamples) {
+const expectedInterval = this.config.initialHeartbeatIntervalMs;
+return timeSinceLastHeartbeat / expectedInterval;
+}
+const mean = this.calculateMean(state.intervalHistory);
+const variance = this.calculateVariance(state.intervalHistory, mean);
+const stdDev = Math.sqrt(variance);
+if (timeSinceLastHeartbeat <= mean) {
+return 0;
+}
+const deviations = stdDev > 0 ? (timeSinceLastHeartbeat - mean) / stdDev : 0;
+const phi = Math.max(0, deviations);
+return phi;
+}
+/**
+* Calculate mean of an array of numbers.
+*/
+calculateMean(values) {
+if (values.length === 0) return 0;
+return values.reduce((sum, v) => sum + v, 0) / values.length;
+}
+/**
+* Calculate variance of an array of numbers.
+*/
+calculateVariance(values, mean) {
+if (values.length < 2) return 0;
+return values.reduce((sum, v) => sum + Math.pow(v - mean, 2), 0) / values.length;
+}
+/**
+* Get list of currently suspected nodes.
+*/
+getSuspectedNodes() {
+const suspected = [];
+for (const [nodeId, state] of this.nodeStates) {
+if (state.isSuspected) {
+suspected.push(nodeId);
+}
+}
+return suspected;
+}
+/**
+* Get list of confirmed failed nodes.
+*/
+getConfirmedFailedNodes() {
+const failed = [];
+for (const [nodeId, state] of this.nodeStates) {
+if (state.isConfirmedFailed) {
+failed.push(nodeId);
+}
+}
+return failed;
+}
+/**
+* Check if a specific node is suspected.
+*/
+isSuspected(nodeId) {
+return this.nodeStates.get(nodeId)?.isSuspected ?? false;
+}
+/**
+* Check if a specific node's failure is confirmed.
+*/
+isConfirmedFailed(nodeId) {
+return this.nodeStates.get(nodeId)?.isConfirmedFailed ?? false;
+}
+/**
+* Get the current phi value for a node.
+*/
+getPhi(nodeId) {
+return this.calculatePhi(nodeId);
+}
+/**
+* Get all monitored nodes.
+*/
+getMonitoredNodes() {
+return Array.from(this.monitoringNodes);
+}
+/**
+* Get metrics for monitoring.
+*/
+getMetrics() {
+let suspectedCount = 0;
+let confirmedCount = 0;
+for (const state of this.nodeStates.values()) {
+if (state.isSuspected) suspectedCount++;
+if (state.isConfirmedFailed) confirmedCount++;
+}
+return {
+monitoredNodes: this.monitoringNodes.size,
+suspectedNodes: suspectedCount,
+confirmedFailedNodes: confirmedCount
+};
+}
+};
+
+// src/cluster/ClusterManager.ts
+var ClusterManager = class extends EventEmitter2 {
 constructor(config) {
 super();
 this.members = /* @__PURE__ */ new Map();
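Note: the hunk above bundles a new phi-accrual FailureDetector into index.mjs. As an illustration only, here is a minimal TypeScript sketch of driving the API surface visible in this hunk; the class lives inside the bundle, so the import path, the event payload shapes and standalone use are assumptions, not documented usage.

// Hypothetical standalone use of the FailureDetector shown in this hunk.
// Assumption: the class is exported from the package root (it may only be internal).
import { FailureDetector } from "@topgunbuild/server";

const detector = new FailureDetector({ phiThreshold: 8, heartbeatIntervalMs: 1000 });
detector.on("nodeSuspected", ({ nodeId, phi }) => console.warn(`suspected ${nodeId}, phi=${phi}`));
detector.on("nodeConfirmedFailed", ({ nodeId }) => console.error(`confirmed failed: ${nodeId}`));
detector.on("nodeRecovered", ({ nodeId }) => console.info(`recovered: ${nodeId}`));

detector.start();                    // runs checkAllNodes() every heartbeatIntervalMs
detector.startMonitoring("node-b");  // seeds lastHeartbeat with Date.now()

// Each recorded heartbeat extends the interval history that calculatePhi() compares
// against the time elapsed since the last heartbeat.
setInterval(() => detector.recordHeartbeat("node-b"), 1000);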
@@ -582,6 +839,30 @@ var ClusterManager = class extends EventEmitter {
 this.reconnectIntervals = /* @__PURE__ */ new Map();
 this._actualPort = 0;
 this.config = config;
+this.failureDetector = new FailureDetector({
+...DEFAULT_FAILURE_DETECTOR_CONFIG,
+heartbeatIntervalMs: config.heartbeatIntervalMs ?? 1e3,
+...config.failureDetection
+});
+this.failureDetector.on("nodeSuspected", (event) => {
+logger.warn({ nodeId: event.nodeId, phi: event.phi }, "Node suspected (failure detector)");
+this.emit("nodeSuspected", event.nodeId, event.phi);
+});
+this.failureDetector.on("nodeRecovered", (event) => {
+logger.info({ nodeId: event.nodeId }, "Node recovered (failure detector)");
+this.emit("nodeRecovered", event.nodeId);
+});
+this.failureDetector.on("nodeConfirmedFailed", (event) => {
+logger.error({ nodeId: event.nodeId }, "Node failure confirmed");
+this.emit("nodeConfirmedFailed", event.nodeId);
+this.handleNodeFailure(event.nodeId);
+});
+}
+/**
+* Get the failure detector instance.
+*/
+getFailureDetector() {
+return this.failureDetector;
 }
 /** Get the actual port the cluster is listening on */
 get port() {
@@ -633,6 +914,8 @@ var ClusterManager = class extends EventEmitter {
 }
 stop() {
 logger.info({ port: this.config.port }, "Stopping Cluster Manager");
+this.stopHeartbeat();
+this.failureDetector.stop();
 for (const timeout of this.reconnectIntervals.values()) {
 clearTimeout(timeout);
 }
@@ -652,6 +935,61 @@ var ClusterManager = class extends EventEmitter {
 this.server.close();
 }
 }
+/**
+* Start sending heartbeats to all peers.
+*/
+startHeartbeat() {
+if (this.heartbeatTimer) return;
+const intervalMs = this.config.heartbeatIntervalMs ?? 1e3;
+this.heartbeatTimer = setInterval(() => {
+this.sendHeartbeatToAll();
+}, intervalMs);
+this.failureDetector.start();
+logger.debug({ intervalMs }, "Heartbeat started");
+}
+/**
+* Stop sending heartbeats.
+*/
+stopHeartbeat() {
+if (this.heartbeatTimer) {
+clearInterval(this.heartbeatTimer);
+this.heartbeatTimer = void 0;
+}
+}
+/**
+* Send heartbeat to all connected peers.
+*/
+sendHeartbeatToAll() {
+for (const [nodeId, member] of this.members) {
+if (member.isSelf) continue;
+if (member.socket && member.socket.readyState === WebSocket.OPEN) {
+this.send(nodeId, "HEARTBEAT", { timestamp: Date.now() });
+}
+}
+}
+/**
+* Handle incoming heartbeat from a peer.
+*/
+handleHeartbeat(senderId, _payload) {
+this.failureDetector.recordHeartbeat(senderId);
+}
+/**
+* Handle confirmed node failure.
+*/
+handleNodeFailure(nodeId) {
+const member = this.members.get(nodeId);
+if (!member) return;
+logger.warn({ nodeId }, "Removing failed node from cluster");
+if (member.socket && member.socket.readyState !== WebSocket.CLOSED) {
+try {
+member.socket.terminate();
+} catch (e) {
+}
+}
+this.members.delete(nodeId);
+this.failureDetector.stopMonitoring(nodeId);
+this.emit("memberLeft", nodeId);
+}
 connectToPeers() {
 for (const peer of this.config.peers) {
 this.connectToPeer(peer);
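Note: the heartbeat methods added above read two fields from the cluster config, heartbeatIntervalMs and failureDetection, and re-emit the detector's events from the ClusterManager itself. Below is a sketch of tuning and observing that behaviour; the field and event names come from this diff, but treating ClusterManager and its config shape as public API is an assumption.

// Hypothetical ClusterManager configuration for the 0.3.0 failure-detection path.
// Assumptions: ClusterManager is exported and constructible from outside the bundle,
// and the peer list below is illustrative only.
import { ClusterManager } from "@topgunbuild/server";

const cluster = new ClusterManager({
  nodeId: "node-a",
  host: "0.0.0.0",
  port: 7000,
  peers: [],
  heartbeatIntervalMs: 1000,        // drives sendHeartbeatToAll() and the detector loop
  failureDetection: {
    phiThreshold: 8,                // suspicion threshold used by calculatePhi()
    confirmationTimeoutMs: 10000    // delay between suspicion and confirmed failure
  }
});

cluster.on("nodeSuspected", (nodeId, phi) => console.warn({ nodeId, phi }, "suspected"));
cluster.on("nodeRecovered", (nodeId) => console.info({ nodeId }, "recovered"));
cluster.on("nodeConfirmedFailed", (nodeId) => console.error({ nodeId }, "removed from cluster"));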
@@ -770,7 +1108,13 @@ var ClusterManager = class extends EventEmitter {
 socket: ws,
 isSelf: false
 });
+this.failureDetector.startMonitoring(remoteNodeId);
+this.startHeartbeat();
 this.emit("memberJoined", remoteNodeId);
+} else if (msg.type === "HEARTBEAT") {
+if (remoteNodeId) {
+this.handleHeartbeat(remoteNodeId, msg.payload);
+}
 } else {
 this.emit("message", msg);
 }
@@ -784,6 +1128,7 @@ var ClusterManager = class extends EventEmitter {
 if (current && current.socket === ws) {
 logger.info({ nodeId: remoteNodeId }, "Peer disconnected");
 this.members.delete(remoteNodeId);
+this.failureDetector.stopMonitoring(remoteNodeId);
 this.emit("memberLeft", remoteNodeId);
 if (initiated && peerAddress) {
 this.scheduleReconnect(peerAddress, 0);
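Note: the final hunk below replaces the old inline rebalancing, lock-manager and metrics code with a chunked MigrationManager plus a PartitionService that can rebalance gradually. A sketch of how the new pieces appear to wire together; the constructor signature and the setDataCollector/setDataStorer callbacks match the code in the hunk, but the storage helpers and the idea of wiring it by hand (rather than via ServerCoordinator) are assumptions.

// Hypothetical wiring of the chunked partition migration added in 0.3.0.
// `cluster` and `partitionService` stand in for the instances the server builds internally;
// the two load/store helpers are stubs for whatever storage backs the partitions.
import { MigrationManager } from "@topgunbuild/server"; // assumption: exported
declare const cluster: any;
declare const partitionService: any;
declare function loadRecordsForPartition(partitionId: number): Promise<Uint8Array[]>;
declare function storeRecordsForPartition(partitionId: number, records: Uint8Array[]): Promise<void>;

const migrations = new MigrationManager(cluster, partitionService, {
  transferChunkSize: 64 * 1024,   // bytes per MIGRATION_CHUNK message
  parallelTransfers: 2,           // concurrent outgoing partition transfers
  maxRetries: 3                   // failed partitions are re-queued this many times
});

migrations.setDataCollector((partitionId) => loadRecordsForPartition(partitionId));
migrations.setDataStorer((partitionId, records) => storeRecordsForPartition(partitionId, records));

migrations.on("migrationComplete", (partitionId) => console.info({ partitionId }, "migrated"));
migrations.on("migrationFailed", (partitionId, err) => console.error({ partitionId, err }, "failed"));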
@@ -837,526 +1182,1345 @@ var ClusterManager = class extends EventEmitter {
 };

 // src/cluster/PartitionService.ts
-import {
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+import { EventEmitter as EventEmitter4 } from "events";
+
+// src/cluster/MigrationManager.ts
+import { EventEmitter as EventEmitter3 } from "events";
+import {
+PartitionState,
+DEFAULT_MIGRATION_CONFIG
+} from "@topgunbuild/core";
+import { xxhash64AsNumber, createXxHash64State } from "@topgunbuild/native";
+var MigrationManager = class extends EventEmitter3 {
+constructor(clusterManager, partitionService, config = {}) {
+super();
+// Active outgoing migrations (this node is source)
+this.activeMigrations = /* @__PURE__ */ new Map();
+// Queue of migrations to process
+this.migrationQueue = [];
+// Incoming migrations (this node is target)
+this.incomingMigrations = /* @__PURE__ */ new Map();
+// Pending chunk acknowledgments
+this.pendingChunkAcks = /* @__PURE__ */ new Map();
+// Pending verification results
+this.pendingVerifications = /* @__PURE__ */ new Map();
+// Metrics tracking
+this.metrics = {
+migrationsStarted: 0,
+migrationsCompleted: 0,
+migrationsFailed: 0,
+chunksTransferred: 0,
+bytesTransferred: 0,
+activeMigrations: 0,
+queuedMigrations: 0
 };
+// Batch processing timer
+this.batchTimer = null;
+// Data collection callback (injected from ServerCoordinator)
+this.dataCollector = null;
+// Data storage callback (injected from ServerCoordinator)
+this.dataStorer = null;
+this.clusterManager = clusterManager;
+this.partitionService = partitionService;
+this.config = {
+...DEFAULT_MIGRATION_CONFIG,
+...config
+};
+this.setupMessageHandlers();
+}
+// ============================================
+// Configuration
+// ============================================
+/**
+* Set the data collector callback
+* Called to collect all records for a partition before migration
+*/
+setDataCollector(collector) {
+this.dataCollector = collector;
+}
+/**
+* Set the data storer callback
+* Called to store received records after successful migration
+*/
+setDataStorer(storer) {
+this.dataStorer = storer;
+}
+// ============================================
+// Migration Planning
+// ============================================
+/**
+* Plan migration for topology change
+*/
+planMigration(oldDistribution, newDistribution) {
+const migrations = [];
+for (const [partitionId, newDist] of newDistribution) {
+const oldDist = oldDistribution.get(partitionId);
+const oldOwner = oldDist?.owner ?? this.clusterManager.config.nodeId;
+const newOwner = newDist.owner;
+if (oldOwner !== newOwner && oldOwner === this.clusterManager.config.nodeId) {
+migrations.push({
+partitionId,
+state: PartitionState.STABLE,
+sourceNode: oldOwner,
+targetNode: newOwner,
+startTime: 0,
+bytesTransferred: 0,
+totalBytes: 0,
+retryCount: 0
+});
+}
+}
+migrations.sort((a, b) => a.partitionId - b.partitionId);
+this.migrationQueue = migrations;
+this.metrics.queuedMigrations = migrations.length;
+logger.info({ total: migrations.length }, "Migration planned");
+this.emit("migrationPlanned", { total: migrations.length });
+if (migrations.length > 0) {
+this.startBatchProcessing();
+}
 }
-
-
-
-
-
+/**
+* Start batch processing timer
+*/
+startBatchProcessing() {
+if (this.batchTimer) return;
+this.startNextBatch().catch((err) => {
+logger.error({ error: err }, "Failed to start first migration batch");
+this.emit("error", err);
+});
+this.batchTimer = setInterval(() => {
+this.startNextBatch().catch((err) => {
+logger.error({ error: err }, "Failed to start migration batch");
+this.emit("error", err);
+});
+}, this.config.batchIntervalMs);
 }
-
-
-
+/**
+* Stop batch processing
+*/
+stopBatchProcessing() {
+if (this.batchTimer) {
+clearInterval(this.batchTimer);
+this.batchTimer = null;
+}
 }
-
-
+/**
+* Start next batch of migrations
+*/
+async startNextBatch() {
+if (this.activeMigrations.size >= this.config.parallelTransfers) {
+return;
+}
+const slotsAvailable = this.config.parallelTransfers - this.activeMigrations.size;
+const batch = this.migrationQueue.splice(0, Math.min(slotsAvailable, this.config.batchSize));
+if (batch.length === 0) {
+if (this.migrationQueue.length === 0 && this.activeMigrations.size === 0) {
+this.stopBatchProcessing();
+}
+return;
+}
+for (const migration of batch) {
+migration.state = PartitionState.MIGRATING;
+migration.startTime = Date.now();
+this.activeMigrations.set(migration.partitionId, migration);
+this.metrics.migrationsStarted++;
+this.metrics.activeMigrations = this.activeMigrations.size;
+this.metrics.queuedMigrations = this.migrationQueue.length;
+this.startPartitionMigration(migration).catch((error) => {
+this.onMigrationFailed(migration.partitionId, error);
+});
+}
+logger.info({ count: batch.length, remaining: this.migrationQueue.length }, "Batch started");
+this.emit("batchStarted", { count: batch.length, remaining: this.migrationQueue.length });
 }
-
-
-
-
+// ============================================
+// Migration Execution
+// ============================================
+/**
+* Start migration for a single partition
+*/
+async startPartitionMigration(migration) {
+const { partitionId, targetNode } = migration;
+logger.info({ partitionId, targetNode }, "Starting partition migration");
+let records;
+if (this.dataCollector) {
+records = await this.dataCollector(partitionId);
+} else {
+records = [];
 }
-
-
-
-
-
-
-
-
-backups.push(allMembers[backupIndex]);
+migration.totalBytes = records.reduce((sum, r) => sum + r.length, 0);
+this.clusterManager.send(targetNode, "OP_FORWARD", {
+_migration: {
+type: "MIGRATION_START",
+payload: {
+partitionId,
+sourceNode: this.clusterManager.config.nodeId,
+estimatedSize: migration.totalBytes
 }
 }
-
+});
+const chunks = this.chunkify(records);
+for (let i = 0; i < chunks.length; i++) {
+const chunk = chunks[i];
+const checksum = this.calculateChecksum(chunk);
+this.clusterManager.send(targetNode, "OP_FORWARD", {
+_migration: {
+type: "MIGRATION_CHUNK",
+payload: {
+partitionId,
+chunkIndex: i,
+totalChunks: chunks.length,
+data: Array.from(chunk),
+// Convert Uint8Array to array for JSON serialization
+checksum
+}
+}
+});
+await this.waitForChunkAck(partitionId, i);
+migration.bytesTransferred += chunk.length;
+this.metrics.chunksTransferred++;
+this.metrics.bytesTransferred += chunk.length;
+this.emit("migrationProgress", migration);
+}
+const fullChecksum = this.calculatePartitionChecksum(records);
+migration.state = PartitionState.SYNC;
+this.clusterManager.send(targetNode, "OP_FORWARD", {
+_migration: {
+type: "MIGRATION_COMPLETE",
+payload: {
+partitionId,
+totalRecords: records.length,
+checksum: fullChecksum
+}
+}
+});
+const verified = await this.waitForVerification(partitionId);
+if (verified) {
+await this.onMigrationComplete(partitionId);
+} else {
+throw new Error(`Migration verification failed for partition ${partitionId}`);
 }
 }
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-if (!lock) {
-lock = {
-name,
-owner: "",
-fencingToken: 0,
-expiry: 0,
-queue: []
-};
-this.locks.set(name, lock);
+/**
+* Split records into chunks
+*/
+chunkify(records) {
+const chunks = [];
+let currentChunk = [];
+let currentSize = 0;
+for (const record of records) {
+const lengthPrefix = new Uint8Array(4);
+new DataView(lengthPrefix.buffer).setUint32(0, record.length, true);
+currentChunk.push(...lengthPrefix, ...record);
+currentSize += 4 + record.length;
+if (currentSize >= this.config.transferChunkSize) {
+chunks.push(new Uint8Array(currentChunk));
+currentChunk = [];
+currentSize = 0;
+}
 }
-
-
-this.grantLock(lock, clientId, safeTtl);
-return { granted: true, fencingToken: lock.fencingToken };
+if (currentChunk.length > 0) {
+chunks.push(new Uint8Array(currentChunk));
 }
-if (
-
-logger.info({ name, clientId, fencingToken: lock.fencingToken }, "Lock lease extended");
-return { granted: true, fencingToken: lock.fencingToken };
+if (chunks.length === 0) {
+chunks.push(new Uint8Array(0));
 }
-
-logger.info({ name, clientId, queueLength: lock.queue.length }, "Lock queued");
-return { granted: false };
+return chunks;
 }
-
-
-
-
-
-return false;
-}
-if (lock.fencingToken !== fencingToken) {
-logger.warn({ name, clientId, sentToken: fencingToken, actualToken: lock.fencingToken }, "Release failed: Token mismatch");
-return false;
-}
-this.processNext(lock);
-return true;
+/**
+* Calculate checksum for a chunk using native xxhash
+*/
+calculateChecksum(data) {
+return String(xxhash64AsNumber(data));
 }
-
-
-
-
-
-
-
-lock.queue = lock.queue.filter((req) => req.clientId !== clientId);
-if (lock.queue.length < initialLen) {
-logger.info({ name: lock.name, clientId }, "Removed from lock queue due to disconnect");
-}
-}
+/**
+* Calculate checksum for all partition records using streaming xxhash
+*/
+calculatePartitionChecksum(records) {
+const state = createXxHash64State();
+for (const record of records) {
+state.update(record);
 }
+return String(state.digestAsNumber());
 }
-
-
-
-
-
+/**
+* Wait for chunk acknowledgment
+*/
+waitForChunkAck(partitionId, chunkIndex) {
+return new Promise((resolve, reject) => {
+const key = `${partitionId}:${chunkIndex}`;
+const timeout = setTimeout(() => {
+this.pendingChunkAcks.delete(key);
+reject(new Error(`Chunk ack timeout for partition ${partitionId}, chunk ${chunkIndex}`));
+}, this.config.syncTimeoutMs);
+this.pendingChunkAcks.set(key, { resolve, reject, timeout });
+});
 }
-
-
-
-
-
-const
-
-
-
-
-
-
-
+/**
+* Wait for migration verification
+*/
+waitForVerification(partitionId) {
+return new Promise((resolve) => {
+const timeout = setTimeout(() => {
+this.pendingVerifications.delete(partitionId);
+resolve(false);
+}, this.config.syncTimeoutMs);
+this.pendingVerifications.set(partitionId, { resolve, timeout });
+});
+}
+// ============================================
+// Migration Completion
+// ============================================
+/**
+* Handle successful migration completion
+*/
+async onMigrationComplete(partitionId) {
+const migration = this.activeMigrations.get(partitionId);
+if (!migration) return;
+migration.state = PartitionState.STABLE;
+this.activeMigrations.delete(partitionId);
+this.metrics.migrationsCompleted++;
+this.metrics.activeMigrations = this.activeMigrations.size;
+logger.info({
+partitionId,
+duration: Date.now() - migration.startTime,
+bytesTransferred: migration.bytesTransferred
+}, "Migration completed");
+this.emit("migrationComplete", partitionId);
+}
+/**
+* Handle migration failure
+*/
+async onMigrationFailed(partitionId, error) {
+const migration = this.activeMigrations.get(partitionId);
+if (!migration) return;
+migration.retryCount++;
+if (migration.retryCount <= this.config.maxRetries) {
+migration.state = PartitionState.STABLE;
+migration.bytesTransferred = 0;
+this.activeMigrations.delete(partitionId);
+this.migrationQueue.unshift(migration);
+this.metrics.queuedMigrations = this.migrationQueue.length;
+this.metrics.activeMigrations = this.activeMigrations.size;
+logger.warn({
+partitionId,
+retryCount: migration.retryCount,
+error: error.message
+}, "Migration failed, will retry");
+} else {
+migration.state = PartitionState.FAILED;
+this.activeMigrations.delete(partitionId);
+this.metrics.migrationsFailed++;
+this.metrics.activeMigrations = this.activeMigrations.size;
+logger.error({
+partitionId,
+retryCount: migration.retryCount,
+error: error.message
+}, "Migration failed permanently");
+this.emit("migrationFailed", partitionId, error);
+}
+}
+// ============================================
+// Incoming Migration Handlers (Target Node)
+// ============================================
+/**
+* Handle MIGRATION_START message
+*/
+handleMigrationStart(payload) {
+const { partitionId, sourceNode, estimatedSize } = payload;
+logger.info({ partitionId, sourceNode, estimatedSize }, "Receiving migration");
+this.incomingMigrations.set(partitionId, {
+sourceNode,
+chunks: [],
+expectedSize: estimatedSize,
+receivedSize: 0,
+startTime: Date.now()
+});
+}
+/**
+* Handle MIGRATION_CHUNK message
+*/
+handleMigrationChunk(payload) {
+const { partitionId, chunkIndex, data, checksum } = payload;
+const incoming = this.incomingMigrations.get(partitionId);
+if (!incoming) {
+logger.warn({ partitionId, chunkIndex }, "Received chunk for unknown migration");
 return;
 }
-
-
+const chunkData = new Uint8Array(data);
+const actualChecksum = this.calculateChecksum(chunkData);
+const success = actualChecksum === checksum;
+if (success) {
+incoming.chunks[chunkIndex] = chunkData;
+incoming.receivedSize += chunkData.length;
+} else {
+logger.warn({ partitionId, chunkIndex, expected: checksum, actual: actualChecksum }, "Chunk checksum mismatch");
 }
-
-
-
-
-
-
-
-
-logger.info({ name: lock.name, owner: lock.owner }, "Lock expired, processing next");
-this.processNext(lock);
-} else if (!lock.owner && lock.queue.length === 0) {
-this.locks.delete(name);
+this.clusterManager.send(incoming.sourceNode, "OP_FORWARD", {
+_migration: {
+type: "MIGRATION_CHUNK_ACK",
+payload: {
+partitionId,
+chunkIndex,
+success
+}
 }
-}
-}
-};
-_LockManager.MIN_TTL = 1e3;
-// 1 second
-_LockManager.MAX_TTL = 3e5;
-var LockManager = _LockManager;
-
-// src/security/SecurityManager.ts
-var SecurityManager = class {
-constructor(policies = []) {
-this.policies = [];
-this.policies = policies;
-}
-addPolicy(policy) {
-this.policies.push(policy);
+});
 }
-
-
-
+/**
+* Handle MIGRATION_COMPLETE message
+*/
+async handleMigrationComplete(payload) {
+const { partitionId, totalRecords, checksum } = payload;
+const incoming = this.incomingMigrations.get(partitionId);
+if (!incoming) {
+logger.warn({ partitionId }, "Received complete for unknown migration");
+return;
 }
-
-
-
+const allData = this.reassemble(incoming.chunks);
+const records = this.deserializeRecords(allData);
+const actualChecksum = this.calculatePartitionChecksum(records);
+const checksumMatch = actualChecksum === checksum;
+const success = checksumMatch && records.length === totalRecords;
+if (success && this.dataStorer) {
+await this.dataStorer(partitionId, records);
 }
-
-
-
-
-
-
+logger.info({
+partitionId,
+duration: Date.now() - incoming.startTime,
+records: records.length,
+checksumMatch
+}, "Migration received");
+this.clusterManager.send(incoming.sourceNode, "OP_FORWARD", {
+_migration: {
+type: "MIGRATION_VERIFY",
+payload: {
+partitionId,
+success,
+checksumMatch
 }
+}
+});
+this.incomingMigrations.delete(partitionId);
+}
+/**
+* Handle MIGRATION_CHUNK_ACK message
+*/
+handleMigrationChunkAck(payload) {
+const { partitionId, chunkIndex, success } = payload;
+const key = `${partitionId}:${chunkIndex}`;
+const pending = this.pendingChunkAcks.get(key);
+if (pending) {
+clearTimeout(pending.timeout);
+this.pendingChunkAcks.delete(key);
+if (success) {
+pending.resolve();
 } else {
+pending.reject(new Error(`Chunk ${chunkIndex} rejected by target`));
 }
 }
-logger.warn({
-userId: principal.userId,
-roles: principal.roles,
-mapName,
-action,
-policyCount: this.policies.length
-}, "SecurityManager: Access Denied - No matching policy found");
-return false;
 }
-
-
-
-
-
+/**
+* Handle MIGRATION_VERIFY message
+*/
+handleMigrationVerify(payload) {
+const { partitionId, success } = payload;
+const pending = this.pendingVerifications.get(partitionId);
+if (pending) {
+clearTimeout(pending.timeout);
+this.pendingVerifications.delete(partitionId);
+pending.resolve(success);
 }
-
-
-
-
-
-
-
-
-
-
-
-
+}
+/**
+* Reassemble chunks into continuous data
+*/
+reassemble(chunks) {
+const totalLength = chunks.reduce((sum, c) => sum + (c?.length ?? 0), 0);
+const result = new Uint8Array(totalLength);
+let offset = 0;
+for (const chunk of chunks) {
+if (chunk) {
+result.set(chunk, offset);
+offset += chunk.length;
 }
 }
-
-
-
-
-
-
-
+return result;
+}
+/**
+* Deserialize records from chunk data
+*/
+deserializeRecords(data) {
+const records = [];
+let offset = 0;
+while (offset < data.length) {
+if (offset + 4 > data.length) break;
+const length = new DataView(data.buffer, data.byteOffset + offset, 4).getUint32(0, true);
+offset += 4;
+if (offset + length > data.length) break;
+records.push(data.slice(offset, offset + length));
+offset += length;
 }
-return
+return records;
 }
-
-
+// ============================================
+// Message Handling
+// ============================================
+/**
+* Setup cluster message handlers
+*/
+setupMessageHandlers() {
+this.clusterManager.on("message", (msg) => {
+if (msg.payload?._migration) {
+const migration = msg.payload._migration;
+switch (migration.type) {
+case "MIGRATION_START":
+this.handleMigrationStart(migration.payload);
+break;
+case "MIGRATION_CHUNK":
+this.handleMigrationChunk(migration.payload);
+break;
+case "MIGRATION_COMPLETE":
+this.handleMigrationComplete(migration.payload).catch((err) => {
+logger.error({ error: err }, "Error handling migration complete");
+});
+break;
+case "MIGRATION_CHUNK_ACK":
+this.handleMigrationChunkAck(migration.payload);
+break;
+case "MIGRATION_VERIFY":
+this.handleMigrationVerify(migration.payload);
+break;
+}
+}
+});
 }
-
-
-
-
+// ============================================
+// Status and Metrics
+// ============================================
+/**
+* Check if a partition is currently migrating
+*/
+isActive(partitionId) {
+return this.activeMigrations.has(partitionId) || this.incomingMigrations.has(partitionId);
+}
+/**
+* Get migration status
+*/
+getStatus() {
+const avgMigrationTime = this.metrics.migrationsCompleted > 0 ? Date.now() - (this.activeMigrations.values().next().value?.startTime ?? Date.now()) : 0;
+const estimatedTimeRemainingMs = (this.migrationQueue.length + this.activeMigrations.size) * (avgMigrationTime || 1e3);
+return {
+inProgress: this.activeMigrations.size > 0 || this.migrationQueue.length > 0,
+active: Array.from(this.activeMigrations.values()),
+queued: this.migrationQueue.length,
+completed: this.metrics.migrationsCompleted,
+failed: this.metrics.migrationsFailed,
+estimatedTimeRemainingMs
+};
+}
+/**
+* Get migration metrics
+*/
+getMetrics() {
+return { ...this.metrics };
+}
+/**
+* Cancel all active and queued migrations
+*/
+async cancelAll() {
+this.stopBatchProcessing();
+this.migrationQueue = [];
+this.metrics.queuedMigrations = 0;
+for (const [partitionId, migration] of this.activeMigrations) {
+migration.state = PartitionState.FAILED;
+this.metrics.migrationsFailed++;
+this.emit("migrationFailed", partitionId, new Error("Migration cancelled"));
 }
-
-
-
-
-
+this.activeMigrations.clear();
+this.metrics.activeMigrations = 0;
+for (const pending of this.pendingChunkAcks.values()) {
+clearTimeout(pending.timeout);
+pending.reject(new Error("Migration cancelled"));
 }
-
+this.pendingChunkAcks.clear();
+for (const pending of this.pendingVerifications.values()) {
+clearTimeout(pending.timeout);
+pending.resolve(false);
+}
+this.pendingVerifications.clear();
+this.incomingMigrations.clear();
+logger.info("All migrations cancelled");
+}
+/**
+* Cleanup resources (sync version for backwards compatibility)
+*/
+close() {
+this.cancelAll();
+}
+/**
+* Async cleanup - waits for cancellation to complete
+*/
+async closeAsync() {
+await this.cancelAll();
+this.removeAllListeners();
 }
 };

-// src/
-import {
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-this.
-
-
-
-
-
-this.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-this.
-
-help: "Events NOT sent due to no active subscriptions",
-registers: [this.registry]
-});
-this.subscribersPerEvent = new Summary({
-name: "topgun_subscribers_per_event",
-help: "Distribution of subscribers per event",
-percentiles: [0.5, 0.9, 0.99],
-registers: [this.registry]
-});
-this.eventQueueSize = new Gauge({
-name: "topgun_event_queue_size",
-help: "Current size of the event queue across all stripes",
-labelNames: ["stripe"],
-registers: [this.registry]
-});
-this.eventQueueEnqueued = new Counter({
-name: "topgun_event_queue_enqueued_total",
-help: "Total number of events enqueued",
-registers: [this.registry]
-});
-this.eventQueueDequeued = new Counter({
-name: "topgun_event_queue_dequeued_total",
-help: "Total number of events dequeued",
-registers: [this.registry]
-});
-this.eventQueueRejected = new Counter({
-name: "topgun_event_queue_rejected_total",
-help: "Total number of events rejected due to queue capacity",
-registers: [this.registry]
-});
-this.backpressureSyncForcedTotal = new Counter({
-name: "topgun_backpressure_sync_forced_total",
-help: "Total number of times sync processing was forced",
-registers: [this.registry]
-});
-this.backpressurePendingOps = new Gauge({
-name: "topgun_backpressure_pending_ops",
-help: "Current number of pending async operations",
-registers: [this.registry]
-});
-this.backpressureWaitsTotal = new Counter({
-name: "topgun_backpressure_waits_total",
-help: "Total number of times had to wait for capacity",
-registers: [this.registry]
-});
-this.backpressureTimeoutsTotal = new Counter({
-name: "topgun_backpressure_timeouts_total",
-help: "Total number of backpressure timeouts",
-registers: [this.registry]
-});
-this.connectionsAcceptedTotal = new Counter({
-name: "topgun_connections_accepted_total",
-help: "Total number of connections accepted",
-registers: [this.registry]
-});
-this.connectionsRejectedTotal = new Counter({
-name: "topgun_connections_rejected_total",
-help: "Total number of connections rejected due to rate limiting",
-registers: [this.registry]
-});
-this.connectionsPending = new Gauge({
-name: "topgun_connections_pending",
-help: "Number of connections currently pending (handshake in progress)",
-registers: [this.registry]
-});
-this.connectionRatePerSecond = new Gauge({
-name: "topgun_connection_rate_per_second",
-help: "Current connection rate per second",
-registers: [this.registry]
-});
+// src/cluster/PartitionService.ts
+import {
+hashString,
+PARTITION_COUNT,
+DEFAULT_BACKUP_COUNT,
+DEFAULT_MIGRATION_CONFIG as DEFAULT_MIGRATION_CONFIG2
+} from "@topgunbuild/core";
+var DEFAULT_PARTITION_SERVICE_CONFIG = {
+gradualRebalancing: false,
+migration: DEFAULT_MIGRATION_CONFIG2
+};
+var PartitionService = class extends EventEmitter4 {
+constructor(cluster, config = {}) {
+super();
+// partitionId -> { owner, backups }
+this.partitions = /* @__PURE__ */ new Map();
+this.PARTITION_COUNT = PARTITION_COUNT;
+this.BACKUP_COUNT = DEFAULT_BACKUP_COUNT;
+// Phase 4: Version tracking for partition map
+this.mapVersion = 0;
+this.lastRebalanceTime = 0;
+this.migrationManager = null;
+this.cluster = cluster;
+this.config = {
+...DEFAULT_PARTITION_SERVICE_CONFIG,
+...config
+};
+if (this.config.gradualRebalancing) {
+this.migrationManager = new MigrationManager(
+cluster,
+this,
+this.config.migration
+);
+this.migrationManager.on("migrationComplete", (partitionId) => {
+logger.info({ partitionId }, "Migration completed, updating ownership");
+});
+this.migrationManager.on("migrationFailed", (partitionId, error) => {
+logger.error({ partitionId, error: error.message }, "Migration failed");
+});
+}
+this.cluster.on("memberJoined", (nodeId) => this.onMembershipChange("JOIN", nodeId));
+this.cluster.on("memberLeft", (nodeId) => this.onMembershipChange("LEAVE", nodeId));
+this.rebalance("REBALANCE");
 }
-
-
+/**
+* Handle membership change
+*/
+onMembershipChange(reason, nodeId) {
+if (this.config.gradualRebalancing && this.migrationManager) {
+this.rebalanceGradual(reason, nodeId);
+} else {
+this.rebalance(reason, nodeId);
+}
 }
-
-
+getPartitionId(key) {
+return Math.abs(hashString(key)) % this.PARTITION_COUNT;
 }
-
-this.
+getDistribution(key) {
+const pId = this.getPartitionId(key);
+return this.partitions.get(pId) || {
+owner: this.cluster.config.nodeId,
+backups: []
+};
 }
-
-this.
+getOwner(key) {
+return this.getDistribution(key).owner;
 }
-
-this.
+isLocalOwner(key) {
+return this.getOwner(key) === this.cluster.config.nodeId;
 }
-
-
-
-*/
-incEventsRouted() {
-this.eventsRoutedTotal.inc();
+isLocalBackup(key) {
+const dist = this.getDistribution(key);
+return dist.backups.includes(this.cluster.config.nodeId);
 }
-
-
-*/
-incEventsFilteredBySubscription() {
-this.eventsFilteredBySubscription.inc();
+isRelated(key) {
+return this.isLocalOwner(key) || this.isLocalBackup(key);
 }
+// ============================================
+// Phase 4: Partition Map Methods
+// ============================================
 /**
-*
+* Get current partition map version
 */
-
-this.
+getMapVersion() {
+return this.mapVersion;
 }
-// === Bounded event queue metric methods ===
 /**
-*
+* Generate full PartitionMap for client consumption
 */
-
-
+getPartitionMap() {
+const nodes = [];
+const partitions = [];
+for (const nodeId of this.cluster.getMembers()) {
+const isSelf = nodeId === this.cluster.config.nodeId;
+const host = isSelf ? this.cluster.config.host : "unknown";
+const port = isSelf ? this.cluster.port : 0;
+nodes.push({
+nodeId,
+endpoints: {
+websocket: `ws://${host}:${port}`
+},
+status: "ACTIVE"
+});
+}
+for (let i = 0; i < this.PARTITION_COUNT; i++) {
+const dist = this.partitions.get(i);
+if (dist) {
+partitions.push({
+partitionId: i,
+ownerNodeId: dist.owner,
+backupNodeIds: dist.backups
+});
+}
+}
+return {
+version: this.mapVersion,
+partitionCount: this.PARTITION_COUNT,
+nodes,
+partitions,
+generatedAt: Date.now()
+};
 }
 /**
-*
+* Get partition info by ID
 */
-
-this.
+getPartitionInfo(partitionId) {
+const dist = this.partitions.get(partitionId);
+if (!dist) return null;
+return {
+partitionId,
+ownerNodeId: dist.owner,
+backupNodeIds: dist.backups
+};
 }
 /**
-*
+* Get owner node for a partition ID
 */
-
-this.
+getPartitionOwner(partitionId) {
+const dist = this.partitions.get(partitionId);
+return dist?.owner ?? null;
 }
-
-
-
-
-
+rebalance(reason = "REBALANCE", triggerNodeId) {
+const oldPartitions = new Map(this.partitions);
+let allMembers = this.cluster.getMembers().sort();
+if (allMembers.length === 0) {
+allMembers = [this.cluster.config.nodeId];
+}
+logger.info({ memberCount: allMembers.length, members: allMembers, reason }, "Rebalancing partitions");
+const changes = [];
+for (let i = 0; i < this.PARTITION_COUNT; i++) {
+const ownerIndex = i % allMembers.length;
+const owner = allMembers[ownerIndex];
+const backups = [];
+if (allMembers.length > 1) {
+for (let b = 1; b <= this.BACKUP_COUNT; b++) {
+const backupIndex = (ownerIndex + b) % allMembers.length;
+backups.push(allMembers[backupIndex]);
+}
+}
+const oldDist = oldPartitions.get(i);
+if (oldDist && oldDist.owner !== owner) {
+changes.push({
+partitionId: i,
+previousOwner: oldDist.owner,
+newOwner: owner,
+reason
+});
+}
+this.partitions.set(i, { owner, backups });
+}
+if (changes.length > 0 || this.mapVersion === 0) {
+this.mapVersion++;
+this.lastRebalanceTime = Date.now();
+logger.info({
+version: this.mapVersion,
+changesCount: changes.length,
+reason
+}, "Partition map updated");
+this.emit("rebalanced", this.getPartitionMap(), changes);
+}
 }
-//
+// ============================================
+// Phase 4 Task 03: Gradual Rebalancing
+// ============================================
 /**
-*
+* Perform gradual rebalancing using MigrationManager
 */
-
-this.
+rebalanceGradual(reason, triggerNodeId) {
+if (!this.migrationManager) {
+this.rebalance(reason, triggerNodeId);
+return;
+}
+const oldDistribution = new Map(this.partitions);
+let allMembers = this.cluster.getMembers().sort();
+if (allMembers.length === 0) {
+allMembers = [this.cluster.config.nodeId];
+}
+const newDistribution = /* @__PURE__ */ new Map();
+for (let i = 0; i < this.PARTITION_COUNT; i++) {
+const ownerIndex = i % allMembers.length;
+const owner = allMembers[ownerIndex];
+const backups = [];
+if (allMembers.length > 1) {
+for (let b = 1; b <= this.BACKUP_COUNT; b++) {
+const backupIndex = (ownerIndex + b) % allMembers.length;
+backups.push(allMembers[backupIndex]);
+}
+}
+newDistribution.set(i, { owner, backups });
+}
+logger.info({ memberCount: allMembers.length, reason, triggerNodeId }, "Planning gradual rebalance");
+this.migrationManager.planMigration(oldDistribution, newDistribution);
+for (const [partitionId, dist] of newDistribution) {
+this.partitions.set(partitionId, dist);
+}
+this.mapVersion++;
+this.lastRebalanceTime = Date.now();
+const changes = [];
+for (const [partitionId, newDist] of newDistribution) {
+const oldDist = oldDistribution.get(partitionId);
+if (oldDist && oldDist.owner !== newDist.owner) {
+changes.push({
+partitionId,
+previousOwner: oldDist.owner,
+newOwner: newDist.owner,
+reason
+});
+}
+}
+this.emit("rebalanced", this.getPartitionMap(), changes);
 }
 /**
-* Set
+* Set partition owner (called after migration completes)
 */
-
-this.
+setOwner(partitionId, nodeId) {
+const partition = this.partitions.get(partitionId);
+if (!partition) return;
+const previousOwner = partition.owner;
+if (previousOwner === nodeId) return;
+partition.owner = nodeId;
+this.mapVersion++;
+logger.info({ partitionId, previousOwner, newOwner: nodeId, version: this.mapVersion }, "Partition owner updated");
+this.emit("partitionMoved", {
+partitionId,
+previousOwner,
+newOwner: nodeId,
+version: this.mapVersion
+});
 }
 /**
-*
+* Get backups for a partition
 */
-
-this.
+getBackups(partitionId) {
+const dist = this.partitions.get(partitionId);
+return dist?.backups ?? [];
 }
 /**
-*
+* Get migration status
 */
-
-this.
+getMigrationStatus() {
+return this.migrationManager?.getStatus() ?? null;
|
|
2029
|
+
getMigrationStatus() {
|
|
2030
|
+
return this.migrationManager?.getStatus() ?? null;
|
|
1290
2031
|
}
|
|
1291
|
-
// === Connection scaling metric methods ===
|
|
1292
2032
|
/**
|
|
1293
|
-
*
|
|
2033
|
+
* Check if partition is currently migrating
|
|
1294
2034
|
*/
|
|
1295
|
-
|
|
1296
|
-
this.
|
|
2035
|
+
isMigrating(partitionId) {
|
|
2036
|
+
return this.migrationManager?.isActive(partitionId) ?? false;
|
|
1297
2037
|
}
|
|
1298
2038
|
/**
|
|
1299
|
-
*
|
|
2039
|
+
* Check if any partition is currently migrating
|
|
1300
2040
|
*/
|
|
1301
|
-
|
|
1302
|
-
this.
|
|
2041
|
+
isRebalancing() {
|
|
2042
|
+
const status = this.getMigrationStatus();
|
|
2043
|
+
return status?.inProgress ?? false;
|
|
1303
2044
|
}
|
|
1304
2045
|
/**
|
|
1305
|
-
*
|
|
2046
|
+
* Get MigrationManager for configuration
|
|
1306
2047
|
*/
|
|
1307
|
-
|
|
1308
|
-
this.
|
|
2048
|
+
getMigrationManager() {
|
|
2049
|
+
return this.migrationManager;
|
|
1309
2050
|
}
|
|
1310
2051
|
/**
|
|
1311
|
-
*
|
|
2052
|
+
* Cancel all migrations
|
|
1312
2053
|
*/
|
|
1313
|
-
|
|
1314
|
-
this.
|
|
1315
|
-
|
|
1316
|
-
async getMetrics() {
|
|
1317
|
-
return this.registry.metrics();
|
|
1318
|
-
}
|
|
1319
|
-
async getMetricsJson() {
|
|
1320
|
-
const metrics = await this.registry.getMetricsAsJSON();
|
|
1321
|
-
const result = {};
|
|
1322
|
-
for (const metric of metrics) {
|
|
1323
|
-
if (metric.values.length === 1) {
|
|
1324
|
-
result[metric.name] = metric.values[0].value;
|
|
1325
|
-
} else {
|
|
1326
|
-
result[metric.name] = metric.values;
|
|
1327
|
-
}
|
|
2054
|
+
async cancelMigrations() {
|
|
2055
|
+
if (this.migrationManager) {
|
|
2056
|
+
await this.migrationManager.cancelAll();
|
|
1328
2057
|
}
|
|
1329
|
-
return result;
|
|
1330
|
-
}
|
|
1331
|
-
getContentType() {
|
|
1332
|
-
return this.registry.contentType;
|
|
1333
2058
|
}
|
|
1334
2059
|
};
|
|
1335
2060
|
|
|
1336
|
-
// src/
|
|
1337
|
-
|
|
1338
|
-
|
|
1339
|
-
|
|
1340
|
-
|
|
1341
|
-
|
|
1342
|
-
|
|
1343
|
-
|
|
1344
|
-
this.setupClusterMap();
|
|
1345
|
-
this.setupStatsMap();
|
|
1346
|
-
this.setupMapsMap();
|
|
1347
|
-
this.statsInterval = setInterval(() => this.updateStats(), 5e3);
|
|
1348
|
-
this.cluster.on("memberJoined", () => this.updateClusterMap());
|
|
1349
|
-
this.cluster.on("memberLeft", () => this.updateClusterMap());
|
|
1350
|
-
this.updateClusterMap();
|
|
1351
|
-
this.updateStats();
|
|
2061
|
+
// src/cluster/LockManager.ts
|
|
2062
|
+
import { EventEmitter as EventEmitter5 } from "events";
|
|
2063
|
+
var _LockManager = class _LockManager extends EventEmitter5 {
|
|
2064
|
+
// 5 minutes
|
|
2065
|
+
constructor() {
|
|
2066
|
+
super();
|
|
2067
|
+
this.locks = /* @__PURE__ */ new Map();
|
|
2068
|
+
this.checkInterval = setInterval(() => this.cleanupExpiredLocks(), 1e3);
|
|
1352
2069
|
}
|
|
1353
2070
|
stop() {
|
|
1354
|
-
|
|
1355
|
-
clearInterval(this.statsInterval);
|
|
1356
|
-
}
|
|
2071
|
+
clearInterval(this.checkInterval);
|
|
1357
2072
|
}
|
|
1358
|
-
|
|
1359
|
-
|
|
2073
|
+
acquire(name, clientId, requestId, ttl) {
|
|
2074
|
+
const safeTtl = Math.max(_LockManager.MIN_TTL, Math.min(ttl || _LockManager.MIN_TTL, _LockManager.MAX_TTL));
|
|
2075
|
+
let lock = this.locks.get(name);
|
|
2076
|
+
if (!lock) {
|
|
2077
|
+
lock = {
|
|
2078
|
+
name,
|
|
2079
|
+
owner: "",
|
|
2080
|
+
fencingToken: 0,
|
|
2081
|
+
expiry: 0,
|
|
2082
|
+
queue: []
|
|
2083
|
+
};
|
|
2084
|
+
this.locks.set(name, lock);
|
|
2085
|
+
}
|
|
2086
|
+
const now = Date.now();
|
|
2087
|
+
if (!lock.owner || lock.expiry < now) {
|
|
2088
|
+
this.grantLock(lock, clientId, safeTtl);
|
|
2089
|
+
return { granted: true, fencingToken: lock.fencingToken };
|
|
2090
|
+
}
|
|
2091
|
+
if (lock.owner === clientId) {
|
|
2092
|
+
lock.expiry = Math.max(lock.expiry, now + safeTtl);
|
|
2093
|
+
logger.info({ name, clientId, fencingToken: lock.fencingToken }, "Lock lease extended");
|
|
2094
|
+
return { granted: true, fencingToken: lock.fencingToken };
|
|
2095
|
+
}
|
|
2096
|
+
lock.queue.push({ clientId, requestId, ttl: safeTtl, timestamp: now });
|
|
2097
|
+
logger.info({ name, clientId, queueLength: lock.queue.length }, "Lock queued");
|
|
2098
|
+
return { granted: false };
|
|
2099
|
+
}
|
|
2100
|
+
release(name, clientId, fencingToken) {
|
|
2101
|
+
const lock = this.locks.get(name);
|
|
2102
|
+
if (!lock) return false;
|
|
2103
|
+
if (lock.owner !== clientId) {
|
|
2104
|
+
logger.warn({ name, clientId, owner: lock.owner }, "Release failed: Not owner");
|
|
2105
|
+
return false;
|
|
2106
|
+
}
|
|
2107
|
+
if (lock.fencingToken !== fencingToken) {
|
|
2108
|
+
logger.warn({ name, clientId, sentToken: fencingToken, actualToken: lock.fencingToken }, "Release failed: Token mismatch");
|
|
2109
|
+
return false;
|
|
2110
|
+
}
|
|
2111
|
+
this.processNext(lock);
|
|
2112
|
+
return true;
|
|
2113
|
+
}
|
|
2114
|
+
handleClientDisconnect(clientId) {
|
|
2115
|
+
for (const lock of this.locks.values()) {
|
|
2116
|
+
if (lock.owner === clientId) {
|
|
2117
|
+
logger.info({ name: lock.name, clientId }, "Releasing lock due to disconnect");
|
|
2118
|
+
this.processNext(lock);
|
|
2119
|
+
} else {
|
|
2120
|
+
const initialLen = lock.queue.length;
|
|
2121
|
+
lock.queue = lock.queue.filter((req) => req.clientId !== clientId);
|
|
2122
|
+
if (lock.queue.length < initialLen) {
|
|
2123
|
+
logger.info({ name: lock.name, clientId }, "Removed from lock queue due to disconnect");
|
|
2124
|
+
}
|
|
2125
|
+
}
|
|
2126
|
+
}
|
|
2127
|
+
}
|
|
2128
|
+
grantLock(lock, clientId, ttl) {
|
|
2129
|
+
lock.owner = clientId;
|
|
2130
|
+
lock.expiry = Date.now() + ttl;
|
|
2131
|
+
lock.fencingToken++;
|
|
2132
|
+
logger.info({ name: lock.name, clientId, fencingToken: lock.fencingToken }, "Lock granted");
|
|
2133
|
+
}
|
|
2134
|
+
processNext(lock) {
|
|
2135
|
+
const now = Date.now();
|
|
2136
|
+
lock.owner = "";
|
|
2137
|
+
lock.expiry = 0;
|
|
2138
|
+
while (lock.queue.length > 0) {
|
|
2139
|
+
const next = lock.queue.shift();
|
|
2140
|
+
this.grantLock(lock, next.clientId, next.ttl);
|
|
2141
|
+
this.emit("lockGranted", {
|
|
2142
|
+
clientId: next.clientId,
|
|
2143
|
+
requestId: next.requestId,
|
|
2144
|
+
name: lock.name,
|
|
2145
|
+
fencingToken: lock.fencingToken
|
|
2146
|
+
});
|
|
2147
|
+
return;
|
|
2148
|
+
}
|
|
2149
|
+
if (lock.queue.length === 0) {
|
|
2150
|
+
this.locks.delete(lock.name);
|
|
2151
|
+
}
|
|
2152
|
+
}
|
|
2153
|
+
cleanupExpiredLocks() {
|
|
2154
|
+
const now = Date.now();
|
|
2155
|
+
const lockNames = Array.from(this.locks.keys());
|
|
2156
|
+
for (const name of lockNames) {
|
|
2157
|
+
const lock = this.locks.get(name);
|
|
2158
|
+
if (!lock) continue;
|
|
2159
|
+
if (lock.owner && lock.expiry < now) {
|
|
2160
|
+
logger.info({ name: lock.name, owner: lock.owner }, "Lock expired, processing next");
|
|
2161
|
+
this.processNext(lock);
|
|
2162
|
+
} else if (!lock.owner && lock.queue.length === 0) {
|
|
2163
|
+
this.locks.delete(name);
|
|
2164
|
+
}
|
|
2165
|
+
}
|
|
2166
|
+
}
|
|
2167
|
+
};
|
|
2168
|
+
_LockManager.MIN_TTL = 1e3;
|
|
2169
|
+
// 1 second
|
|
2170
|
+
_LockManager.MAX_TTL = 3e5;
|
|
2171
|
+
var LockManager = _LockManager;
|
|
2172
|
+
|
|
2173
|
+
// src/security/SecurityManager.ts
|
|
2174
|
+
var SecurityManager = class {
|
|
2175
|
+
constructor(policies = []) {
|
|
2176
|
+
this.policies = [];
|
|
2177
|
+
this.policies = policies;
|
|
2178
|
+
}
|
|
2179
|
+
addPolicy(policy) {
|
|
2180
|
+
this.policies.push(policy);
|
|
2181
|
+
}
|
|
2182
|
+
checkPermission(principal, mapName, action) {
|
|
2183
|
+
if (principal.roles.includes("ADMIN")) {
|
|
2184
|
+
return true;
|
|
2185
|
+
}
|
|
2186
|
+
if (mapName.startsWith("$sys/")) {
|
|
2187
|
+
logger.warn({ userId: principal.userId, mapName }, "Access Denied: System Map requires ADMIN role");
|
|
2188
|
+
return false;
|
|
2189
|
+
}
|
|
2190
|
+
for (const policy of this.policies) {
|
|
2191
|
+
const hasRole = this.hasRole(principal, policy.role);
|
|
2192
|
+
const matchesMap = this.matchesMap(mapName, policy.mapNamePattern, principal);
|
|
2193
|
+
if (hasRole && matchesMap) {
|
|
2194
|
+
if (policy.actions.includes("ALL") || policy.actions.includes(action)) {
|
|
2195
|
+
return true;
|
|
2196
|
+
}
|
|
2197
|
+
} else {
|
|
2198
|
+
}
|
|
2199
|
+
}
|
|
2200
|
+
logger.warn({
|
|
2201
|
+
userId: principal.userId,
|
|
2202
|
+
roles: principal.roles,
|
|
2203
|
+
mapName,
|
|
2204
|
+
action,
|
|
2205
|
+
policyCount: this.policies.length
|
|
2206
|
+
}, "SecurityManager: Access Denied - No matching policy found");
|
|
2207
|
+
return false;
|
|
2208
|
+
}
|
|
2209
|
+
filterObject(object, principal, mapName) {
|
|
2210
|
+
if (!object || typeof object !== "object") return object;
|
|
2211
|
+
if (principal.roles.includes("ADMIN")) return object;
|
|
2212
|
+
if (Array.isArray(object)) {
|
|
2213
|
+
return object.map((item) => this.filterObject(item, principal, mapName));
|
|
2214
|
+
}
|
|
2215
|
+
let allowedFields = null;
|
|
2216
|
+
let accessGranted = false;
|
|
2217
|
+
for (const policy of this.policies) {
|
|
2218
|
+
if (this.hasRole(principal, policy.role) && this.matchesMap(mapName, policy.mapNamePattern, principal)) {
|
|
2219
|
+
if (policy.actions.includes("ALL") || policy.actions.includes("READ")) {
|
|
2220
|
+
accessGranted = true;
|
|
2221
|
+
if (!policy.allowedFields || policy.allowedFields.length === 0 || policy.allowedFields.includes("*")) {
|
|
2222
|
+
return object;
|
|
2223
|
+
}
|
|
2224
|
+
if (allowedFields === null) allowedFields = /* @__PURE__ */ new Set();
|
|
2225
|
+
policy.allowedFields.forEach((f) => allowedFields.add(f));
|
|
2226
|
+
}
|
|
2227
|
+
}
|
|
2228
|
+
}
|
|
2229
|
+
if (!accessGranted) return null;
|
|
2230
|
+
if (allowedFields === null) return object;
|
|
2231
|
+
const filtered = {};
|
|
2232
|
+
for (const key of Object.keys(object)) {
|
|
2233
|
+
if (allowedFields.has(key)) {
|
|
2234
|
+
filtered[key] = object[key];
|
|
2235
|
+
}
|
|
2236
|
+
}
|
|
2237
|
+
return filtered;
|
|
2238
|
+
}
|
|
2239
|
+
hasRole(principal, role) {
|
|
2240
|
+
return principal.roles.includes(role);
|
|
2241
|
+
}
|
|
2242
|
+
matchesMap(mapName, pattern, principal) {
|
|
2243
|
+
let finalPattern = pattern;
|
|
2244
|
+
if (pattern.includes("{userId}") && principal) {
|
|
2245
|
+
finalPattern = pattern.replace("{userId}", principal.userId);
|
|
2246
|
+
}
|
|
2247
|
+
if (finalPattern === "*") return true;
|
|
2248
|
+
if (finalPattern === mapName) return true;
|
|
2249
|
+
if (finalPattern.endsWith("*")) {
|
|
2250
|
+
const prefix = finalPattern.slice(0, -1);
|
|
2251
|
+
return mapName.startsWith(prefix);
|
|
2252
|
+
}
|
|
2253
|
+
return false;
|
|
2254
|
+
}
|
|
2255
|
+
};
|
|
2256
|
+
|
|
2257
|
+
// src/monitoring/MetricsService.ts
|
|
2258
|
+
import { Registry, Gauge, Counter, Summary, collectDefaultMetrics } from "prom-client";
|
|
2259
|
+
var MetricsService = class {
|
|
2260
|
+
constructor() {
|
|
2261
|
+
this.registry = new Registry();
|
|
2262
|
+
collectDefaultMetrics({ register: this.registry, prefix: "topgun_" });
|
|
2263
|
+
this.connectedClients = new Gauge({
|
|
2264
|
+
name: "topgun_connected_clients",
|
|
2265
|
+
help: "Number of currently connected clients",
|
|
2266
|
+
registers: [this.registry]
|
|
2267
|
+
});
|
|
2268
|
+
this.mapSizeItems = new Gauge({
|
|
2269
|
+
name: "topgun_map_size_items",
|
|
2270
|
+
help: "Number of items in a map",
|
|
2271
|
+
labelNames: ["map"],
|
|
2272
|
+
registers: [this.registry]
|
|
2273
|
+
});
|
|
2274
|
+
this.opsTotal = new Counter({
|
|
2275
|
+
name: "topgun_ops_total",
|
|
2276
|
+
help: "Total number of operations",
|
|
2277
|
+
labelNames: ["type", "map"],
|
|
2278
|
+
registers: [this.registry]
|
|
2279
|
+
});
|
|
2280
|
+
this.memoryUsage = new Gauge({
|
|
2281
|
+
name: "topgun_memory_usage_bytes",
|
|
2282
|
+
help: "Current memory usage in bytes",
|
|
2283
|
+
registers: [this.registry],
|
|
2284
|
+
collect() {
|
|
2285
|
+
this.set(process.memoryUsage().heapUsed);
|
|
2286
|
+
}
|
|
2287
|
+
});
|
|
2288
|
+
this.clusterMembers = new Gauge({
|
|
2289
|
+
name: "topgun_cluster_members",
|
|
2290
|
+
help: "Number of active cluster members",
|
|
2291
|
+
registers: [this.registry]
|
|
2292
|
+
});
|
|
2293
|
+
this.eventsRoutedTotal = new Counter({
|
|
2294
|
+
name: "topgun_events_routed_total",
|
|
2295
|
+
help: "Total number of events processed for routing",
|
|
2296
|
+
registers: [this.registry]
|
|
2297
|
+
});
|
|
2298
|
+
this.eventsFilteredBySubscription = new Counter({
|
|
2299
|
+
name: "topgun_events_filtered_by_subscription",
|
|
2300
|
+
help: "Events NOT sent due to no active subscriptions",
|
|
2301
|
+
registers: [this.registry]
|
|
2302
|
+
});
|
|
2303
|
+
this.subscribersPerEvent = new Summary({
|
|
2304
|
+
name: "topgun_subscribers_per_event",
|
|
2305
|
+
help: "Distribution of subscribers per event",
|
|
2306
|
+
percentiles: [0.5, 0.9, 0.99],
|
|
2307
|
+
registers: [this.registry]
|
|
2308
|
+
});
|
|
2309
|
+
this.eventQueueSize = new Gauge({
|
|
2310
|
+
name: "topgun_event_queue_size",
|
|
2311
|
+
help: "Current size of the event queue across all stripes",
|
|
2312
|
+
labelNames: ["stripe"],
|
|
2313
|
+
registers: [this.registry]
|
|
2314
|
+
});
|
|
2315
|
+
this.eventQueueEnqueued = new Counter({
|
|
2316
|
+
name: "topgun_event_queue_enqueued_total",
|
|
2317
|
+
help: "Total number of events enqueued",
|
|
2318
|
+
registers: [this.registry]
|
|
2319
|
+
});
|
|
2320
|
+
this.eventQueueDequeued = new Counter({
|
|
2321
|
+
name: "topgun_event_queue_dequeued_total",
|
|
2322
|
+
help: "Total number of events dequeued",
|
|
2323
|
+
registers: [this.registry]
|
|
2324
|
+
});
|
|
2325
|
+
this.eventQueueRejected = new Counter({
|
|
2326
|
+
name: "topgun_event_queue_rejected_total",
|
|
2327
|
+
help: "Total number of events rejected due to queue capacity",
|
|
2328
|
+
registers: [this.registry]
|
|
2329
|
+
});
|
|
2330
|
+
this.backpressureSyncForcedTotal = new Counter({
|
|
2331
|
+
name: "topgun_backpressure_sync_forced_total",
|
|
2332
|
+
help: "Total number of times sync processing was forced",
|
|
2333
|
+
registers: [this.registry]
|
|
2334
|
+
});
|
|
2335
|
+
this.backpressurePendingOps = new Gauge({
|
|
2336
|
+
name: "topgun_backpressure_pending_ops",
|
|
2337
|
+
help: "Current number of pending async operations",
|
|
2338
|
+
registers: [this.registry]
|
|
2339
|
+
});
|
|
2340
|
+
this.backpressureWaitsTotal = new Counter({
|
|
2341
|
+
name: "topgun_backpressure_waits_total",
|
|
2342
|
+
help: "Total number of times had to wait for capacity",
|
|
2343
|
+
registers: [this.registry]
|
|
2344
|
+
});
|
|
2345
|
+
this.backpressureTimeoutsTotal = new Counter({
|
|
2346
|
+
name: "topgun_backpressure_timeouts_total",
|
|
2347
|
+
help: "Total number of backpressure timeouts",
|
|
2348
|
+
registers: [this.registry]
|
|
2349
|
+
});
|
|
2350
|
+
this.connectionsAcceptedTotal = new Counter({
|
|
2351
|
+
name: "topgun_connections_accepted_total",
|
|
2352
|
+
help: "Total number of connections accepted",
|
|
2353
|
+
registers: [this.registry]
|
|
2354
|
+
});
|
|
2355
|
+
this.connectionsRejectedTotal = new Counter({
|
|
2356
|
+
name: "topgun_connections_rejected_total",
|
|
2357
|
+
help: "Total number of connections rejected due to rate limiting",
|
|
2358
|
+
registers: [this.registry]
|
|
2359
|
+
});
|
|
2360
|
+
this.connectionsPending = new Gauge({
|
|
2361
|
+
name: "topgun_connections_pending",
|
|
2362
|
+
help: "Number of connections currently pending (handshake in progress)",
|
|
2363
|
+
registers: [this.registry]
|
|
2364
|
+
});
|
|
2365
|
+
this.connectionRatePerSecond = new Gauge({
|
|
2366
|
+
name: "topgun_connection_rate_per_second",
|
|
2367
|
+
help: "Current connection rate per second",
|
|
2368
|
+
registers: [this.registry]
|
|
2369
|
+
});
|
|
2370
|
+
}
|
|
2371
|
+
destroy() {
|
|
2372
|
+
this.registry.clear();
|
|
2373
|
+
}
|
|
2374
|
+
setConnectedClients(count) {
|
|
2375
|
+
this.connectedClients.set(count);
|
|
2376
|
+
}
|
|
2377
|
+
setMapSize(mapName, size) {
|
|
2378
|
+
this.mapSizeItems.set({ map: mapName }, size);
|
|
2379
|
+
}
|
|
2380
|
+
incOp(type, mapName) {
|
|
2381
|
+
this.opsTotal.inc({ type, map: mapName });
|
|
2382
|
+
}
|
|
2383
|
+
setClusterMembers(count) {
|
|
2384
|
+
this.clusterMembers.set(count);
|
|
2385
|
+
}
|
|
2386
|
+
// === Subscription-based routing metric methods ===
|
|
2387
|
+
/**
|
|
2388
|
+
* Increment counter for total events processed for routing.
|
|
2389
|
+
*/
|
|
2390
|
+
incEventsRouted() {
|
|
2391
|
+
this.eventsRoutedTotal.inc();
|
|
2392
|
+
}
|
|
2393
|
+
/**
|
|
2394
|
+
* Increment counter for events filtered out due to no subscribers.
|
|
2395
|
+
*/
|
|
2396
|
+
incEventsFilteredBySubscription() {
|
|
2397
|
+
this.eventsFilteredBySubscription.inc();
|
|
2398
|
+
}
|
|
2399
|
+
/**
|
|
2400
|
+
* Record the number of subscribers for an event (for average calculation).
|
|
2401
|
+
*/
|
|
2402
|
+
recordSubscribersPerEvent(count) {
|
|
2403
|
+
this.subscribersPerEvent.observe(count);
|
|
2404
|
+
}
|
|
2405
|
+
// === Bounded event queue metric methods ===
|
|
2406
|
+
/**
|
|
2407
|
+
* Set the current size of a specific queue stripe.
|
|
2408
|
+
*/
|
|
2409
|
+
setEventQueueSize(stripe, size) {
|
|
2410
|
+
this.eventQueueSize.set({ stripe: String(stripe) }, size);
|
|
2411
|
+
}
|
|
2412
|
+
/**
|
|
2413
|
+
* Increment counter for events enqueued.
|
|
2414
|
+
*/
|
|
2415
|
+
incEventQueueEnqueued() {
|
|
2416
|
+
this.eventQueueEnqueued.inc();
|
|
2417
|
+
}
|
|
2418
|
+
/**
|
|
2419
|
+
* Increment counter for events dequeued.
|
|
2420
|
+
*/
|
|
2421
|
+
incEventQueueDequeued() {
|
|
2422
|
+
this.eventQueueDequeued.inc();
|
|
2423
|
+
}
|
|
2424
|
+
/**
|
|
2425
|
+
* Increment counter for events rejected due to queue capacity.
|
|
2426
|
+
*/
|
|
2427
|
+
incEventQueueRejected() {
|
|
2428
|
+
this.eventQueueRejected.inc();
|
|
2429
|
+
}
|
|
2430
|
+
// === Backpressure metric methods ===
|
|
2431
|
+
/**
|
|
2432
|
+
* Increment counter for forced sync operations.
|
|
2433
|
+
*/
|
|
2434
|
+
incBackpressureSyncForced() {
|
|
2435
|
+
this.backpressureSyncForcedTotal.inc();
|
|
2436
|
+
}
|
|
2437
|
+
/**
|
|
2438
|
+
* Set the current number of pending async operations.
|
|
2439
|
+
*/
|
|
2440
|
+
setBackpressurePendingOps(count) {
|
|
2441
|
+
this.backpressurePendingOps.set(count);
|
|
2442
|
+
}
|
|
2443
|
+
/**
|
|
2444
|
+
* Increment counter for times had to wait for capacity.
|
|
2445
|
+
*/
|
|
2446
|
+
incBackpressureWaits() {
|
|
2447
|
+
this.backpressureWaitsTotal.inc();
|
|
2448
|
+
}
|
|
2449
|
+
/**
|
|
2450
|
+
* Increment counter for backpressure timeouts.
|
|
2451
|
+
*/
|
|
2452
|
+
incBackpressureTimeouts() {
|
|
2453
|
+
this.backpressureTimeoutsTotal.inc();
|
|
2454
|
+
}
|
|
2455
|
+
// === Connection scaling metric methods ===
|
|
2456
|
+
/**
|
|
2457
|
+
* Increment counter for accepted connections.
|
|
2458
|
+
*/
|
|
2459
|
+
incConnectionsAccepted() {
|
|
2460
|
+
this.connectionsAcceptedTotal.inc();
|
|
2461
|
+
}
|
|
2462
|
+
/**
|
|
2463
|
+
* Increment counter for rejected connections.
|
|
2464
|
+
*/
|
|
2465
|
+
incConnectionsRejected() {
|
|
2466
|
+
this.connectionsRejectedTotal.inc();
|
|
2467
|
+
}
|
|
2468
|
+
/**
|
|
2469
|
+
* Set the current number of pending connections.
|
|
2470
|
+
*/
|
|
2471
|
+
setConnectionsPending(count) {
|
|
2472
|
+
this.connectionsPending.set(count);
|
|
2473
|
+
}
|
|
2474
|
+
/**
|
|
2475
|
+
* Set the current connection rate per second.
|
|
2476
|
+
*/
|
|
2477
|
+
setConnectionRatePerSecond(rate) {
|
|
2478
|
+
this.connectionRatePerSecond.set(rate);
|
|
2479
|
+
}
|
|
2480
|
+
async getMetrics() {
|
|
2481
|
+
return this.registry.metrics();
|
|
2482
|
+
}
|
|
2483
|
+
async getMetricsJson() {
|
|
2484
|
+
const metrics = await this.registry.getMetricsAsJSON();
|
|
2485
|
+
const result = {};
|
|
2486
|
+
for (const metric of metrics) {
|
|
2487
|
+
if (metric.values.length === 1) {
|
|
2488
|
+
result[metric.name] = metric.values[0].value;
|
|
2489
|
+
} else {
|
|
2490
|
+
result[metric.name] = metric.values;
|
|
2491
|
+
}
|
|
2492
|
+
}
|
|
2493
|
+
return result;
|
|
2494
|
+
}
|
|
2495
|
+
getContentType() {
|
|
2496
|
+
return this.registry.contentType;
|
|
2497
|
+
}
|
|
2498
|
+
};
|
|
2499
|
+
|
|
2500
|
+
// src/system/SystemManager.ts
|
|
2501
|
+
var SystemManager = class {
|
|
2502
|
+
constructor(cluster, metrics, getMap) {
|
|
2503
|
+
this.cluster = cluster;
|
|
2504
|
+
this.metrics = metrics;
|
|
2505
|
+
this.getMap = getMap;
|
|
2506
|
+
}
|
|
2507
|
+
start() {
|
|
2508
|
+
this.setupClusterMap();
|
|
2509
|
+
this.setupStatsMap();
|
|
2510
|
+
this.setupMapsMap();
|
|
2511
|
+
this.statsInterval = setInterval(() => this.updateStats(), 5e3);
|
|
2512
|
+
this.cluster.on("memberJoined", () => this.updateClusterMap());
|
|
2513
|
+
this.cluster.on("memberLeft", () => this.updateClusterMap());
|
|
2514
|
+
this.updateClusterMap();
|
|
2515
|
+
this.updateStats();
|
|
2516
|
+
}
|
|
2517
|
+
stop() {
|
|
2518
|
+
if (this.statsInterval) {
|
|
2519
|
+
clearInterval(this.statsInterval);
|
|
2520
|
+
}
|
|
2521
|
+
}
|
|
2522
|
+
notifyMapCreated(mapName) {
|
|
2523
|
+
if (mapName.startsWith("$sys/")) return;
|
|
1360
2524
|
this.updateMapsMap(mapName);
|
|
1361
2525
|
}
|
|
1362
2526
|
setupClusterMap() {
|
|
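The LockManager added in this hunk issues a monotonically increasing `fencingToken` on every grant and checks it again on `release`. The sketch below illustrates why the token matters on the consumer side; the `FencedStore` class and its API are hypothetical and not part of this package.

```ts
// Hypothetical illustration (not the published client API): a downstream store
// can use the fencing token to reject writes from a holder whose lease expired.
interface GuardedWrite {
  token: number; // fencing token returned by acquire()
  value: string;
}

class FencedStore {
  private highestToken = 0;
  private value = "";

  write(req: GuardedWrite): boolean {
    // A write from a lock holder that stalled (GC pause, network hiccup) and
    // has since lost the lock carries an older token and is refused.
    if (req.token < this.highestToken) return false;
    this.highestToken = req.token;
    this.value = req.value;
    return true;
  }
}

// Usage sketch: holder A gets token 1 and stalls; the lease expires, holder B
// gets token 2 and writes. A's late write with token 1 is then rejected.
const store = new FencedStore();
store.write({ token: 2, value: "from B" }); // accepted
store.write({ token: 1, value: "from A" }); // rejected: stale token
```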
@@ -1411,8 +2575,8 @@ var SystemManager = class {
 };
 
 // src/utils/BoundedEventQueue.ts
-import { EventEmitter as
-var BoundedEventQueue = class extends
+import { EventEmitter as EventEmitter6 } from "events";
+var BoundedEventQueue = class extends EventEmitter6 {
 constructor(options) {
 super();
 this.queue = [];
@@ -1844,7 +3008,7 @@ var BackpressureRegulator = class {
 
 // src/utils/CoalescingWriter.ts
 import { WebSocket as WebSocket2 } from "ws";
-import { serialize as
+import { serialize as serialize3 } from "@topgunbuild/core";
 
 // src/memory/BufferPool.ts
 var DEFAULT_CONFIG2 = {
@@ -2375,7 +3539,7 @@ var CoalescingWriter = class {
 if (this.closed) {
 return;
 }
-const data =
+const data = serialize3(message);
 this.writeRaw(data, urgent);
 }
 /**
@@ -2559,7 +3723,7 @@ var CoalescingWriter = class {
 offset += msg.data.length;
 }
 const usedBatch = batch.subarray(0, totalSize);
-const batchEnvelope =
+const batchEnvelope = serialize3({
 type: "BATCH",
 count: messages.length,
 data: usedBatch
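The two CoalescingWriter hunks above switch the writer to the shared `serialize3` from @topgunbuild/core, both for single messages and for the `BATCH` envelope. The sketch below restates the batching shape in isolation; `SerializeFn` and `buildBatchEnvelope` are illustrative stand-ins and not exports of this package.

```ts
// Minimal sketch of the batching shape used above: several already-serialized
// messages are copied into one contiguous buffer and wrapped in a single
// { type: "BATCH", count, data } envelope.
type SerializeFn = (value: unknown) => Uint8Array;

function buildBatchEnvelope(chunks: Uint8Array[], serializeFn: SerializeFn): Uint8Array {
  const totalSize = chunks.reduce((n, c) => n + c.length, 0);
  const batch = new Uint8Array(totalSize);
  let offset = 0;
  for (const c of chunks) {
    batch.set(c, offset);
    offset += c.length; // same accounting as `offset += msg.data.length`
  }
  return serializeFn({
    type: "BATCH",
    count: chunks.length,
    data: batch.subarray(0, totalSize)
  });
}
```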
@@ -2574,13 +3738,23 @@ var CoalescingWriter = class {
 // src/utils/coalescingPresets.ts
 var coalescingPresets = {
 /**
-*
-*
-*
+* Low latency - optimized for minimal response time.
+* Best for: gaming, real-time chat, interactive applications.
+* Benchmark: p50=2ms, ~18K ops/sec
+*/
+lowLatency: {
+maxBatchSize: 100,
+maxDelayMs: 1,
+maxBatchBytes: 65536
+// 64KB
+},
+/**
+* Conservative - good balance of latency and batching.
+* Use for: general purpose with latency sensitivity.
 */
 conservative: {
 maxBatchSize: 100,
-maxDelayMs:
+maxDelayMs: 2,
 maxBatchBytes: 65536
 // 64KB
 },
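The presets above trade a small bounded delay (`maxDelayMs`) for larger batches. A back-of-envelope helper makes the tradeoff concrete; the 10,000 msg/s input rate is an assumed figure for illustration, not a benchmark from this package.

```ts
// Illustrative only: how maxDelayMs and maxBatchSize bound flush behaviour.
interface CoalescingPreset {
  maxBatchSize: number;
  maxDelayMs: number;
  maxBatchBytes: number;
}

function flushEstimate(preset: CoalescingPreset, msgsPerSecond: number) {
  const arrivalsPerWindow = (msgsPerSecond * preset.maxDelayMs) / 1000;
  return {
    // a message is held at most maxDelayMs before the timer flushes it
    worstCaseAddedLatencyMs: preset.maxDelayMs,
    // the size cap flushes earlier than the timer once the window fills up
    expectedBatchSize: Math.min(preset.maxBatchSize, Math.max(1, Math.round(arrivalsPerWindow)))
  };
}

// e.g. an assumed 10,000 msg/s stream with the conservative preset (maxDelayMs: 2)
// yields roughly 20-message batches and at most ~2 ms of added delay.
console.log(flushEstimate({ maxBatchSize: 100, maxDelayMs: 2, maxBatchBytes: 65536 }, 10000));
```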
@@ -2591,7 +3765,7 @@ var coalescingPresets = {
 */
 balanced: {
 maxBatchSize: 300,
-maxDelayMs:
+maxDelayMs: 2,
 maxBatchBytes: 131072
 // 128KB
 },
@@ -2599,10 +3773,11 @@ var coalescingPresets = {
 * High throughput - optimized for write-heavy workloads.
 * Higher batching for better network utilization.
 * Use for: data ingestion, logging, IoT data streams.
+* Benchmark: p50=7ms, ~18K ops/sec
 */
 highThroughput: {
 maxBatchSize: 500,
-maxDelayMs:
+maxDelayMs: 2,
 maxBatchBytes: 262144
 // 256KB
 },
@@ -2613,7 +3788,7 @@ var coalescingPresets = {
 */
 aggressive: {
 maxBatchSize: 1e3,
-maxDelayMs:
+maxDelayMs: 5,
 maxBatchBytes: 524288
 // 512KB
 }
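With all four presets now carrying explicit `maxDelayMs` values, choosing one is mostly a question of workload. The helper below mirrors the numbers shown above as plain objects; how a preset is actually wired into the server configuration is not visible in this diff.

```ts
// Illustrative selection helper using the preset values shown in the hunks above.
type Workload = "interactive" | "general" | "ingest";

const presetFor: Record<Workload, { maxBatchSize: number; maxDelayMs: number; maxBatchBytes: number }> = {
  interactive: { maxBatchSize: 100, maxDelayMs: 1, maxBatchBytes: 65536 },  // lowLatency
  general: { maxBatchSize: 300, maxDelayMs: 2, maxBatchBytes: 131072 },     // balanced
  ingest: { maxBatchSize: 500, maxDelayMs: 2, maxBatchBytes: 262144 }       // highThroughput
};

console.log(presetFor["ingest"]);
```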
@@ -3144,6 +4319,7 @@ var WorkerPool = class {
 
 // src/workers/MerkleWorker.ts
 import { join as join2 } from "path";
+import { hashString as coreHashString } from "@topgunbuild/core";
 var WORKER_THRESHOLD = 10;
 var taskIdCounter = 0;
 function generateTaskId() {
@@ -3353,12 +4529,7 @@ var MerkleWorker = class {
 }
 // ============ Hash utilities ============
 hashString(str) {
-
-for (let i = 0; i < str.length; i++) {
-hash ^= str.charCodeAt(i);
-hash = Math.imul(hash, 16777619);
-}
-return hash >>> 0;
+return coreHashString(str);
 }
 buildTree(entries, depth) {
 const root = { hash: 0, children: {} };
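The hunk above replaces MerkleWorker's inline string hash with `coreHashString` from @topgunbuild/core. For reference, the removed loop was a 32-bit FNV-1a; its seed line is truncated in this diff, so the conventional offset basis 2166136261 is assumed below, and whether `coreHashString` uses identical constants cannot be confirmed from this bundle.

```ts
// 32-bit FNV-1a, matching the shape of the removed inline implementation.
// The seed value is an assumption (standard FNV-1a offset basis).
function fnv1a32(str: string): number {
  let hash = 2166136261;
  for (let i = 0; i < str.length; i++) {
    hash ^= str.charCodeAt(i);        // xor in the next UTF-16 code unit
    hash = Math.imul(hash, 16777619); // multiply by the FNV prime, mod 2^32
  }
  return hash >>> 0;                  // coerce to an unsigned 32-bit integer
}
```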
@@ -3857,726 +5028,1387 @@ var SharedMemoryManager = class {
 return true;
 }
 /**
-* Read data length from a slot.
+* Read data length from a slot.
+*/
+getDataLength(slotIndex) {
+const lengthView = new DataView(
+this.buffer,
+this.getLengthOffset(slotIndex),
+4
+);
+return lengthView.getUint32(0, true);
+}
+/**
+* Get data view for a slot (for reading).
+*/
+getDataView(slotIndex) {
+const length = this.getDataLength(slotIndex);
+const dataOffset = this.getDataOffset(slotIndex);
+return new Uint8Array(this.buffer, dataOffset, length);
+}
+/**
+* Get slot status.
+*/
+getStatus(slotIndex) {
+return Atomics.load(this.statusArray, this.getStatusOffset(slotIndex));
+}
+/**
+* Wait for a specific status with timeout.
+* Returns the actual status (may differ if timeout occurred).
+*/
+waitForStatus(slotIndex, expectedStatus, timeoutMs = 5e3) {
+const statusOffset = this.getStatusOffset(slotIndex);
+const deadline = Date.now() + timeoutMs;
+while (Date.now() < deadline) {
+const status = Atomics.load(this.statusArray, statusOffset);
+if (status === expectedStatus || status === 255 /* ERROR */) {
+return status;
+}
+const remaining = deadline - Date.now();
+if (remaining > 0) {
+Atomics.wait(
+this.statusArray,
+statusOffset,
+status,
+Math.min(remaining, 100)
+);
+}
+}
+return Atomics.load(this.statusArray, statusOffset);
+}
+/**
+* Wait for result and read it.
+* Returns null on timeout or error.
+*/
+waitForResult(slot, timeoutMs = 5e3) {
+const status = this.waitForStatus(
+slot.index,
+4 /* RESULT_READY */,
+timeoutMs
+);
+if (status === 4 /* RESULT_READY */) {
+const length = this.getDataLength(slot.index);
+const result = new Uint8Array(length);
+result.set(slot.dataView.subarray(0, length));
+return result;
+}
+return null;
+}
+/**
+* Get the SharedArrayBuffer for passing to workers.
+*/
+getBuffer() {
+return this.buffer;
+}
+/**
+* Get configuration needed by workers.
+*/
+getWorkerConfig() {
+return {
+sharedBuffer: this.buffer,
+slotSize: this.slotSize,
+slotCount: this.slotCount,
+metadataSize: this.metadataSize
+};
+}
+/**
+* Get statistics.
+*/
+getStats() {
+return {
+totalSize: this.buffer.byteLength,
+slotCount: this.slotCount,
+slotSize: this.slotSize,
+allocatedSlots: this.allocatedCount,
+availableSlots: this.freeSlots.size,
+peakUsage: this.peakUsage,
+totalAllocations: this.totalAllocations,
+totalReleases: this.totalReleases
+};
+}
+/**
+* Check if SharedArrayBuffer is available in current environment.
+*/
+static isAvailable() {
+try {
+new SharedArrayBuffer(1);
+return true;
+} catch {
+return false;
+}
+}
+/**
+* Shutdown and release resources.
+* Resets all slots to FREE status.
+*/
+shutdown() {
+for (let i = 0; i < this.slotCount; i++) {
+Atomics.store(this.statusArray, this.getStatusOffset(i), 0 /* FREE */);
+}
+this.freeSlots.clear();
+for (let i = 0; i < this.slotCount; i++) {
+this.freeSlots.add(i);
+}
+this.allocatedCount = 0;
+}
+};
+
+// src/tasklet/TaskletScheduler.ts
+var DEFAULT_CONFIG4 = {
+defaultTimeBudgetMs: 5,
+maxConcurrent: 10,
+tickIntervalMs: 1,
+metricsEnabled: true
+};
+var TaskletScheduler = class {
+constructor(config) {
+this.activeTasklets = /* @__PURE__ */ new Map();
+this.tickTimer = null;
+this.isRunning = false;
+this.isShuttingDown = false;
+// Metrics
+this.totalScheduled = 0;
+this.completedTasklets = 0;
+this.cancelledTasklets = 0;
+this.totalIterations = 0;
+this.singleIterationCompletions = 0;
+this.totalExecutionTimeMs = 0;
+this.config = { ...DEFAULT_CONFIG4, ...config };
+}
+/**
+* Schedule a tasklet for execution.
+* Returns a promise that resolves when the tasklet completes.
+*/
+schedule(tasklet) {
+if (this.isShuttingDown) {
+return Promise.reject(new Error("Scheduler is shutting down"));
+}
+return new Promise((resolve, reject) => {
+const taskletId = `${tasklet.name}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
+if (this.activeTasklets.size >= this.config.maxConcurrent) {
+reject(new Error(`Max concurrent tasklets (${this.config.maxConcurrent}) reached`));
+return;
+}
+const state = {
+tasklet,
+resolve,
+reject,
+iterations: 0,
+startTime: Date.now(),
+lastProgressTime: Date.now()
+};
+this.activeTasklets.set(taskletId, state);
+this.totalScheduled++;
+if (!this.isRunning) {
+this.startScheduler();
+}
+});
+}
+/**
+* Run a tasklet synchronously (blocking).
+* Useful for small operations or when cooperative scheduling isn't needed.
+*/
+runSync(tasklet) {
+let state;
+let iterations = 0;
+const startTime = Date.now();
+do {
+state = tasklet.call();
+iterations++;
+} while (state === "MADE_PROGRESS");
+if (state === "NO_PROGRESS") {
+throw new Error(`Tasklet ${tasklet.name} made no progress`);
+}
+if (this.config.metricsEnabled) {
+this.totalIterations += iterations;
+this.totalExecutionTimeMs += Date.now() - startTime;
+if (iterations === 1) {
+this.singleIterationCompletions++;
+}
+}
+return tasklet.getResult();
+}
+/**
+* Cancel a specific tasklet by name pattern.
+* Returns the number of tasklets cancelled.
+*/
+cancel(namePattern) {
+let cancelled = 0;
+const pattern = typeof namePattern === "string" ? new RegExp(`^${namePattern}`) : namePattern;
+for (const [id, state] of this.activeTasklets) {
+if (pattern.test(state.tasklet.name)) {
+this.cancelTasklet(id, state);
+cancelled++;
+}
+}
+return cancelled;
+}
+/**
+* Cancel all running tasklets.
+*/
+cancelAll() {
+let cancelled = 0;
+for (const [id, state] of this.activeTasklets) {
+this.cancelTasklet(id, state);
+cancelled++;
+}
+return cancelled;
+}
+/**
+* Get scheduler statistics.
+*/
+getStats() {
+return {
+totalScheduled: this.totalScheduled,
+activeTasklets: this.activeTasklets.size,
+completedTasklets: this.completedTasklets,
+cancelledTasklets: this.cancelledTasklets,
+totalIterations: this.totalIterations,
+avgIterationsPerTasklet: this.completedTasklets > 0 ? this.totalIterations / this.completedTasklets : 0,
+singleIterationCompletions: this.singleIterationCompletions,
+totalExecutionTimeMs: this.totalExecutionTimeMs
+};
+}
+/**
+* Reset statistics.
 */
-
-
-
-
-
-
-
+resetStats() {
+this.totalScheduled = 0;
+this.completedTasklets = 0;
+this.cancelledTasklets = 0;
+this.totalIterations = 0;
+this.singleIterationCompletions = 0;
+this.totalExecutionTimeMs = 0;
 }
 /**
-*
+* Shutdown the scheduler.
+* Cancels all running tasklets and stops the tick timer.
 */
-
-
-
-
+shutdown() {
+this.isShuttingDown = true;
+this.cancelAll();
+this.stopScheduler();
 }
 /**
-*
+* Check if scheduler is running.
 */
-
-return
+get running() {
+return this.isRunning;
 }
 /**
-*
-* Returns the actual status (may differ if timeout occurred).
+* Get number of active tasklets.
 */
-
-
-
-
-
-
-
+get activeCount() {
+return this.activeTasklets.size;
+}
+startScheduler() {
+if (this.isRunning) return;
+this.isRunning = true;
+this.scheduleTick();
+}
+stopScheduler() {
+this.isRunning = false;
+if (this.tickTimer) {
+clearImmediate(this.tickTimer);
+this.tickTimer = null;
+}
+}
+scheduleTick() {
+if (!this.isRunning) return;
+this.tickTimer = setImmediate(() => {
+this.tick();
+});
+}
+tick() {
+if (!this.isRunning || this.activeTasklets.size === 0) {
+this.stopScheduler();
+return;
+}
+const tickStart = Date.now();
+const taskletIds = Array.from(this.activeTasklets.keys());
+for (const id of taskletIds) {
+const state = this.activeTasklets.get(id);
+if (!state) continue;
+try {
+const iterationStart = Date.now();
+const result = state.tasklet.call();
+const iterationTime = Date.now() - iterationStart;
+state.iterations++;
+state.lastProgressTime = Date.now();
+if (this.config.metricsEnabled) {
+this.totalIterations++;
+this.totalExecutionTimeMs += iterationTime;
+}
+if (result === "DONE") {
+this.completeTasklet(id, state);
+} else if (result === "NO_PROGRESS") {
+}
+} catch (error) {
+this.failTasklet(id, state, error);
 }
-
-
-Atomics.wait(
-this.statusArray,
-statusOffset,
-status,
-Math.min(remaining, 100)
-);
+if (Date.now() - tickStart > this.config.defaultTimeBudgetMs * 2) {
+break;
 }
 }
-
+if (this.activeTasklets.size > 0) {
+this.scheduleTick();
+} else {
+this.stopScheduler();
+}
 }
-
-
-
-
-
-const status = this.waitForStatus(
-slot.index,
-4 /* RESULT_READY */,
-timeoutMs
-);
-if (status === 4 /* RESULT_READY */) {
-const length = this.getDataLength(slot.index);
-const result = new Uint8Array(length);
-result.set(slot.dataView.subarray(0, length));
-return result;
+completeTasklet(id, state) {
+this.activeTasklets.delete(id);
+this.completedTasklets++;
+if (state.iterations === 1) {
+this.singleIterationCompletions++;
 }
-
+try {
+const result = state.tasklet.getResult();
+state.resolve(result);
+} catch (error) {
+state.reject(error);
+}
+}
+failTasklet(id, state, error) {
+this.activeTasklets.delete(id);
+state.reject(error);
+}
+cancelTasklet(id, state) {
+this.activeTasklets.delete(id);
+this.cancelledTasklets++;
+if (state.tasklet.onCancel) {
+try {
+state.tasklet.onCancel();
+} catch {
+}
+}
+state.reject(new Error(`Tasklet ${state.tasklet.name} was cancelled`));
+}
+};
+
+// src/tasklet/tasklets/IteratorTasklet.ts
+var DEFAULT_CONFIG5 = {
+timeBudgetMs: 5,
+maxItemsPerIteration: 1e3
+};
+var IteratorTasklet = class {
+constructor(iterator, config) {
+this.itemsProcessed = 0;
+this.isDone = false;
+this.iterator = iterator;
+this.config = { ...DEFAULT_CONFIG5, ...config };
 }
 /**
-*
+* Execute one chunk of iteration.
 */
-
-
+call() {
+if (this.isDone) {
+return "DONE";
+}
+const deadline = Date.now() + this.config.timeBudgetMs;
+let processedThisIteration = 0;
+while (Date.now() < deadline && processedThisIteration < this.config.maxItemsPerIteration) {
+const { value, done } = this.iterator.next();
+if (done) {
+this.isDone = true;
+return "DONE";
+}
+this.processItem(value);
+this.itemsProcessed++;
+processedThisIteration++;
+}
+return "MADE_PROGRESS";
 }
 /**
-*
+* Called when tasklet is cancelled.
 */
-
-return {
-sharedBuffer: this.buffer,
-slotSize: this.slotSize,
-slotCount: this.slotCount,
-metadataSize: this.metadataSize
-};
+onCancel() {
 }
 /**
-* Get
+* Get number of items processed so far.
 */
-
-return
-
-
-
-
-
-
-
-
-
+get processed() {
+return this.itemsProcessed;
+}
+};
+var FilterTasklet = class extends IteratorTasklet {
+constructor(name, iterator, predicate, config) {
+super(iterator, config);
+this.results = [];
+this.name = name;
+this.predicate = predicate;
+}
+processItem(item) {
+if (this.predicate(item)) {
+this.results.push(item);
+}
+}
+getResult() {
+return this.results;
+}
+};
+var MapTasklet = class extends IteratorTasklet {
+constructor(name, iterator, mapper, config) {
+super(iterator, config);
+this.results = [];
+this.name = name;
+this.mapper = mapper;
+}
+processItem(item) {
+this.results.push(this.mapper(item));
+}
+getResult() {
+return this.results;
+}
+};
+var ForEachTasklet = class extends IteratorTasklet {
+constructor(name, iterator, action, config) {
+super(iterator, config);
+this.name = name;
+this.action = action;
+}
+processItem(item) {
+this.action(item);
+}
+getResult() {
+return this.itemsProcessed;
 }
-
-
-
-
-
-
-
-} catch {
-return false;
-}
+};
+var ReduceTasklet = class extends IteratorTasklet {
+constructor(name, iterator, initialValue, reducer, config) {
+super(iterator, config);
+this.name = name;
+this.accumulator = initialValue;
+this.reducer = reducer;
 }
-
-
-
-
-
-for (let i = 0; i < this.slotCount; i++) {
-Atomics.store(this.statusArray, this.getStatusOffset(i), 0 /* FREE */);
-}
-this.freeSlots.clear();
-for (let i = 0; i < this.slotCount; i++) {
-this.freeSlots.add(i);
-}
-this.allocatedCount = 0;
+processItem(item) {
+this.accumulator = this.reducer(this.accumulator, item);
+}
+getResult() {
+return this.accumulator;
 }
 };
 
-// src/
-
-
-
-
-
-
-
+// src/ack/WriteAckManager.ts
+import { EventEmitter as EventEmitter7 } from "events";
+import {
+WriteConcern,
+DEFAULT_WRITE_CONCERN_TIMEOUT,
+isWriteConcernAchieved,
+getHighestWriteConcernLevel
+} from "@topgunbuild/core";
+var WriteAckManager = class extends EventEmitter7 {
 constructor(config) {
-
-this.
-this.
-this.isShuttingDown = false;
-// Metrics
-this.totalScheduled = 0;
-this.completedTasklets = 0;
-this.cancelledTasklets = 0;
-this.totalIterations = 0;
-this.singleIterationCompletions = 0;
-this.totalExecutionTimeMs = 0;
-this.config = { ...DEFAULT_CONFIG4, ...config };
+super();
+this.pending = /* @__PURE__ */ new Map();
+this.defaultTimeout = config?.defaultTimeout ?? DEFAULT_WRITE_CONCERN_TIMEOUT;
 }
 /**
-*
-* Returns a promise that resolves when
+* Register a pending write operation.
+* Returns a promise that resolves when target Write Concern is achieved.
+*
+* @param opId - Operation ID
+* @param writeConcern - Target Write Concern level
+* @param timeout - Optional timeout in ms (defaults to config or 5000ms)
+* @returns Promise that resolves with WriteResult
 */
-
-if (
-return Promise.
+registerPending(opId, writeConcern, timeout) {
+if (writeConcern === WriteConcern.FIRE_AND_FORGET) {
+return Promise.resolve({
+success: true,
+opId,
+achievedLevel: WriteConcern.FIRE_AND_FORGET,
+latencyMs: 0
+});
 }
 return new Promise((resolve, reject) => {
-const
-
-
-
-
-
-
+const effectiveTimeout = timeout ?? this.defaultTimeout;
+const timestamp = Date.now();
+const pendingWrite = {
+opId,
+writeConcern,
+timestamp,
+timeout: effectiveTimeout,
 resolve,
 reject,
-
-startTime: Date.now(),
-lastProgressTime: Date.now()
+achievedLevels: /* @__PURE__ */ new Set([WriteConcern.FIRE_AND_FORGET])
 };
-
-
-
-
+pendingWrite.timeoutHandle = setTimeout(() => {
+this.handleTimeout(opId);
+}, effectiveTimeout);
+this.pending.set(opId, pendingWrite);
+logger.debug(
+{ opId, writeConcern, timeout: effectiveTimeout },
+"Registered pending write"
+);
+if (writeConcern === WriteConcern.MEMORY) {
+this.notifyLevel(opId, WriteConcern.MEMORY);
 }
 });
 }
 /**
-*
-*
+* Notify that a Write Concern level has been achieved for an operation.
+*
+* @param opId - Operation ID
+* @param level - Write Concern level that was achieved
 */
-
-
-
-
-do {
-state = tasklet.call();
-iterations++;
-} while (state === "MADE_PROGRESS");
-if (state === "NO_PROGRESS") {
-throw new Error(`Tasklet ${tasklet.name} made no progress`);
+notifyLevel(opId, level) {
+const pending = this.pending.get(opId);
+if (!pending) {
+return;
 }
-
-
-
-
-
-
+pending.achievedLevels.add(level);
+logger.debug(
+{ opId, level, target: pending.writeConcern },
+"Write Concern level achieved"
+);
+if (isWriteConcernAchieved(pending.achievedLevels, pending.writeConcern)) {
+this.resolvePending(opId, level);
 }
-return tasklet.getResult();
 }
 /**
-*
-*
+* Notify multiple operations that a Write Concern level has been achieved.
+* Useful for batch operations.
+*
+* @param opIds - Array of operation IDs
+* @param level - Write Concern level that was achieved
 */
-
-
-
-for (const [id, state] of this.activeTasklets) {
-if (pattern.test(state.tasklet.name)) {
-this.cancelTasklet(id, state);
-cancelled++;
-}
+notifyLevelBatch(opIds, level) {
+for (const opId of opIds) {
+this.notifyLevel(opId, level);
 }
-return cancelled;
 }
 /**
-*
+* Check if an operation is still pending.
+*
+* @param opId - Operation ID
+* @returns true if operation is pending
 */
-
-
-
-
-
+isPending(opId) {
+return this.pending.has(opId);
+}
+/**
+* Get the target Write Concern level for a pending operation.
+*
+* @param opId - Operation ID
+* @returns Target Write Concern level or undefined if not pending
+*/
+getTargetLevel(opId) {
+return this.pending.get(opId)?.writeConcern;
+}
+/**
+* Get the highest achieved level for a pending operation.
+*
+* @param opId - Operation ID
+* @returns Highest achieved level or undefined if not pending
+*/
+getAchievedLevel(opId) {
+const pending = this.pending.get(opId);
+if (!pending) return void 0;
+return getHighestWriteConcernLevel(pending.achievedLevels);
+}
+/**
+* Resolve a pending write with success.
+*/
+resolvePending(opId, achievedLevel) {
+const pending = this.pending.get(opId);
+if (!pending) return;
+if (pending.timeoutHandle) {
+clearTimeout(pending.timeoutHandle);
 }
-
+const latencyMs = Date.now() - pending.timestamp;
+const result = {
+success: true,
+opId,
+achievedLevel,
+latencyMs
+};
+pending.resolve(result);
+this.pending.delete(opId);
+logger.debug(
+{ opId, achievedLevel, latencyMs },
+"Write resolved successfully"
+);
+this.emit("resolved", result);
 }
 /**
-*
+* Handle timeout for a pending write.
 */
-
-
-
-
-
-
-
-
-
-
+handleTimeout(opId) {
+const pending = this.pending.get(opId);
+if (!pending) return;
+const highestAchieved = getHighestWriteConcernLevel(pending.achievedLevels);
+const latencyMs = Date.now() - pending.timestamp;
+const result = {
+success: false,
+opId,
+achievedLevel: highestAchieved,
+latencyMs,
+error: `Timeout: achieved ${highestAchieved}, requested ${pending.writeConcern}`
 };
+pending.resolve(result);
+this.pending.delete(opId);
+logger.warn(
+{ opId, requested: pending.writeConcern, achieved: highestAchieved, latencyMs },
+"Write timed out"
+);
+this.emit("timeout", {
+opId,
+requested: pending.writeConcern,
+achieved: highestAchieved,
+latencyMs
+});
 }
 /**
-*
+* Fail a pending write with an error.
+*
+* @param opId - Operation ID
+* @param error - Error message
 */
-
-
-
-
-
-
-
+failPending(opId, error) {
+const pending = this.pending.get(opId);
+if (!pending) return;
+if (pending.timeoutHandle) {
+clearTimeout(pending.timeoutHandle);
+}
+const latencyMs = Date.now() - pending.timestamp;
+const highestAchieved = getHighestWriteConcernLevel(pending.achievedLevels);
+const result = {
+success: false,
+opId,
+achievedLevel: highestAchieved,
+latencyMs,
+error
+};
+pending.resolve(result);
+this.pending.delete(opId);
+logger.error({ opId, error, latencyMs }, "Write failed");
+this.emit("failed", result);
 }
 /**
-*
-* Cancels all running tasklets and stops the tick timer.
+* Get pending writes statistics.
 */
-
-
-
-
+getStats() {
+const byLevel = {
+[WriteConcern.FIRE_AND_FORGET]: 0,
+[WriteConcern.MEMORY]: 0,
+[WriteConcern.APPLIED]: 0,
+[WriteConcern.REPLICATED]: 0,
+[WriteConcern.PERSISTED]: 0
+};
+for (const pending of this.pending.values()) {
+byLevel[pending.writeConcern]++;
+}
+return { pending: this.pending.size, byLevel };
 }
 /**
-*
+* Get all pending operation IDs.
 */
-
-return this.
+getPendingIds() {
+return Array.from(this.pending.keys());
 }
 /**
-*
+* Clear all pending writes (for shutdown).
+* Rejects all pending promises with an error.
 */
-
-
-
-
-
-this.isRunning = true;
-this.scheduleTick();
-}
-stopScheduler() {
-this.isRunning = false;
-if (this.tickTimer) {
-clearImmediate(this.tickTimer);
-this.tickTimer = null;
-}
-}
-scheduleTick() {
-if (!this.isRunning) return;
-this.tickTimer = setImmediate(() => {
-this.tick();
-});
-}
-tick() {
-if (!this.isRunning || this.activeTasklets.size === 0) {
-this.stopScheduler();
-return;
-}
-const tickStart = Date.now();
-const taskletIds = Array.from(this.activeTasklets.keys());
-for (const id of taskletIds) {
-const state = this.activeTasklets.get(id);
-if (!state) continue;
-try {
-const iterationStart = Date.now();
-const result = state.tasklet.call();
-const iterationTime = Date.now() - iterationStart;
-state.iterations++;
-state.lastProgressTime = Date.now();
-if (this.config.metricsEnabled) {
-this.totalIterations++;
-this.totalExecutionTimeMs += iterationTime;
-}
-if (result === "DONE") {
-this.completeTasklet(id, state);
-} else if (result === "NO_PROGRESS") {
-}
-} catch (error) {
-this.failTasklet(id, state, error);
-}
-if (Date.now() - tickStart > this.config.defaultTimeBudgetMs * 2) {
-break;
+clear() {
+const count = this.pending.size;
+for (const pending of this.pending.values()) {
+if (pending.timeoutHandle) {
+clearTimeout(pending.timeoutHandle);
|
}
|
|
5720
|
+
pending.reject(new Error("WriteAckManager cleared"));
|
|
4181
5721
|
}
|
|
4182
|
-
|
|
4183
|
-
|
|
4184
|
-
|
|
4185
|
-
this.stopScheduler();
|
|
4186
|
-
}
|
|
4187
|
-
}
|
|
4188
|
-
completeTasklet(id, state) {
|
|
4189
|
-
this.activeTasklets.delete(id);
|
|
4190
|
-
this.completedTasklets++;
|
|
4191
|
-
if (state.iterations === 1) {
|
|
4192
|
-
this.singleIterationCompletions++;
|
|
4193
|
-
}
|
|
4194
|
-
try {
|
|
4195
|
-
const result = state.tasklet.getResult();
|
|
4196
|
-
state.resolve(result);
|
|
4197
|
-
} catch (error) {
|
|
4198
|
-
state.reject(error);
|
|
5722
|
+
this.pending.clear();
|
|
5723
|
+
if (count > 0) {
|
|
5724
|
+
logger.info({ count }, "WriteAckManager cleared");
|
|
4199
5725
|
}
|
|
4200
5726
|
}
|
|
4201
|
-
|
|
4202
|
-
|
|
4203
|
-
|
|
4204
|
-
|
|
4205
|
-
|
|
4206
|
-
this.
|
|
4207
|
-
|
|
4208
|
-
|
|
4209
|
-
try {
|
|
4210
|
-
state.tasklet.onCancel();
|
|
4211
|
-
} catch {
|
|
5727
|
+
/**
|
|
5728
|
+
* Graceful shutdown - resolves all pending writes with their current achieved level.
|
|
5729
|
+
*/
|
|
5730
|
+
shutdown() {
|
|
5731
|
+
const count = this.pending.size;
|
|
5732
|
+
for (const [opId, pending] of this.pending.entries()) {
|
|
5733
|
+
if (pending.timeoutHandle) {
|
|
5734
|
+
clearTimeout(pending.timeoutHandle);
|
|
4212
5735
|
}
|
|
5736
|
+
const highestAchieved = getHighestWriteConcernLevel(pending.achievedLevels);
|
|
5737
|
+
const latencyMs = Date.now() - pending.timestamp;
|
|
5738
|
+
const result = {
|
|
5739
|
+
success: highestAchieved === pending.writeConcern,
|
|
5740
|
+
opId,
|
|
5741
|
+
achievedLevel: highestAchieved,
|
|
5742
|
+
latencyMs,
|
|
5743
|
+
error: highestAchieved !== pending.writeConcern ? `Shutdown: achieved ${highestAchieved}, requested ${pending.writeConcern}` : void 0
|
|
5744
|
+
};
|
|
5745
|
+
pending.resolve(result);
|
|
5746
|
+
}
|
|
5747
|
+
this.pending.clear();
|
|
5748
|
+
if (count > 0) {
|
|
5749
|
+
logger.info({ count }, "WriteAckManager shutdown");
|
|
4213
5750
|
}
|
|
4214
|
-
state.reject(new Error(`Tasklet ${state.tasklet.name} was cancelled`));
|
|
4215
5751
|
}
|
|
4216
5752
|
};
|
|
4217
5753
|
|
|
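Note on the WriteAckManager API above: every pending write eventually resolves to a plain result object of the shape { success, opId, achievedLevel, latencyMs, error? }, and the same outcomes are mirrored as EventEmitter events ("resolved", "timeout", "failed"). A minimal consumer sketch follows; it assumes an already-constructed WriteAckManager instance (its construction and the write-registration call sit outside this hunk), so `ackManager` is a placeholder:

  // `ackManager` is an assumed, already-constructed WriteAckManager instance.
  ackManager.on("resolved", (result) => {
    // result: { success: true, opId, achievedLevel, latencyMs }
    console.log(`write ${result.opId} reached ${result.achievedLevel} in ${result.latencyMs}ms`);
  });
  ackManager.on("timeout", ({ opId, requested, achieved, latencyMs }) => {
    console.warn(`write ${opId} asked for ${requested} but only reached ${achieved} after ${latencyMs}ms`);
  });
  // Snapshot of everything still in flight, grouped by WriteConcern level:
  const { pending, byLevel } = ackManager.getStats();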
4218
|
-
// src/
|
|
4219
|
-
|
|
4220
|
-
|
|
4221
|
-
|
|
5754
|
+
// src/cluster/ReplicationPipeline.ts
|
|
5755
|
+
import { EventEmitter as EventEmitter8 } from "events";
|
|
5756
|
+
import {
|
|
5757
|
+
ConsistencyLevel,
|
|
5758
|
+
DEFAULT_REPLICATION_CONFIG
|
|
5759
|
+
} from "@topgunbuild/core";
|
|
5760
|
+
|
|
5761
|
+
// src/cluster/LagTracker.ts
|
|
5762
|
+
var DEFAULT_LAG_TRACKER_CONFIG = {
|
|
5763
|
+
historySize: 100,
|
|
5764
|
+
laggyThresholdMs: 5e3,
|
|
5765
|
+
unhealthyThresholdMs: 3e4
|
|
4222
5766
|
};
|
|
4223
|
-
var
|
|
4224
|
-
constructor(
|
|
4225
|
-
this.
|
|
4226
|
-
this.
|
|
4227
|
-
|
|
4228
|
-
|
|
5767
|
+
var LagTracker = class {
|
|
5768
|
+
constructor(config = {}) {
|
|
5769
|
+
this.lagByNode = /* @__PURE__ */ new Map();
|
|
5770
|
+
this.config = {
|
|
5771
|
+
...DEFAULT_LAG_TRACKER_CONFIG,
|
|
5772
|
+
...config
|
|
5773
|
+
};
|
|
4229
5774
|
}
|
|
4230
5775
|
/**
|
|
4231
|
-
*
|
|
5776
|
+
* Update lag measurement for a node
|
|
4232
5777
|
*/
|
|
4233
|
-
|
|
4234
|
-
|
|
4235
|
-
|
|
5778
|
+
update(nodeId, lagMs) {
|
|
5779
|
+
let info = this.lagByNode.get(nodeId);
|
|
5780
|
+
if (!info) {
|
|
5781
|
+
info = {
|
|
5782
|
+
current: 0,
|
|
5783
|
+
history: [],
|
|
5784
|
+
lastUpdate: Date.now(),
|
|
5785
|
+
pendingOps: 0
|
|
5786
|
+
};
|
|
5787
|
+
this.lagByNode.set(nodeId, info);
|
|
4236
5788
|
}
|
|
4237
|
-
|
|
4238
|
-
|
|
4239
|
-
|
|
4240
|
-
|
|
4241
|
-
if (done) {
|
|
4242
|
-
this.isDone = true;
|
|
4243
|
-
return "DONE";
|
|
4244
|
-
}
|
|
4245
|
-
this.processItem(value);
|
|
4246
|
-
this.itemsProcessed++;
|
|
4247
|
-
processedThisIteration++;
|
|
5789
|
+
info.current = lagMs;
|
|
5790
|
+
info.history.push(lagMs);
|
|
5791
|
+
if (info.history.length > this.config.historySize) {
|
|
5792
|
+
info.history.shift();
|
|
4248
5793
|
}
|
|
4249
|
-
|
|
5794
|
+
info.lastUpdate = Date.now();
|
|
4250
5795
|
}
|
|
4251
5796
|
/**
|
|
4252
|
-
*
|
|
5797
|
+
* Record acknowledgment from a node (lag effectively becomes 0)
|
|
4253
5798
|
*/
|
|
4254
|
-
|
|
5799
|
+
recordAck(nodeId) {
|
|
5800
|
+
const info = this.lagByNode.get(nodeId);
|
|
5801
|
+
if (info) {
|
|
5802
|
+
info.current = 0;
|
|
5803
|
+
info.lastUpdate = Date.now();
|
|
5804
|
+
if (info.pendingOps > 0) {
|
|
5805
|
+
info.pendingOps--;
|
|
5806
|
+
}
|
|
5807
|
+
}
|
|
4255
5808
|
}
|
|
4256
5809
|
/**
|
|
4257
|
-
*
|
|
5810
|
+
* Increment pending operations counter for a node
|
|
4258
5811
|
*/
|
|
4259
|
-
|
|
4260
|
-
|
|
4261
|
-
|
|
4262
|
-
|
|
4263
|
-
|
|
4264
|
-
|
|
4265
|
-
|
|
4266
|
-
|
|
4267
|
-
|
|
4268
|
-
|
|
4269
|
-
}
|
|
4270
|
-
processItem(item) {
|
|
4271
|
-
if (this.predicate(item)) {
|
|
4272
|
-
this.results.push(item);
|
|
5812
|
+
incrementPending(nodeId) {
|
|
5813
|
+
let info = this.lagByNode.get(nodeId);
|
|
5814
|
+
if (!info) {
|
|
5815
|
+
info = {
|
|
5816
|
+
current: 0,
|
|
5817
|
+
history: [],
|
|
5818
|
+
lastUpdate: Date.now(),
|
|
5819
|
+
pendingOps: 0
|
|
5820
|
+
};
|
|
5821
|
+
this.lagByNode.set(nodeId, info);
|
|
4273
5822
|
}
|
|
5823
|
+
info.pendingOps++;
|
|
4274
5824
|
}
|
|
4275
|
-
|
|
4276
|
-
|
|
5825
|
+
/**
|
|
5826
|
+
* Get lag statistics for a specific node
|
|
5827
|
+
*/
|
|
5828
|
+
getLag(nodeId) {
|
|
5829
|
+
const info = this.lagByNode.get(nodeId);
|
|
5830
|
+
if (!info || info.history.length === 0) {
|
|
5831
|
+
return { current: 0, avg: 0, max: 0, percentile99: 0 };
|
|
5832
|
+
}
|
|
5833
|
+
const sorted = [...info.history].sort((a, b) => a - b);
|
|
5834
|
+
const avg = sorted.reduce((a, b) => a + b, 0) / sorted.length;
|
|
5835
|
+
const max = sorted[sorted.length - 1] || 0;
|
|
5836
|
+
const p99Index = Math.floor(sorted.length * 0.99);
|
|
5837
|
+
const percentile99 = sorted[p99Index] || max;
|
|
5838
|
+
return {
|
|
5839
|
+
current: info.current,
|
|
5840
|
+
avg: Math.round(avg * 100) / 100,
|
|
5841
|
+
// Round to 2 decimal places
|
|
5842
|
+
max,
|
|
5843
|
+
percentile99
|
|
5844
|
+
};
|
|
4277
5845
|
}
|
|
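A worked note on the percentile above: percentile99 indexes the sorted history at floor(length × 0.99), and for any history of 100 samples or fewer floor(n × 0.99) = n − 1, so with the default historySize of 100 the reported p99 is simply the largest retained sample.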
4278
|
-
|
|
4279
|
-
|
|
4280
|
-
|
|
4281
|
-
|
|
4282
|
-
|
|
4283
|
-
|
|
4284
|
-
this.mapper = mapper;
|
|
5846
|
+
/**
|
|
5847
|
+
* Get pending operations count for a node
|
|
5848
|
+
*/
|
|
5849
|
+
getPendingOps(nodeId) {
|
|
5850
|
+
const info = this.lagByNode.get(nodeId);
|
|
5851
|
+
return info?.pendingOps ?? 0;
|
|
4285
5852
|
}
|
|
4286
|
-
|
|
4287
|
-
|
|
5853
|
+
/**
|
|
5854
|
+
* Get overall replication health status
|
|
5855
|
+
*/
|
|
5856
|
+
getHealth() {
|
|
5857
|
+
const unhealthyNodes = [];
|
|
5858
|
+
const laggyNodes = [];
|
|
5859
|
+
let totalLag = 0;
|
|
5860
|
+
let nodeCount = 0;
|
|
5861
|
+
const now = Date.now();
|
|
5862
|
+
for (const [nodeId, info] of this.lagByNode) {
|
|
5863
|
+
const timeSinceUpdate = now - info.lastUpdate;
|
|
5864
|
+
if (timeSinceUpdate > this.config.unhealthyThresholdMs) {
|
|
5865
|
+
unhealthyNodes.push(nodeId);
|
|
5866
|
+
} else if (info.current > this.config.laggyThresholdMs) {
|
|
5867
|
+
laggyNodes.push(nodeId);
|
|
5868
|
+
}
|
|
5869
|
+
totalLag += info.current;
|
|
5870
|
+
nodeCount++;
|
|
5871
|
+
}
|
|
5872
|
+
const avgLagMs = nodeCount > 0 ? totalLag / nodeCount : 0;
|
|
5873
|
+
return {
|
|
5874
|
+
healthy: unhealthyNodes.length === 0,
|
|
5875
|
+
unhealthyNodes,
|
|
5876
|
+
laggyNodes,
|
|
5877
|
+
avgLagMs: Math.round(avgLagMs * 100) / 100
|
|
5878
|
+
};
|
|
4288
5879
|
}
|
|
4289
|
-
|
|
4290
|
-
|
|
5880
|
+
/**
|
|
5881
|
+
* Get average lag across all tracked nodes
|
|
5882
|
+
*/
|
|
5883
|
+
getAverageLag() {
|
|
5884
|
+
let total = 0;
|
|
5885
|
+
let count = 0;
|
|
5886
|
+
for (const info of this.lagByNode.values()) {
|
|
5887
|
+
total += info.current;
|
|
5888
|
+
count++;
|
|
5889
|
+
}
|
|
5890
|
+
return count > 0 ? total / count : 0;
|
|
4291
5891
|
}
|
|
4292
|
-
|
|
4293
|
-
|
|
4294
|
-
|
|
4295
|
-
|
|
4296
|
-
|
|
4297
|
-
|
|
5892
|
+
/**
|
|
5893
|
+
* Check if a specific node is considered healthy
|
|
5894
|
+
*/
|
|
5895
|
+
isNodeHealthy(nodeId) {
|
|
5896
|
+
const info = this.lagByNode.get(nodeId);
|
|
5897
|
+
if (!info) return true;
|
|
5898
|
+
const timeSinceUpdate = Date.now() - info.lastUpdate;
|
|
5899
|
+
return timeSinceUpdate < this.config.unhealthyThresholdMs;
|
|
4298
5900
|
}
|
|
4299
|
-
|
|
4300
|
-
|
|
5901
|
+
/**
|
|
5902
|
+
* Check if a specific node is considered laggy
|
|
5903
|
+
*/
|
|
5904
|
+
isNodeLaggy(nodeId) {
|
|
5905
|
+
const info = this.lagByNode.get(nodeId);
|
|
5906
|
+
if (!info) return false;
|
|
5907
|
+
return info.current > this.config.laggyThresholdMs;
|
|
4301
5908
|
}
|
|
4302
|
-
|
|
4303
|
-
|
|
5909
|
+
/**
|
|
5910
|
+
* Remove a node from tracking
|
|
5911
|
+
*/
|
|
5912
|
+
removeNode(nodeId) {
|
|
5913
|
+
this.lagByNode.delete(nodeId);
|
|
4304
5914
|
}
|
|
4305
|
-
|
|
4306
|
-
|
|
4307
|
-
|
|
4308
|
-
|
|
4309
|
-
this.
|
|
4310
|
-
this.accumulator = initialValue;
|
|
4311
|
-
this.reducer = reducer;
|
|
5915
|
+
/**
|
|
5916
|
+
* Get all tracked node IDs
|
|
5917
|
+
*/
|
|
5918
|
+
getTrackedNodes() {
|
|
5919
|
+
return Array.from(this.lagByNode.keys());
|
|
4312
5920
|
}
|
|
4313
|
-
|
|
4314
|
-
|
|
5921
|
+
/**
|
|
5922
|
+
* Get raw lag info for a node (for advanced monitoring)
|
|
5923
|
+
*/
|
|
5924
|
+
getRawLagInfo(nodeId) {
|
|
5925
|
+
return this.lagByNode.get(nodeId);
|
|
4315
5926
|
}
|
|
4316
|
-
|
|
4317
|
-
|
|
5927
|
+
/**
|
|
5928
|
+
* Clear all tracking data
|
|
5929
|
+
*/
|
|
5930
|
+
clear() {
|
|
5931
|
+
this.lagByNode.clear();
|
|
5932
|
+
}
|
|
5933
|
+
/**
|
|
5934
|
+
* Export metrics in Prometheus format
|
|
5935
|
+
*/
|
|
5936
|
+
toPrometheusMetrics() {
|
|
5937
|
+
const lines = [
|
|
5938
|
+
"# HELP topgun_replication_lag_ms Current replication lag in milliseconds",
|
|
5939
|
+
"# TYPE topgun_replication_lag_ms gauge"
|
|
5940
|
+
];
|
|
5941
|
+
for (const [nodeId, info] of this.lagByNode) {
|
|
5942
|
+
lines.push(`topgun_replication_lag_ms{node="${nodeId}"} ${info.current}`);
|
|
5943
|
+
}
|
|
5944
|
+
lines.push("");
|
|
5945
|
+
lines.push("# HELP topgun_replication_pending_ops Pending replication operations");
|
|
5946
|
+
lines.push("# TYPE topgun_replication_pending_ops gauge");
|
|
5947
|
+
for (const [nodeId, info] of this.lagByNode) {
|
|
5948
|
+
lines.push(`topgun_replication_pending_ops{node="${nodeId}"} ${info.pendingOps}`);
|
|
5949
|
+
}
|
|
5950
|
+
const health = this.getHealth();
|
|
5951
|
+
lines.push("");
|
|
5952
|
+
lines.push("# HELP topgun_replication_healthy Cluster replication health (1=healthy, 0=unhealthy)");
|
|
5953
|
+
lines.push("# TYPE topgun_replication_healthy gauge");
|
|
5954
|
+
lines.push(`topgun_replication_healthy ${health.healthy ? 1 : 0}`);
|
|
5955
|
+
lines.push("");
|
|
5956
|
+
lines.push("# HELP topgun_replication_avg_lag_ms Average replication lag across all nodes");
|
|
5957
|
+
lines.push("# TYPE topgun_replication_avg_lag_ms gauge");
|
|
5958
|
+
lines.push(`topgun_replication_avg_lag_ms ${health.avgLagMs}`);
|
|
5959
|
+
return lines.join("\n");
|
|
4318
5960
|
}
|
|
4319
5961
|
};
|
|
4320
5962
|
|
|
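LagTracker has no dependencies on the rest of the cluster code, so its behaviour is easy to check in isolation. A sketch, with the import path flagged as an assumption (the class is defined inside dist/index.mjs and may not be re-exported from the package entry):

  // Assumption: LagTracker is exported from the package entry point.
  import { LagTracker } from "@topgunbuild/server";

  const tracker = new LagTracker();      // defaults: 100-sample history, 5s laggy, 30s unhealthy
  tracker.incrementPending("node-b");    // one replication op now outstanding for node-b
  tracker.update("node-b", 120);         // measured 120 ms of lag
  tracker.recordAck("node-b");           // ack arrived: current lag drops back to 0
  console.log(tracker.getLag("node-b")); // { current: 0, avg: 120, max: 120, percentile99: 120 }
  console.log(tracker.getHealth());      // { healthy: true, unhealthyNodes: [], laggyNodes: [], avgLagMs: 0 }
  console.log(tracker.toPrometheusMetrics()); // e.g. topgun_replication_lag_ms{node="node-b"} 0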
4321
|
-
// src/
|
|
4322
|
-
|
|
4323
|
-
|
|
4324
|
-
|
|
4325
|
-
|
|
4326
|
-
|
|
4327
|
-
|
|
4328
|
-
|
|
4329
|
-
|
|
4330
|
-
|
|
5963
|
+
// src/cluster/ReplicationPipeline.ts
|
|
5964
|
+
var ReplicationTimeoutError = class extends Error {
|
|
5965
|
+
constructor(opId, targetNodes, ackedNodes) {
|
|
5966
|
+
super(
|
|
5967
|
+
`Replication timeout for operation ${opId}. Expected: ${targetNodes.join(", ")}, Acked: ${ackedNodes.join(", ")}`
|
|
5968
|
+
);
|
|
5969
|
+
this.opId = opId;
|
|
5970
|
+
this.targetNodes = targetNodes;
|
|
5971
|
+
this.ackedNodes = ackedNodes;
|
|
5972
|
+
this.name = "ReplicationTimeoutError";
|
|
5973
|
+
}
|
|
5974
|
+
};
|
|
5975
|
+
var ReplicationPipeline = class extends EventEmitter8 {
|
|
5976
|
+
constructor(clusterManager, partitionService, config = {}) {
|
|
4331
5977
|
super();
|
|
4332
|
-
|
|
4333
|
-
this.
|
|
5978
|
+
// Replication queues per node (for EVENTUAL mode)
|
|
5979
|
+
this.replicationQueue = /* @__PURE__ */ new Map();
|
|
5980
|
+
// Pending acknowledgments (for STRONG/QUORUM mode)
|
|
5981
|
+
this.pendingAcks = /* @__PURE__ */ new Map();
|
|
5982
|
+
// Queue processor timer
|
|
5983
|
+
this.queueProcessorTimer = null;
|
|
5984
|
+
// Operation applier callback (injected by ServerCoordinator)
|
|
5985
|
+
this.operationApplier = null;
|
|
5986
|
+
this.clusterManager = clusterManager;
|
|
5987
|
+
this.partitionService = partitionService;
|
|
5988
|
+
this.nodeId = clusterManager.config.nodeId;
|
|
5989
|
+
this.config = {
|
|
5990
|
+
...DEFAULT_REPLICATION_CONFIG,
|
|
5991
|
+
...config
|
|
5992
|
+
};
|
|
5993
|
+
this.lagTracker = new LagTracker();
|
|
5994
|
+
this.setupMessageHandlers();
|
|
5995
|
+
this.startQueueProcessor();
|
|
4334
5996
|
}
|
|
5997
|
+
// ============================================
|
|
5998
|
+
// Configuration
|
|
5999
|
+
// ============================================
|
|
4335
6000
|
/**
|
|
4336
|
-
*
|
|
4337
|
-
*
|
|
4338
|
-
*
|
|
4339
|
-
* @param opId - Operation ID
|
|
4340
|
-
* @param writeConcern - Target Write Concern level
|
|
4341
|
-
* @param timeout - Optional timeout in ms (defaults to config or 5000ms)
|
|
4342
|
-
* @returns Promise that resolves with WriteResult
|
|
6001
|
+
* Set the operation applier callback
|
|
6002
|
+
* This is called when replicated operations are received from other nodes
|
|
4343
6003
|
*/
|
|
4344
|
-
|
|
4345
|
-
|
|
4346
|
-
|
|
4347
|
-
|
|
4348
|
-
|
|
4349
|
-
|
|
4350
|
-
|
|
4351
|
-
|
|
6004
|
+
setOperationApplier(applier) {
|
|
6005
|
+
this.operationApplier = applier;
|
|
6006
|
+
}
|
|
6007
|
+
// ============================================
|
|
6008
|
+
// Replication API
|
|
6009
|
+
// ============================================
|
|
6010
|
+
/**
|
|
6011
|
+
* Replicate operation to backup nodes
|
|
6012
|
+
*/
|
|
6013
|
+
async replicate(operation, opId, key, options = {}) {
|
|
6014
|
+
const consistency = options.consistency ?? this.config.defaultConsistency;
|
|
6015
|
+
const partitionId = this.partitionService.getPartitionId(key);
|
|
6016
|
+
const backups = this.partitionService.getBackups(partitionId);
|
|
6017
|
+
if (backups.length === 0) {
|
|
6018
|
+
return { success: true, ackedBy: [this.nodeId] };
|
|
6019
|
+
}
|
|
6020
|
+
switch (consistency) {
|
|
6021
|
+
case ConsistencyLevel.STRONG:
|
|
6022
|
+
return this.replicateStrong(operation, opId, backups, options.timeout);
|
|
6023
|
+
case ConsistencyLevel.QUORUM:
|
|
6024
|
+
return this.replicateQuorum(operation, opId, backups, options.timeout);
|
|
6025
|
+
case ConsistencyLevel.EVENTUAL:
|
|
6026
|
+
return this.replicateEventual(operation, opId, backups);
|
|
4352
6027
|
}
|
|
6028
|
+
}
|
|
6029
|
+
/**
|
|
6030
|
+
* STRONG: Wait for all replicas to acknowledge
|
|
6031
|
+
*/
|
|
6032
|
+
async replicateStrong(operation, opId, backups, timeout) {
|
|
6033
|
+
const targetNodes = backups;
|
|
4353
6034
|
return new Promise((resolve, reject) => {
|
|
4354
|
-
const
|
|
4355
|
-
const timestamp = Date.now();
|
|
4356
|
-
const pendingWrite = {
|
|
6035
|
+
const pending = {
|
|
4357
6036
|
opId,
|
|
4358
|
-
|
|
4359
|
-
|
|
4360
|
-
|
|
4361
|
-
resolve
|
|
4362
|
-
|
|
4363
|
-
|
|
6037
|
+
consistency: ConsistencyLevel.STRONG,
|
|
6038
|
+
targetNodes,
|
|
6039
|
+
ackedNodes: /* @__PURE__ */ new Set(),
|
|
6040
|
+
resolve: () => resolve({
|
|
6041
|
+
success: true,
|
|
6042
|
+
ackedBy: [this.nodeId, ...targetNodes]
|
|
6043
|
+
}),
|
|
6044
|
+
reject: (error) => reject(error),
|
|
6045
|
+
timeout: setTimeout(() => {
|
|
6046
|
+
this.pendingAcks.delete(opId);
|
|
6047
|
+
const ackedList = Array.from(pending.ackedNodes);
|
|
6048
|
+
reject(new ReplicationTimeoutError(opId, targetNodes, ackedList));
|
|
6049
|
+
}, timeout ?? this.config.ackTimeoutMs),
|
|
6050
|
+
startTime: Date.now()
|
|
4364
6051
|
};
|
|
4365
|
-
|
|
4366
|
-
|
|
4367
|
-
|
|
4368
|
-
|
|
4369
|
-
|
|
4370
|
-
|
|
4371
|
-
"Registered pending write"
|
|
4372
|
-
);
|
|
4373
|
-
if (writeConcern === WriteConcern.MEMORY) {
|
|
4374
|
-
this.notifyLevel(opId, WriteConcern.MEMORY);
|
|
6052
|
+
this.pendingAcks.set(opId, pending);
|
|
6053
|
+
for (const nodeId of targetNodes) {
|
|
6054
|
+
this.lagTracker.incrementPending(nodeId);
|
|
6055
|
+
}
|
|
6056
|
+
for (const nodeId of targetNodes) {
|
|
6057
|
+
this.sendReplication(nodeId, operation, opId, ConsistencyLevel.STRONG);
|
|
4375
6058
|
}
|
|
4376
6059
|
});
|
|
4377
6060
|
}
|
|
4378
6061
|
/**
|
|
4379
|
-
*
|
|
4380
|
-
*
|
|
4381
|
-
* @param opId - Operation ID
|
|
4382
|
-
* @param level - Write Concern level that was achieved
|
|
6062
|
+
* QUORUM: Wait for majority of replicas
|
|
4383
6063
|
*/
|
|
4384
|
-
|
|
4385
|
-
const
|
|
4386
|
-
|
|
4387
|
-
|
|
6064
|
+
async replicateQuorum(operation, opId, backups, timeout) {
|
|
6065
|
+
const targetNodes = backups;
|
|
6066
|
+
const quorumSize = Math.floor(targetNodes.length / 2) + 1;
|
|
6067
|
+
return new Promise((resolve, reject) => {
|
|
6068
|
+
const ackedNodes = /* @__PURE__ */ new Set();
|
|
6069
|
+
const pending = {
|
|
6070
|
+
opId,
|
|
6071
|
+
consistency: ConsistencyLevel.QUORUM,
|
|
6072
|
+
targetNodes,
|
|
6073
|
+
ackedNodes,
|
|
6074
|
+
resolve: () => {
|
|
6075
|
+
const ackedSnapshot = Array.from(ackedNodes);
|
|
6076
|
+
const ackedBy = [this.nodeId, ...ackedSnapshot];
|
|
6077
|
+
resolve({ success: true, ackedBy });
|
|
6078
|
+
},
|
|
6079
|
+
reject: (error) => reject(error),
|
|
6080
|
+
timeout: setTimeout(() => {
|
|
6081
|
+
this.pendingAcks.delete(opId);
|
|
6082
|
+
const ackedList = Array.from(ackedNodes);
|
|
6083
|
+
reject(new ReplicationTimeoutError(opId, targetNodes, ackedList));
|
|
6084
|
+
}, timeout ?? this.config.ackTimeoutMs),
|
|
6085
|
+
startTime: Date.now()
|
|
6086
|
+
};
|
|
6087
|
+
this.pendingAcks.set(opId, pending);
|
|
6088
|
+
for (const nodeId of targetNodes) {
|
|
6089
|
+
this.lagTracker.incrementPending(nodeId);
|
|
6090
|
+
}
|
|
6091
|
+
for (const nodeId of targetNodes) {
|
|
6092
|
+
this.sendReplication(nodeId, operation, opId, ConsistencyLevel.QUORUM);
|
|
6093
|
+
}
|
|
6094
|
+
});
|
|
6095
|
+
}
|
|
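Concretely: with three backups the quorum is floor(3 / 2) + 1 = 2 acknowledgments, with four backups it is 3, and with a single backup it is 1, which behaves the same as STRONG. The owner node is not counted toward the quorum; it is only prepended to ackedBy when the promise resolves.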
6096
|
+
/**
|
|
6097
|
+
* EVENTUAL: Fire-and-forget with queue
|
|
6098
|
+
*/
|
|
6099
|
+
async replicateEventual(operation, opId, backups) {
|
|
6100
|
+
for (const nodeId of backups) {
|
|
6101
|
+
this.enqueue(nodeId, {
|
|
6102
|
+
opId,
|
|
6103
|
+
operation,
|
|
6104
|
+
consistency: ConsistencyLevel.EVENTUAL,
|
|
6105
|
+
timestamp: Date.now(),
|
|
6106
|
+
retryCount: 0
|
|
6107
|
+
});
|
|
4388
6108
|
}
|
|
4389
|
-
|
|
4390
|
-
|
|
4391
|
-
|
|
4392
|
-
|
|
4393
|
-
|
|
4394
|
-
|
|
4395
|
-
|
|
6109
|
+
return { success: true, ackedBy: [this.nodeId] };
|
|
6110
|
+
}
|
|
6111
|
+
// ============================================
|
|
6112
|
+
// Queue Management
|
|
6113
|
+
// ============================================
|
|
6114
|
+
/**
|
|
6115
|
+
* Add task to replication queue
|
|
6116
|
+
*/
|
|
6117
|
+
enqueue(nodeId, task) {
|
|
6118
|
+
let queue = this.replicationQueue.get(nodeId);
|
|
6119
|
+
if (!queue) {
|
|
6120
|
+
queue = [];
|
|
6121
|
+
this.replicationQueue.set(nodeId, queue);
|
|
4396
6122
|
}
|
|
6123
|
+
if (queue.length >= this.config.queueSizeLimit) {
|
|
6124
|
+
this.emit("queueOverflow", nodeId);
|
|
6125
|
+
logger.warn({ nodeId, queueSize: queue.length }, "Replication queue overflow, dropping oldest");
|
|
6126
|
+
queue.shift();
|
|
6127
|
+
}
|
|
6128
|
+
queue.push(task);
|
|
6129
|
+
this.lagTracker.incrementPending(nodeId);
|
|
4397
6130
|
}
|
|
4398
6131
|
/**
|
|
4399
|
-
*
|
|
4400
|
-
* Useful for batch operations.
|
|
4401
|
-
*
|
|
4402
|
-
* @param opIds - Array of operation IDs
|
|
4403
|
-
* @param level - Write Concern level that was achieved
|
|
6132
|
+
* Start queue processor
|
|
4404
6133
|
*/
|
|
4405
|
-
|
|
4406
|
-
|
|
4407
|
-
|
|
6134
|
+
startQueueProcessor() {
|
|
6135
|
+
if (this.queueProcessorTimer) return;
|
|
6136
|
+
this.queueProcessorTimer = setInterval(() => {
|
|
6137
|
+
for (const nodeId of this.replicationQueue.keys()) {
|
|
6138
|
+
this.processQueue(nodeId).catch((err) => {
|
|
6139
|
+
logger.error({ nodeId, error: err }, "Error processing replication queue");
|
|
6140
|
+
this.emit("error", err);
|
|
6141
|
+
});
|
|
6142
|
+
}
|
|
6143
|
+
}, this.config.batchIntervalMs);
|
|
6144
|
+
}
|
|
6145
|
+
/**
|
|
6146
|
+
* Stop queue processor
|
|
6147
|
+
*/
|
|
6148
|
+
stopQueueProcessor() {
|
|
6149
|
+
if (this.queueProcessorTimer) {
|
|
6150
|
+
clearInterval(this.queueProcessorTimer);
|
|
6151
|
+
this.queueProcessorTimer = null;
|
|
4408
6152
|
}
|
|
4409
6153
|
}
|
|
4410
6154
|
/**
|
|
4411
|
-
*
|
|
4412
|
-
*
|
|
4413
|
-
* @param opId - Operation ID
|
|
4414
|
-
* @returns true if operation is pending
|
|
6155
|
+
* Process replication queue for a node
|
|
4415
6156
|
*/
|
|
4416
|
-
|
|
4417
|
-
|
|
6157
|
+
async processQueue(nodeId) {
|
|
6158
|
+
const queue = this.replicationQueue.get(nodeId);
|
|
6159
|
+
if (!queue || queue.length === 0) return;
|
|
6160
|
+
const batch = queue.splice(0, this.config.batchSize);
|
|
6161
|
+
try {
|
|
6162
|
+
this.clusterManager.send(nodeId, "OP_FORWARD", {
|
|
6163
|
+
_replication: {
|
|
6164
|
+
type: "REPLICATION_BATCH",
|
|
6165
|
+
payload: {
|
|
6166
|
+
operations: batch.map((t) => t.operation),
|
|
6167
|
+
opIds: batch.map((t) => t.opId)
|
|
6168
|
+
}
|
|
6169
|
+
}
|
|
6170
|
+
});
|
|
6171
|
+
const oldestTimestamp = Math.min(...batch.map((t) => t.timestamp));
|
|
6172
|
+
this.lagTracker.update(nodeId, Date.now() - oldestTimestamp);
|
|
6173
|
+
logger.debug({ nodeId, batchSize: batch.length }, "Sent replication batch");
|
|
6174
|
+
} catch (error) {
|
|
6175
|
+
for (const task of batch) {
|
|
6176
|
+
task.retryCount++;
|
|
6177
|
+
if (task.retryCount <= this.config.maxRetries) {
|
|
6178
|
+
queue.unshift(task);
|
|
6179
|
+
} else {
|
|
6180
|
+
logger.warn({ nodeId, opId: task.opId, retries: task.retryCount }, "Replication task exceeded max retries");
|
|
6181
|
+
this.emit("replicationFailed", task.opId, new Error("Max retries exceeded"));
|
|
6182
|
+
}
|
|
6183
|
+
}
|
|
6184
|
+
}
|
|
4418
6185
|
}
|
|
6186
|
+
// ============================================
|
|
6187
|
+
// Message Handling
|
|
6188
|
+
// ============================================
|
|
4419
6189
|
/**
|
|
4420
|
-
*
|
|
4421
|
-
*
|
|
4422
|
-
* @param opId - Operation ID
|
|
4423
|
-
* @returns Target Write Concern level or undefined if not pending
|
|
6190
|
+
* Send replication message to a node
|
|
4424
6191
|
*/
|
|
4425
|
-
|
|
4426
|
-
|
|
6192
|
+
sendReplication(nodeId, operation, opId, consistency) {
|
|
6193
|
+
this.clusterManager.send(nodeId, "OP_FORWARD", {
|
|
6194
|
+
_replication: {
|
|
6195
|
+
type: "REPLICATION",
|
|
6196
|
+
payload: {
|
|
6197
|
+
opId,
|
|
6198
|
+
operation,
|
|
6199
|
+
consistency
|
|
6200
|
+
}
|
|
6201
|
+
}
|
|
6202
|
+
});
|
|
4427
6203
|
}
|
|
4428
6204
|
/**
|
|
4429
|
-
*
|
|
4430
|
-
*
|
|
4431
|
-
* @param opId - Operation ID
|
|
4432
|
-
* @returns Highest achieved level or undefined if not pending
|
|
6205
|
+
* Setup cluster message handlers
|
|
4433
6206
|
*/
|
|
4434
|
-
|
|
4435
|
-
|
|
4436
|
-
|
|
4437
|
-
|
|
6207
|
+
setupMessageHandlers() {
|
|
6208
|
+
this.clusterManager.on("message", (msg) => {
|
|
6209
|
+
if (msg.payload?._replication) {
|
|
6210
|
+
const replication = msg.payload._replication;
|
|
6211
|
+
switch (replication.type) {
|
|
6212
|
+
case "REPLICATION":
|
|
6213
|
+
this.handleReplication(msg.senderId, replication.payload);
|
|
6214
|
+
break;
|
|
6215
|
+
case "REPLICATION_BATCH":
|
|
6216
|
+
this.handleReplicationBatch(msg.senderId, replication.payload);
|
|
6217
|
+
break;
|
|
6218
|
+
case "REPLICATION_ACK":
|
|
6219
|
+
this.handleReplicationAck(msg.senderId, replication.payload);
|
|
6220
|
+
break;
|
|
6221
|
+
case "REPLICATION_BATCH_ACK":
|
|
6222
|
+
this.handleReplicationBatchAck(msg.senderId, replication.payload);
|
|
6223
|
+
break;
|
|
6224
|
+
}
|
|
6225
|
+
}
|
|
6226
|
+
});
|
|
4438
6227
|
}
|
|
4439
6228
|
/**
|
|
4440
|
-
*
|
|
6229
|
+
* Handle incoming replication request (on backup node)
|
|
4441
6230
|
*/
|
|
4442
|
-
|
|
4443
|
-
const
|
|
4444
|
-
|
|
4445
|
-
|
|
4446
|
-
|
|
6231
|
+
async handleReplication(sourceNode, payload) {
|
|
6232
|
+
const { opId, operation, consistency } = payload;
|
|
6233
|
+
logger.debug({ sourceNode, opId, consistency }, "Received replication");
|
|
6234
|
+
let success = true;
|
|
6235
|
+
if (this.operationApplier) {
|
|
6236
|
+
try {
|
|
6237
|
+
success = await this.operationApplier(operation, opId, sourceNode);
|
|
6238
|
+
} catch (error) {
|
|
6239
|
+
logger.error({ sourceNode, opId, error }, "Failed to apply replicated operation");
|
|
6240
|
+
success = false;
|
|
6241
|
+
}
|
|
6242
|
+
} else {
|
|
6243
|
+
logger.warn({ sourceNode, opId }, "No operation applier set, operation not applied");
|
|
6244
|
+
}
|
|
6245
|
+
if (consistency === ConsistencyLevel.STRONG || consistency === ConsistencyLevel.QUORUM) {
|
|
6246
|
+
this.clusterManager.send(sourceNode, "OP_FORWARD", {
|
|
6247
|
+
_replication: {
|
|
6248
|
+
type: "REPLICATION_ACK",
|
|
6249
|
+
payload: {
|
|
6250
|
+
opId,
|
|
6251
|
+
success,
|
|
6252
|
+
timestamp: Date.now()
|
|
6253
|
+
}
|
|
6254
|
+
}
|
|
6255
|
+
});
|
|
4447
6256
|
}
|
|
4448
|
-
const latencyMs = Date.now() - pending.timestamp;
|
|
4449
|
-
const result = {
|
|
4450
|
-
success: true,
|
|
4451
|
-
opId,
|
|
4452
|
-
achievedLevel,
|
|
4453
|
-
latencyMs
|
|
4454
|
-
};
|
|
4455
|
-
pending.resolve(result);
|
|
4456
|
-
this.pending.delete(opId);
|
|
4457
|
-
logger.debug(
|
|
4458
|
-
{ opId, achievedLevel, latencyMs },
|
|
4459
|
-
"Write resolved successfully"
|
|
4460
|
-
);
|
|
4461
|
-
this.emit("resolved", result);
|
|
4462
6257
|
}
|
|
4463
6258
|
/**
|
|
4464
|
-
* Handle
|
|
6259
|
+
* Handle incoming batch replication (on backup node)
|
|
4465
6260
|
*/
|
|
4466
|
-
|
|
4467
|
-
const
|
|
4468
|
-
|
|
4469
|
-
|
|
4470
|
-
|
|
4471
|
-
|
|
4472
|
-
|
|
4473
|
-
|
|
4474
|
-
|
|
4475
|
-
|
|
4476
|
-
|
|
4477
|
-
|
|
4478
|
-
|
|
4479
|
-
|
|
4480
|
-
|
|
4481
|
-
|
|
4482
|
-
|
|
4483
|
-
|
|
4484
|
-
|
|
4485
|
-
|
|
4486
|
-
|
|
4487
|
-
|
|
4488
|
-
|
|
6261
|
+
async handleReplicationBatch(sourceNode, payload) {
|
|
6262
|
+
const { operations, opIds } = payload;
|
|
6263
|
+
logger.debug({ sourceNode, count: operations.length }, "Received replication batch");
|
|
6264
|
+
let allSuccess = true;
|
|
6265
|
+
if (this.operationApplier) {
|
|
6266
|
+
for (let i = 0; i < operations.length; i++) {
|
|
6267
|
+
try {
|
|
6268
|
+
const success = await this.operationApplier(operations[i], opIds[i], sourceNode);
|
|
6269
|
+
if (!success) {
|
|
6270
|
+
allSuccess = false;
|
|
6271
|
+
}
|
|
6272
|
+
} catch (error) {
|
|
6273
|
+
logger.error({ sourceNode, opId: opIds[i], error }, "Failed to apply replicated operation in batch");
|
|
6274
|
+
allSuccess = false;
|
|
6275
|
+
}
|
|
6276
|
+
}
|
|
6277
|
+
} else {
|
|
6278
|
+
logger.warn({ sourceNode, count: operations.length }, "No operation applier set, batch not applied");
|
|
6279
|
+
}
|
|
6280
|
+
this.clusterManager.send(sourceNode, "OP_FORWARD", {
|
|
6281
|
+
_replication: {
|
|
6282
|
+
type: "REPLICATION_BATCH_ACK",
|
|
6283
|
+
payload: {
|
|
6284
|
+
opIds,
|
|
6285
|
+
success: allSuccess,
|
|
6286
|
+
timestamp: Date.now()
|
|
6287
|
+
}
|
|
6288
|
+
}
|
|
4489
6289
|
});
|
|
4490
6290
|
}
|
|
4491
6291
|
/**
|
|
4492
|
-
*
|
|
4493
|
-
|
|
4494
|
-
|
|
4495
|
-
|
|
6292
|
+
* Handle replication acknowledgment (on owner node)
|
|
6293
|
+
*/
|
|
6294
|
+
handleReplicationAck(sourceNode, payload) {
|
|
6295
|
+
const { opId, success } = payload;
|
|
6296
|
+
this.lagTracker.recordAck(sourceNode);
|
|
6297
|
+
const pending = this.pendingAcks.get(opId);
|
|
6298
|
+
if (!pending) return;
|
|
6299
|
+
if (!success) {
|
|
6300
|
+
logger.warn({ sourceNode, opId }, "Replication rejected by backup");
|
|
6301
|
+
return;
|
|
6302
|
+
}
|
|
6303
|
+
pending.ackedNodes.add(sourceNode);
|
|
6304
|
+
const lag = Date.now() - pending.startTime;
|
|
6305
|
+
this.lagTracker.update(sourceNode, lag);
|
|
6306
|
+
const ackedCount = pending.ackedNodes.size;
|
|
6307
|
+
const targetCount = pending.targetNodes.length;
|
|
6308
|
+
switch (pending.consistency) {
|
|
6309
|
+
case ConsistencyLevel.STRONG:
|
|
6310
|
+
if (ackedCount === targetCount) {
|
|
6311
|
+
clearTimeout(pending.timeout);
|
|
6312
|
+
this.pendingAcks.delete(opId);
|
|
6313
|
+
pending.resolve();
|
|
6314
|
+
this.emit("replicationComplete", opId, [this.nodeId, ...pending.ackedNodes]);
|
|
6315
|
+
}
|
|
6316
|
+
break;
|
|
6317
|
+
case ConsistencyLevel.QUORUM:
|
|
6318
|
+
const quorumSize = Math.floor(targetCount / 2) + 1;
|
|
6319
|
+
if (ackedCount >= quorumSize) {
|
|
6320
|
+
clearTimeout(pending.timeout);
|
|
6321
|
+
this.pendingAcks.delete(opId);
|
|
6322
|
+
pending.resolve();
|
|
6323
|
+
this.emit("replicationComplete", opId, [this.nodeId, ...pending.ackedNodes]);
|
|
6324
|
+
}
|
|
6325
|
+
break;
|
|
6326
|
+
}
|
|
6327
|
+
}
|
|
6328
|
+
/**
|
|
6329
|
+
* Handle batch acknowledgment (on owner node)
|
|
6330
|
+
*/
|
|
6331
|
+
handleReplicationBatchAck(sourceNode, payload) {
|
|
6332
|
+
const { success } = payload;
|
|
6333
|
+
this.lagTracker.recordAck(sourceNode);
|
|
6334
|
+
if (!success) {
|
|
6335
|
+
logger.warn({ sourceNode, count: payload.opIds.length }, "Batch replication rejected");
|
|
6336
|
+
}
|
|
6337
|
+
}
|
|
6338
|
+
// ============================================
|
|
6339
|
+
// Status and Metrics
|
|
6340
|
+
// ============================================
|
|
6341
|
+
/**
|
|
6342
|
+
* Get replication lag for a specific node
|
|
4496
6343
|
*/
|
|
4497
|
-
|
|
4498
|
-
|
|
4499
|
-
if (!pending) return;
|
|
4500
|
-
if (pending.timeoutHandle) {
|
|
4501
|
-
clearTimeout(pending.timeoutHandle);
|
|
4502
|
-
}
|
|
4503
|
-
const latencyMs = Date.now() - pending.timestamp;
|
|
4504
|
-
const highestAchieved = getHighestWriteConcernLevel(pending.achievedLevels);
|
|
4505
|
-
const result = {
|
|
4506
|
-
success: false,
|
|
4507
|
-
opId,
|
|
4508
|
-
achievedLevel: highestAchieved,
|
|
4509
|
-
latencyMs,
|
|
4510
|
-
error
|
|
4511
|
-
};
|
|
4512
|
-
pending.resolve(result);
|
|
4513
|
-
this.pending.delete(opId);
|
|
4514
|
-
logger.error({ opId, error, latencyMs }, "Write failed");
|
|
4515
|
-
this.emit("failed", result);
|
|
6344
|
+
getLag(nodeId) {
|
|
6345
|
+
return this.lagTracker.getLag(nodeId);
|
|
4516
6346
|
}
|
|
4517
6347
|
/**
|
|
4518
|
-
* Get
|
|
6348
|
+
* Get overall replication health
|
|
4519
6349
|
*/
|
|
4520
|
-
|
|
4521
|
-
|
|
4522
|
-
[WriteConcern.FIRE_AND_FORGET]: 0,
|
|
4523
|
-
[WriteConcern.MEMORY]: 0,
|
|
4524
|
-
[WriteConcern.APPLIED]: 0,
|
|
4525
|
-
[WriteConcern.REPLICATED]: 0,
|
|
4526
|
-
[WriteConcern.PERSISTED]: 0
|
|
4527
|
-
};
|
|
4528
|
-
for (const pending of this.pending.values()) {
|
|
4529
|
-
byLevel[pending.writeConcern]++;
|
|
4530
|
-
}
|
|
4531
|
-
return { pending: this.pending.size, byLevel };
|
|
6350
|
+
getHealth() {
|
|
6351
|
+
return this.lagTracker.getHealth();
|
|
4532
6352
|
}
|
|
4533
6353
|
/**
|
|
4534
|
-
* Get
|
|
6354
|
+
* Get queue size for a specific node
|
|
4535
6355
|
*/
|
|
4536
|
-
|
|
4537
|
-
return
|
|
6356
|
+
getQueueSize(nodeId) {
|
|
6357
|
+
return this.replicationQueue.get(nodeId)?.length ?? 0;
|
|
4538
6358
|
}
|
|
4539
6359
|
/**
|
|
4540
|
-
*
|
|
4541
|
-
* Rejects all pending promises with an error.
|
|
6360
|
+
* Get total pending operations across all nodes
|
|
4542
6361
|
*/
|
|
4543
|
-
|
|
4544
|
-
|
|
4545
|
-
for (const
|
|
4546
|
-
|
|
4547
|
-
clearTimeout(pending.timeoutHandle);
|
|
4548
|
-
}
|
|
4549
|
-
pending.reject(new Error("WriteAckManager cleared"));
|
|
4550
|
-
}
|
|
4551
|
-
this.pending.clear();
|
|
4552
|
-
if (count > 0) {
|
|
4553
|
-
logger.info({ count }, "WriteAckManager cleared");
|
|
6362
|
+
getTotalPending() {
|
|
6363
|
+
let total = 0;
|
|
6364
|
+
for (const queue of this.replicationQueue.values()) {
|
|
6365
|
+
total += queue.length;
|
|
4554
6366
|
}
|
|
6367
|
+
return total + this.pendingAcks.size;
|
|
4555
6368
|
}
|
|
4556
6369
|
/**
|
|
4557
|
-
*
|
|
6370
|
+
* Check if a node is considered synced (low lag)
|
|
4558
6371
|
*/
|
|
4559
|
-
|
|
4560
|
-
const
|
|
4561
|
-
|
|
4562
|
-
|
|
4563
|
-
|
|
4564
|
-
|
|
4565
|
-
|
|
4566
|
-
|
|
4567
|
-
|
|
4568
|
-
|
|
4569
|
-
|
|
4570
|
-
|
|
4571
|
-
|
|
4572
|
-
|
|
4573
|
-
|
|
4574
|
-
|
|
6372
|
+
isSynced(nodeId, maxLagMs = 1e3) {
|
|
6373
|
+
const lag = this.lagTracker.getLag(nodeId);
|
|
6374
|
+
return lag.current < maxLagMs;
|
|
6375
|
+
}
|
|
6376
|
+
/**
|
|
6377
|
+
* Get LagTracker for advanced monitoring
|
|
6378
|
+
*/
|
|
6379
|
+
getLagTracker() {
|
|
6380
|
+
return this.lagTracker;
|
|
6381
|
+
}
|
|
6382
|
+
/**
|
|
6383
|
+
* Export metrics in Prometheus format
|
|
6384
|
+
*/
|
|
6385
|
+
toPrometheusMetrics() {
|
|
6386
|
+
const lines = [];
|
|
6387
|
+
lines.push("# HELP topgun_replication_queue_size Pending operations in replication queue");
|
|
6388
|
+
lines.push("# TYPE topgun_replication_queue_size gauge");
|
|
6389
|
+
for (const [nodeId, queue] of this.replicationQueue) {
|
|
6390
|
+
lines.push(`topgun_replication_queue_size{node="${nodeId}"} ${queue.length}`);
|
|
4575
6391
|
}
|
|
4576
|
-
|
|
4577
|
-
|
|
4578
|
-
|
|
6392
|
+
lines.push("");
|
|
6393
|
+
lines.push("# HELP topgun_replication_pending_acks Pending synchronous acknowledgments");
|
|
6394
|
+
lines.push("# TYPE topgun_replication_pending_acks gauge");
|
|
6395
|
+
lines.push(`topgun_replication_pending_acks ${this.pendingAcks.size}`);
|
|
6396
|
+
lines.push("");
|
|
6397
|
+
lines.push(this.lagTracker.toPrometheusMetrics());
|
|
6398
|
+
return lines.join("\n");
|
|
6399
|
+
}
|
|
6400
|
+
/**
|
|
6401
|
+
* Cleanup resources
|
|
6402
|
+
*/
|
|
6403
|
+
close() {
|
|
6404
|
+
this.stopQueueProcessor();
|
|
6405
|
+
for (const [opId, pending] of this.pendingAcks) {
|
|
6406
|
+
clearTimeout(pending.timeout);
|
|
6407
|
+
pending.reject(new Error("ReplicationPipeline closed"));
|
|
4579
6408
|
}
|
|
6409
|
+
this.pendingAcks.clear();
|
|
6410
|
+
this.replicationQueue.clear();
|
|
6411
|
+
this.lagTracker.clear();
|
|
4580
6412
|
}
|
|
4581
6413
|
};
|
|
4582
6414
|
|
|
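From the caller's side the three consistency levels of ReplicationPipeline.replicate() behave as follows. The sketch assumes a pipeline obtained from a running coordinator (see getReplicationPipeline() further below); `pipeline`, `op`, `opId` and `key` are placeholders:

  import { ConsistencyLevel } from "@topgunbuild/core";

  // EVENTUAL (the default): enqueued for the batch processor, resolves immediately.
  await pipeline.replicate(op, opId, key);
  // -> { success: true, ackedBy: ["<local-node>"] }

  // QUORUM: resolves once floor(backups / 2) + 1 backup nodes have acknowledged.
  await pipeline.replicate(op, opId, key, { consistency: ConsistencyLevel.QUORUM });

  // STRONG: every backup must acknowledge before the timeout (ackTimeoutMs by default),
  // otherwise the promise rejects with ReplicationTimeoutError.
  try {
    await pipeline.replicate(op, opId, key, { consistency: ConsistencyLevel.STRONG, timeout: 2000 });
  } catch (err) {
    // err.targetNodes and err.ackedNodes identify the missing acknowledgments.
  }

If the key's partition has no backups, replicate() resolves immediately with ackedBy containing only the local node.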
@@ -4741,6 +6573,22 @@ var ServerCoordinator = class {
|
|
|
4741
6573
|
tls: config.clusterTls
|
|
4742
6574
|
});
|
|
4743
6575
|
this.partitionService = new PartitionService(this.cluster);
|
|
6576
|
+
if (config.replicationEnabled !== false) {
|
|
6577
|
+
this.replicationPipeline = new ReplicationPipeline(
|
|
6578
|
+
this.cluster,
|
|
6579
|
+
this.partitionService,
|
|
6580
|
+
{
|
|
6581
|
+
...DEFAULT_REPLICATION_CONFIG2,
|
|
6582
|
+
defaultConsistency: config.defaultConsistency ?? ConsistencyLevel2.EVENTUAL,
|
|
6583
|
+
...config.replicationConfig
|
|
6584
|
+
}
|
|
6585
|
+
);
|
|
6586
|
+
this.replicationPipeline.setOperationApplier(this.applyReplicatedOperation.bind(this));
|
|
6587
|
+
logger.info({ nodeId: config.nodeId }, "ReplicationPipeline initialized");
|
|
6588
|
+
}
|
|
6589
|
+
this.partitionService.on("rebalanced", (partitionMap, changes) => {
|
|
6590
|
+
this.broadcastPartitionMap(partitionMap);
|
|
6591
|
+
});
|
|
4744
6592
|
this.lockManager = new LockManager();
|
|
4745
6593
|
this.lockManager.on("lockGranted", (evt) => this.handleLockGranted(evt));
|
|
4746
6594
|
this.topicManager = new TopicManager({
|
|
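The hunk above shows which server-config fields drive replication: replicationEnabled (anything but false turns the pipeline on), defaultConsistency (falls back to EVENTUAL) and replicationConfig (merged over DEFAULT_REPLICATION_CONFIG). A sketch of the embedding side, with all unrelated ServerCoordinator options elided since they are not part of this hunk:

  import { ConsistencyLevel } from "@topgunbuild/core";

  const serverConfig = {
    // ...the usual ServerCoordinator options (ports, auth, cluster settings, ...)...
    replicationEnabled: true,                    // default unless explicitly set to false
    defaultConsistency: ConsistencyLevel.QUORUM, // EVENTUAL when omitted
    replicationConfig: {
      // merged over DEFAULT_REPLICATION_CONFIG from @topgunbuild/core; the pipeline reads
      // fields such as ackTimeoutMs, batchIntervalMs, batchSize, queueSizeLimit, maxRetries.
    }
  };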
@@ -4857,7 +6705,7 @@ var ServerCoordinator = class {
|
|
|
4857
6705
|
this.metricsService.destroy();
|
|
4858
6706
|
this.wss.close();
|
|
4859
6707
|
logger.info(`Closing ${this.clients.size} client connections...`);
|
|
4860
|
-
const shutdownMsg =
|
|
6708
|
+
const shutdownMsg = serialize4({ type: "SHUTDOWN_PENDING", retryAfter: 5e3 });
|
|
4861
6709
|
for (const client of this.clients.values()) {
|
|
4862
6710
|
try {
|
|
4863
6711
|
if (client.socket.readyState === WebSocket3.OPEN) {
|
|
@@ -4879,6 +6727,9 @@ var ServerCoordinator = class {
|
|
|
4879
6727
|
await this.workerPool.shutdown(5e3);
|
|
4880
6728
|
logger.info("Worker pool shutdown complete.");
|
|
4881
6729
|
}
|
|
6730
|
+
if (this.replicationPipeline) {
|
|
6731
|
+
this.replicationPipeline.close();
|
|
6732
|
+
}
|
|
4882
6733
|
if (this.cluster) {
|
|
4883
6734
|
this.cluster.stop();
|
|
4884
6735
|
}
|
|
@@ -5026,7 +6877,7 @@ var ServerCoordinator = class {
|
|
|
5026
6877
|
this.clients.delete(clientId);
|
|
5027
6878
|
this.metricsService.setConnectedClients(this.clients.size);
|
|
5028
6879
|
});
|
|
5029
|
-
ws.send(
|
|
6880
|
+
ws.send(serialize4({ type: "AUTH_REQUIRED" }));
|
|
5030
6881
|
}
|
|
5031
6882
|
async handleMessage(client, rawMessage) {
|
|
5032
6883
|
const parseResult = MessageSchema.safeParse(rawMessage);
|
|
@@ -5436,6 +7287,23 @@ var ServerCoordinator = class {
|
|
|
5436
7287
|
}
|
|
5437
7288
|
break;
|
|
5438
7289
|
}
|
|
7290
|
+
// ============ Phase 4: Partition Map Request Handler ============
|
|
7291
|
+
case "PARTITION_MAP_REQUEST": {
|
|
7292
|
+
const clientVersion = message.payload?.currentVersion ?? 0;
|
|
7293
|
+
const currentMap = this.partitionService.getPartitionMap();
|
|
7294
|
+
if (clientVersion < currentMap.version) {
|
|
7295
|
+
client.writer.write({
|
|
7296
|
+
type: "PARTITION_MAP",
|
|
7297
|
+
payload: currentMap
|
|
7298
|
+
});
|
|
7299
|
+
logger.debug({
|
|
7300
|
+
clientId: client.id,
|
|
7301
|
+
clientVersion,
|
|
7302
|
+
serverVersion: currentMap.version
|
|
7303
|
+
}, "Sent partition map to client");
|
|
7304
|
+
}
|
|
7305
|
+
break;
|
|
7306
|
+
}
|
|
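The handler above is a version-gated pull: the client reports the partition-map version it already holds and the server replies only when its own map is newer (a matching push happens on rebalancing, see broadcastPartitionMap further below). On the wire the exchange looks roughly like this; the payload fields beyond version are not spelled out in this hunk, so the map body is indicated only as a comment:

  // client -> server
  { type: "PARTITION_MAP_REQUEST", payload: { currentVersion: 3 } }

  // server -> client, sent only when currentVersion < partitionMap.version
  { type: "PARTITION_MAP", payload: { version: 4 /* ...partition ownership data... */ } }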
5439
7307
|
// ============ ORMap Sync Message Handlers ============
|
|
5440
7308
|
case "ORMAP_SYNC_INIT": {
|
|
5441
7309
|
if (!this.securityManager.checkPermission(client.principal, message.mapName, "READ")) {
|
|
@@ -5639,6 +7507,28 @@ var ServerCoordinator = class {
|
|
|
5639
7507
|
client.lastActiveHlc = this.hlc.now();
|
|
5640
7508
|
}
|
|
5641
7509
|
}
|
|
7510
|
+
// ============ Phase 4: Partition Map Broadcast ============
|
|
7511
|
+
/**
|
|
7512
|
+
* Broadcast partition map to all connected and authenticated clients.
|
|
7513
|
+
* Called when partition topology changes (node join/leave/failover).
|
|
7514
|
+
*/
|
|
7515
|
+
broadcastPartitionMap(partitionMap) {
|
|
7516
|
+
const message = {
|
|
7517
|
+
type: "PARTITION_MAP",
|
|
7518
|
+
payload: partitionMap
|
|
7519
|
+
};
|
|
7520
|
+
let broadcastCount = 0;
|
|
7521
|
+
for (const client of this.clients.values()) {
|
|
7522
|
+
if (client.isAuthenticated && client.socket.readyState === WebSocket3.OPEN) {
|
|
7523
|
+
client.writer.write(message);
|
|
7524
|
+
broadcastCount++;
|
|
7525
|
+
}
|
|
7526
|
+
}
|
|
7527
|
+
logger.info({
|
|
7528
|
+
version: partitionMap.version,
|
|
7529
|
+
clientCount: broadcastCount
|
|
7530
|
+
}, "Broadcast partition map to clients");
|
|
7531
|
+
}
|
|
5642
7532
|
broadcast(message, excludeClientId) {
|
|
5643
7533
|
const isServerEvent = message.type === "SERVER_EVENT";
|
|
5644
7534
|
if (isServerEvent) {
|
|
@@ -5669,7 +7559,7 @@ var ServerCoordinator = class {
|
|
|
5669
7559
|
client.writer.write({ ...message, payload: newPayload });
|
|
5670
7560
|
}
|
|
5671
7561
|
} else {
|
|
5672
|
-
const msgData =
|
|
7562
|
+
const msgData = serialize4(message);
|
|
5673
7563
|
for (const [id, client] of this.clients) {
|
|
5674
7564
|
if (id !== excludeClientId && client.socket.readyState === 1) {
|
|
5675
7565
|
client.writer.writeRaw(msgData);
|
|
@@ -5747,7 +7637,7 @@ var ServerCoordinator = class {
|
|
|
5747
7637
|
payload: { events: filteredEvents },
|
|
5748
7638
|
timestamp: this.hlc.now()
|
|
5749
7639
|
};
|
|
5750
|
-
const serializedBatch =
|
|
7640
|
+
const serializedBatch = serialize4(batchMessage);
|
|
5751
7641
|
for (const client of clients) {
|
|
5752
7642
|
try {
|
|
5753
7643
|
client.writer.writeRaw(serializedBatch);
|
|
@@ -5832,7 +7722,7 @@ var ServerCoordinator = class {
|
|
|
5832
7722
|
payload: { events: filteredEvents },
|
|
5833
7723
|
timestamp: this.hlc.now()
|
|
5834
7724
|
};
|
|
5835
|
-
const serializedBatch =
|
|
7725
|
+
const serializedBatch = serialize4(batchMessage);
|
|
5836
7726
|
for (const client of clients) {
|
|
5837
7727
|
sendPromises.push(new Promise((resolve, reject) => {
|
|
5838
7728
|
try {
|
|
@@ -6117,6 +8007,26 @@ var ServerCoordinator = class {
|
|
|
6117
8007
|
}
|
|
6118
8008
|
}
|
|
6119
8009
|
}
|
|
8010
|
+
/**
|
|
8011
|
+
* Apply replicated operation from another node (callback for ReplicationPipeline)
|
|
8012
|
+
* This is called when we receive a replicated operation as a backup node
|
|
8013
|
+
*/
|
|
8014
|
+
async applyReplicatedOperation(operation, opId, sourceNode) {
|
|
8015
|
+
try {
|
|
8016
|
+
const op = operation;
|
|
8017
|
+
logger.debug({ sourceNode, opId, mapName: op.mapName, key: op.key }, "Applying replicated operation");
|
|
8018
|
+
const { eventPayload } = this.applyOpToMap(op);
|
|
8019
|
+
this.broadcast({
|
|
8020
|
+
type: "SERVER_EVENT",
|
|
8021
|
+
payload: eventPayload,
|
|
8022
|
+
timestamp: this.hlc.now()
|
|
8023
|
+
});
|
|
8024
|
+
return true;
|
|
8025
|
+
} catch (error) {
|
|
8026
|
+
logger.error({ sourceNode, opId, error }, "Failed to apply replicated operation");
|
|
8027
|
+
return false;
|
|
8028
|
+
}
|
|
8029
|
+
}
|
|
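applyReplicatedOperation is the server's implementation of the applier contract used by ReplicationPipeline.setOperationApplier(): (operation, opId, sourceNode) -> boolean (or a Promise of one), where a falsy return or a throw makes the backup report success: false in its acknowledgment. A minimal custom applier sketch, assuming access to a pipeline instance:

  // Sketch only: a counting applier. A real applier would write `operation`
  // into its own maps/storage before returning true.
  let applied = 0;
  pipeline.setOperationApplier(async (operation, opId, sourceNode) => {
    applied++;
    console.log(`replicated ${opId} from ${sourceNode}`);
    return true;
  });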
6120
8030
|
/**
|
|
6121
8031
|
* Build OpContext for interceptors.
|
|
6122
8032
|
*/
|
|
@@ -6205,6 +8115,12 @@ var ServerCoordinator = class {
|
|
|
6205
8115
|
throw err;
|
|
6206
8116
|
}
|
|
6207
8117
|
const { eventPayload } = this.applyOpToMap(op);
|
|
8118
|
+
if (this.replicationPipeline && !fromCluster) {
|
|
8119
|
+
const opId = op.id || `${op.mapName}:${op.key}:${Date.now()}`;
|
|
8120
|
+
this.replicationPipeline.replicate(op, opId, op.key).catch((err) => {
|
|
8121
|
+
logger.warn({ opId, key: op.key, err }, "Replication failed (non-fatal)");
|
|
8122
|
+
});
|
|
8123
|
+
}
|
|
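Note that the replicate() call above is fire-and-forget: the promise is not awaited, so client-facing writes never block on backup nodes and a failed replication only logs a warning. It is also skipped when the operation arrived from another cluster node (!fromCluster), presumably so forwarded operations are not replicated a second time. The opId shows the fallback scheme as well: op.id when the operation carries one, otherwise a "<mapName>:<key>:<timestamp>" string built on the spot. Writes that need stronger guarantees can call the pipeline directly and await it, as in the ReplicationPipeline sketch earlier.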
6208
8124
|
this.broadcast({
|
|
6209
8125
|
type: "SERVER_EVENT",
|
|
6210
8126
|
payload: eventPayload,
|
|
@@ -6327,6 +8243,12 @@ var ServerCoordinator = class {
|
|
|
6327
8243
|
throw err;
|
|
6328
8244
|
}
|
|
6329
8245
|
const { eventPayload } = this.applyOpToMap(op);
|
|
8246
|
+
if (this.replicationPipeline) {
|
|
8247
|
+
const opId = op.id || `${op.mapName}:${op.key}:${Date.now()}`;
|
|
8248
|
+
this.replicationPipeline.replicate(op, opId, op.key).catch((err) => {
|
|
8249
|
+
logger.warn({ opId, key: op.key, err }, "Batch replication failed (non-fatal)");
|
|
8250
|
+
});
|
|
8251
|
+
}
|
|
6330
8252
|
batchedEvents.push(eventPayload);
|
|
6331
8253
|
this.broadcastToCluster(eventPayload);
|
|
6332
8254
|
this.runAfterInterceptors(op, context);
|
|
@@ -7274,18 +9196,403 @@ function logNativeStatus() {
|
|
|
7274
9196
|
` - SharedArrayBuffer: ${status.sharedArrayBuffer ? "available" : "unavailable"}`
|
|
7275
9197
|
);
|
|
7276
9198
|
}
|
|
9199
|
+
|
|
9200
|
+
// src/cluster/ClusterCoordinator.ts
|
|
9201
|
+
import { EventEmitter as EventEmitter9 } from "events";
|
|
9202
|
+
import {
|
|
9203
|
+
DEFAULT_MIGRATION_CONFIG as DEFAULT_MIGRATION_CONFIG3,
|
|
9204
|
+
DEFAULT_REPLICATION_CONFIG as DEFAULT_REPLICATION_CONFIG3
|
|
9205
|
+
} from "@topgunbuild/core";
|
|
9206
|
+
var DEFAULT_CLUSTER_COORDINATOR_CONFIG = {
|
|
9207
|
+
gradualRebalancing: true,
|
|
9208
|
+
migration: DEFAULT_MIGRATION_CONFIG3,
|
|
9209
|
+
replication: DEFAULT_REPLICATION_CONFIG3,
|
|
9210
|
+
replicationEnabled: true
|
|
9211
|
+
};
|
|
9212
|
+
var ClusterCoordinator = class extends EventEmitter9 {
|
|
9213
|
+
constructor(config) {
|
|
9214
|
+
super();
|
|
9215
|
+
this.replicationPipeline = null;
|
|
9216
|
+
// State
|
|
9217
|
+
this.started = false;
|
|
9218
|
+
this.actualPort = 0;
|
|
9219
|
+
this.config = {
|
|
9220
|
+
...DEFAULT_CLUSTER_COORDINATOR_CONFIG,
|
|
9221
|
+
...config
|
|
9222
|
+
};
|
|
9223
|
+
this.clusterManager = new ClusterManager(this.config.cluster);
|
|
9224
|
+
this.lagTracker = new LagTracker();
|
|
9225
|
+
const partitionServiceConfig = {
|
|
9226
|
+
gradualRebalancing: this.config.gradualRebalancing,
|
|
9227
|
+
migration: this.config.migration
|
|
9228
|
+
};
|
|
9229
|
+
this.partitionService = new PartitionService(this.clusterManager, partitionServiceConfig);
|
|
9230
|
+
if (this.config.replicationEnabled) {
|
|
9231
|
+
this.replicationPipeline = new ReplicationPipeline(
|
|
9232
|
+
this.clusterManager,
|
|
9233
|
+
this.partitionService,
|
|
9234
|
+
this.config.replication
|
|
9235
|
+
);
|
|
9236
|
+
}
|
|
9237
|
+
this.setupEventHandlers();
|
|
9238
|
+
}
|
|
9239
|
+
// ============================================
|
|
9240
|
+
// Lifecycle Methods
|
|
9241
|
+
// ============================================
|
|
9242
|
+
/**
|
|
9243
|
+
* Start the cluster coordinator
|
|
9244
|
+
*/
|
|
9245
|
+
async start() {
|
|
9246
|
+
if (this.started) {
|
|
9247
|
+
return this.actualPort;
|
|
9248
|
+
}
|
|
9249
|
+
logger.info({ nodeId: this.config.cluster.nodeId }, "Starting ClusterCoordinator");
|
|
9250
|
+
this.actualPort = await this.clusterManager.start();
|
|
9251
|
+
const migrationManager = this.partitionService.getMigrationManager();
|
|
9252
|
+
if (migrationManager && this.config.dataCollector) {
|
|
9253
|
+
migrationManager.setDataCollector(this.config.dataCollector);
|
|
9254
|
+
}
|
|
9255
|
+
if (migrationManager && this.config.dataStorer) {
|
|
9256
|
+
migrationManager.setDataStorer(this.config.dataStorer);
|
|
9257
|
+
}
|
|
9258
|
+
this.started = true;
|
|
9259
|
+
this.emit("started");
|
|
9260
|
+
logger.info({ nodeId: this.config.cluster.nodeId, port: this.actualPort }, "ClusterCoordinator started");
|
|
9261
|
+
return this.actualPort;
|
|
9262
|
+
}
|
|
9263
|
+
/**
|
|
9264
|
+
* Stop the cluster coordinator
|
|
9265
|
+
*/
|
|
9266
|
+
async stop() {
|
|
9267
|
+
if (!this.started) return;
|
|
9268
|
+
logger.info({ nodeId: this.config.cluster.nodeId }, "Stopping ClusterCoordinator");
|
|
9269
|
+
await this.partitionService.cancelMigrations();
|
|
9270
|
+
this.replicationPipeline?.close();
|
|
9271
|
+
this.clusterManager.stop();
|
|
9272
|
+
this.started = false;
|
|
9273
|
+
this.emit("stopped");
|
|
9274
|
+
logger.info({ nodeId: this.config.cluster.nodeId }, "ClusterCoordinator stopped");
|
|
9275
|
+
}
|
|
9276
|
+
// ============================================
|
|
9277
|
+
// Cluster Information
|
|
9278
|
+
// ============================================
|
|
9279
|
+
/**
|
|
9280
|
+
* Get local node ID
|
|
9281
|
+
*/
|
|
9282
|
+
getNodeId() {
|
|
9283
|
+
return this.config.cluster.nodeId;
|
|
9284
|
+
}
|
|
9285
|
+
/**
|
|
9286
|
+
* Get cluster port
|
|
9287
|
+
*/
|
|
9288
|
+
getPort() {
|
|
9289
|
+
return this.actualPort;
|
|
9290
|
+
}
|
|
9291
|
+
/**
|
|
9292
|
+
* Get all cluster members
|
|
9293
|
+
*/
|
|
9294
|
+
getMembers() {
|
|
9295
|
+
return this.clusterManager.getMembers();
|
|
9296
|
+
}
|
|
9297
|
+
/**
|
|
9298
|
+
* Check if this is the local node
|
|
9299
|
+
*/
|
|
9300
|
+
isLocal(nodeId) {
|
|
9301
|
+
return this.clusterManager.isLocal(nodeId);
|
|
9302
|
+
}
|
|
9303
|
+
/**
|
|
9304
|
+
* Check if coordinator is started
|
|
9305
|
+
*/
|
|
9306
|
+
isStarted() {
|
|
9307
|
+
return this.started;
|
|
9308
|
+
}
|
|
9309
|
+
// ============================================
|
|
9310
|
+
// Partition Operations
|
|
9311
|
+
// ============================================
|
|
9312
|
+
/**
|
|
9313
|
+
* Get current partition map
|
|
9314
|
+
*/
|
|
9315
|
+
getPartitionMap() {
|
|
9316
|
+
return this.partitionService.getPartitionMap();
|
|
9317
|
+
}
|
|
9318
|
+
/**
|
|
9319
|
+
* Get partition map version
|
|
9320
|
+
*/
|
|
9321
|
+
getPartitionMapVersion() {
|
|
9322
|
+
return this.partitionService.getMapVersion();
|
|
9323
|
+
}
|
|
9324
|
+
/**
|
|
9325
|
+
* Get partition ID for a key
|
|
9326
|
+
*/
|
|
9327
|
+
getPartitionId(key) {
|
|
9328
|
+
return this.partitionService.getPartitionId(key);
|
|
9329
|
+
}
|
|
9330
|
+
/**
|
|
9331
|
+
* Get owner node for a key
|
|
9332
|
+
*/
|
|
9333
|
+
getOwner(key) {
|
|
9334
|
+
return this.partitionService.getOwner(key);
|
|
9335
|
+
}
|
|
9336
|
+
/**
|
|
9337
|
+
* Check if this node owns the key
|
|
9338
|
+
*/
|
|
9339
|
+
isLocalOwner(key) {
|
|
9340
|
+
return this.partitionService.isLocalOwner(key);
|
|
9341
|
+
}
|
|
9342
|
+
/**
|
|
9343
|
+
* Check if this node is a backup for the key
|
|
9344
|
+
*/
|
|
9345
|
+
isLocalBackup(key) {
|
|
9346
|
+
return this.partitionService.isLocalBackup(key);
|
|
9347
|
+
}
|
|
9348
|
+
/**
|
|
9349
|
+
* Get backup nodes for a partition
|
|
9350
|
+
*/
|
|
9351
|
+
getBackups(partitionId) {
|
|
9352
|
+
return this.partitionService.getBackups(partitionId);
|
|
9353
|
+
}
|
|
9354
|
+
/**
|
|
9355
|
+
* Check if partition is currently migrating
|
|
9356
|
+
*/
|
|
9357
|
+
isMigrating(partitionId) {
|
|
9358
|
+
return this.partitionService.isMigrating(partitionId);
|
|
9359
|
+
}
|
|
9360
|
+
/**
|
|
9361
|
+
* Check if any rebalancing is in progress
|
|
9362
|
+
*/
|
|
9363
|
+
isRebalancing() {
|
|
9364
|
+
return this.partitionService.isRebalancing();
|
|
9365
|
+
}
|
|
9366
|
+
// ============================================
|
|
9367
|
+
// Migration Operations
|
|
9368
|
+
// ============================================
|
|
9369
|
+
/**
|
|
9370
|
+
* Get migration status
|
|
9371
|
+
*/
|
|
9372
|
+
getMigrationStatus() {
|
|
9373
|
+
return this.partitionService.getMigrationStatus();
|
|
9374
|
+
}
|
|
9375
|
+
/**
|
|
9376
|
+
* Get migration metrics
|
|
9377
|
+
*/
|
|
9378
|
+
getMigrationMetrics() {
|
|
9379
|
+
return this.partitionService.getMigrationManager()?.getMetrics() ?? null;
|
|
9380
|
+
}
|
|
9381
|
+
/**
|
|
9382
|
+
* Cancel all active migrations
|
|
9383
|
+
*/
|
|
9384
|
+
async cancelMigrations() {
|
|
9385
|
+
await this.partitionService.cancelMigrations();
|
|
9386
|
+
}
|
|
9387
|
+
/**
|
|
9388
|
+
* Set data collector for migrations
|
|
9389
|
+
*/
|
|
9390
|
+
setDataCollector(collector) {
|
|
9391
|
+
const migrationManager = this.partitionService.getMigrationManager();
|
|
9392
|
+
if (migrationManager) {
|
|
9393
|
+
migrationManager.setDataCollector(collector);
|
|
9394
|
+
}
|
|
9395
|
+
}
|
|
9396
|
+
/**
|
|
9397
|
+
* Set data storer for incoming migrations
|
|
9398
|
+
*/
|
|
9399
|
+
setDataStorer(storer) {
|
|
9400
|
+
const migrationManager = this.partitionService.getMigrationManager();
|
|
9401
|
+
if (migrationManager) {
|
|
9402
|
+
migrationManager.setDataStorer(storer);
|
|
9403
|
+
}
|
|
9404
|
+
}
|
|
9405
|
+
// ============================================
|
|
9406
|
+
// Replication Operations
|
|
9407
|
+
// ============================================
|
|
9408
|
+
/**
|
|
9409
|
+
* Replicate an operation to backup nodes
|
|
9410
|
+
*/
|
|
9411
|
+
async replicate(operation, opId, key, options = {}) {
|
|
9412
|
+
if (!this.replicationPipeline) {
|
|
9413
|
+
return { success: true, ackedBy: [] };
|
|
9414
|
+
}
|
|
9415
|
+
return this.replicationPipeline.replicate(operation, opId, key, options);
|
|
9416
|
+
}
|
|
9417
|
+
/**
|
|
9418
|
+
* Get replication health status
|
|
9419
|
+
*/
|
|
9420
|
+
getReplicationHealth() {
|
|
9421
|
+
return this.lagTracker.getHealth();
|
|
9422
|
+
}
|
|
9423
|
+
/**
|
|
9424
|
+
* Get replication lag for a specific node
|
|
9425
|
+
*/
|
|
9426
|
+
getReplicationLag(nodeId) {
|
|
9427
|
+
return this.lagTracker.getLag(nodeId);
|
|
9428
|
+
}
|
|
9429
|
+
/**
|
|
9430
|
+
* Check if a node is healthy for replication
|
|
9431
|
+
*/
|
|
9432
|
+
isNodeHealthy(nodeId) {
|
|
9433
|
+
return this.lagTracker.isNodeHealthy(nodeId);
|
|
9434
|
+
}
|
|
9435
|
+
/**
|
|
9436
|
+
* Check if a node is laggy
|
|
9437
|
+
*/
|
|
9438
|
+
isNodeLaggy(nodeId) {
|
|
9439
|
+
return this.lagTracker.isNodeLaggy(nodeId);
|
|
9440
|
+
}
|
|
9441
|
+
// ============================================
|
|
9442
|
+
// Cluster Communication
|
|
9443
|
+
// ============================================
|
|
9444
|
+
/**
|
|
9445
|
+
* Send message to a specific node
|
|
9446
|
+
*/
|
|
9447
|
+
send(nodeId, message) {
|
|
9448
|
+
this.clusterManager.sendToNode(nodeId, message);
|
|
9449
|
+
}
|
|
9450
|
+
/**
|
|
9451
|
+
* Broadcast message to all nodes
|
|
9452
|
+
*/
|
|
9453
|
+
broadcast(message) {
|
|
9454
|
+
for (const nodeId of this.clusterManager.getMembers()) {
|
|
9455
|
+
if (!this.clusterManager.isLocal(nodeId)) {
|
|
9456
|
+
this.clusterManager.sendToNode(nodeId, message);
|
|
9457
|
+
}
|
|
9458
|
+
}
|
|
9459
|
+
}
|
|
9460
|
+
+  // ============================================
+  // Component Access
+  // ============================================
+  /**
+   * Get underlying ClusterManager
+   */
+  getClusterManager() {
+    return this.clusterManager;
+  }
+  /**
+   * Get underlying PartitionService
+   */
+  getPartitionService() {
+    return this.partitionService;
+  }
+  /**
+   * Get underlying ReplicationPipeline
+   */
+  getReplicationPipeline() {
+    return this.replicationPipeline;
+  }
+  /**
+   * Get underlying LagTracker
+   */
+  getLagTracker() {
+    return this.lagTracker;
+  }
+  // ============================================
+  // Metrics Export
+  // ============================================
+  /**
+   * Get all metrics in Prometheus format
+   */
+  getPrometheusMetrics() {
+    const lines = [];
+    lines.push("# HELP topgun_cluster_members Number of cluster members");
+    lines.push("# TYPE topgun_cluster_members gauge");
+    lines.push(`topgun_cluster_members ${this.clusterManager.getMembers().length}`);
+    lines.push("");
+    lines.push("# HELP topgun_cluster_started Cluster started status (1=started, 0=stopped)");
+    lines.push("# TYPE topgun_cluster_started gauge");
+    lines.push(`topgun_cluster_started ${this.started ? 1 : 0}`);
+    lines.push("");
+    lines.push("# HELP topgun_partition_map_version Current partition map version");
+    lines.push("# TYPE topgun_partition_map_version gauge");
+    lines.push(`topgun_partition_map_version ${this.partitionService.getMapVersion()}`);
+    const migrationMetrics = this.getMigrationMetrics();
+    if (migrationMetrics) {
+      lines.push("");
+      lines.push("# HELP topgun_migrations_started Total migrations started");
+      lines.push("# TYPE topgun_migrations_started counter");
+      lines.push(`topgun_migrations_started ${migrationMetrics.migrationsStarted}`);
+      lines.push("");
+      lines.push("# HELP topgun_migrations_completed Total migrations completed");
+      lines.push("# TYPE topgun_migrations_completed counter");
+      lines.push(`topgun_migrations_completed ${migrationMetrics.migrationsCompleted}`);
+      lines.push("");
+      lines.push("# HELP topgun_migrations_failed Total migrations failed");
+      lines.push("# TYPE topgun_migrations_failed counter");
+      lines.push(`topgun_migrations_failed ${migrationMetrics.migrationsFailed}`);
+      lines.push("");
+      lines.push("# HELP topgun_migrations_active Currently active migrations");
+      lines.push("# TYPE topgun_migrations_active gauge");
+      lines.push(`topgun_migrations_active ${migrationMetrics.activeMigrations}`);
+      lines.push("");
+      lines.push("# HELP topgun_migrations_queued Queued migrations");
+      lines.push("# TYPE topgun_migrations_queued gauge");
+      lines.push(`topgun_migrations_queued ${migrationMetrics.queuedMigrations}`);
+    }
+    lines.push("");
+    lines.push(this.lagTracker.toPrometheusMetrics());
+    return lines.join("\n");
+  }
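getPrometheusMetrics() renders cluster, partition, migration, and lag metrics as a plain-text exposition document, so it can back a /metrics endpoint directly. A sketch using Node's built-in http module; the route and port are arbitrary choices for the example, and `coordinator` is a placeholder instance:

// Hypothetical /metrics endpoint for a Prometheus scraper.
import { createServer } from "http";

const metricsServer = createServer((req, res) => {
  if (req.url === "/metrics") {
    res.writeHead(200, { "Content-Type": "text/plain; version=0.0.4" });
    res.end(coordinator.getPrometheusMetrics());
  } else {
    res.writeHead(404);
    res.end();
  }
});
metricsServer.listen(9464); // arbitrary example port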
+  // ============================================
+  // Private Methods
+  // ============================================
+  setupEventHandlers() {
+    this.clusterManager.on("memberJoined", (nodeId) => {
+      logger.info({ nodeId }, "Cluster member joined");
+      this.emit("member:joined", nodeId);
+    });
+    this.clusterManager.on("memberLeft", (nodeId) => {
+      logger.info({ nodeId }, "Cluster member left");
+      this.lagTracker.removeNode(nodeId);
+      this.emit("member:left", nodeId);
+    });
+    this.partitionService.on("rebalanced", (map, changes) => {
+      logger.info({ version: map.version, changesCount: changes.length }, "Partition map rebalanced");
+      this.emit("partition:rebalanced", map, changes);
+    });
+    this.partitionService.on("partitionMoved", (info) => {
+      this.emit("partition:moved", info);
+    });
+    const migrationManager = this.partitionService.getMigrationManager();
+    if (migrationManager) {
+      migrationManager.on("migrationStarted", (partitionId, targetNode) => {
+        this.emit("migration:started", partitionId, targetNode);
+      });
+      migrationManager.on("migrationComplete", (partitionId) => {
+        this.emit("migration:completed", partitionId);
+      });
+      migrationManager.on("migrationFailed", (partitionId, error) => {
+        this.emit("migration:failed", partitionId, error);
+      });
+    }
+    if (this.replicationPipeline) {
+      this.replicationPipeline.on("ackReceived", (nodeId) => {
+        this.lagTracker.recordAck(nodeId);
+      });
+      this.replicationPipeline.on("replicationSent", (nodeId) => {
+        this.lagTracker.incrementPending(nodeId);
+      });
+    }
+  }
+};
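setupEventHandlers() re-emits the lower-level component events under namespaced names (member:*, partition:*, migration:*), so application code only needs to listen on the coordinator itself. A subscriber sketch using the event names that appear above; the handler bodies are illustrative placeholders:

// Event names are taken from the diff above; handlers are placeholders.
coordinator.on("member:joined", (nodeId) => console.log("member joined:", nodeId));
coordinator.on("member:left", (nodeId) => console.log("member left:", nodeId));
coordinator.on("partition:rebalanced", (map, changes) => {
  console.log(`partition map v${map.version}: ${changes.length} ownership changes`);
});
coordinator.on("migration:failed", (partitionId, error) => {
  console.error("migration failed for partition", partitionId, error);
});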
 export {
   BufferPool,
+  ClusterCoordinator,
+  ClusterManager,
   ConnectionRateLimiter,
+  DEFAULT_CLUSTER_COORDINATOR_CONFIG,
+  DEFAULT_LAG_TRACKER_CONFIG,
   FilterTasklet,
   ForEachTasklet,
   IteratorTasklet,
+  LagTracker,
+  LockManager,
   MapTasklet,
   MemoryServerAdapter,
+  MigrationManager,
   ObjectPool,
+  PartitionService,
   PostgresAdapter,
   RateLimitInterceptor,
   ReduceTasklet,
+  ReplicationPipeline,
   SecurityManager,
   ServerCoordinator,
   TaskletScheduler,