@topgunbuild/server 0.2.1 → 0.3.0

This diff shows the changes between two publicly released versions of this package, as published to one of the supported public registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in those registries.
package/dist/index.mjs CHANGED
@@ -10,7 +10,7 @@ import { createServer as createHttpServer } from "http";
10
10
  import { createServer as createHttpsServer } from "https";
11
11
  import { readFileSync as readFileSync2 } from "fs";
12
12
  import { WebSocketServer as WebSocketServer2, WebSocket as WebSocket3 } from "ws";
13
- import { HLC, LWWMap as LWWMap2, ORMap as ORMap2, serialize as serialize3, deserialize, MessageSchema, WriteConcern as WriteConcern2 } from "@topgunbuild/core";
13
+ import { HLC, LWWMap as LWWMap2, ORMap as ORMap2, serialize as serialize4, deserialize, MessageSchema, WriteConcern as WriteConcern2, ConsistencyLevel as ConsistencyLevel2, DEFAULT_REPLICATION_CONFIG as DEFAULT_REPLICATION_CONFIG2 } from "@topgunbuild/core";
14
14
  import * as jwt from "jsonwebtoken";
15
15
  import * as crypto from "crypto";
16
16
 
@@ -570,11 +570,268 @@ var TopicManager = class {
570
570
 
571
571
  // src/cluster/ClusterManager.ts
572
572
  import { WebSocket, WebSocketServer } from "ws";
573
- import { EventEmitter } from "events";
573
+ import { EventEmitter as EventEmitter2 } from "events";
574
574
  import * as dns from "dns";
575
575
  import { readFileSync } from "fs";
576
576
  import * as https from "https";
577
- var ClusterManager = class extends EventEmitter {
577
+
578
+ // src/cluster/FailureDetector.ts
579
+ import { EventEmitter } from "events";
580
+ var DEFAULT_FAILURE_DETECTOR_CONFIG = {
581
+ heartbeatIntervalMs: 1e3,
582
+ suspicionTimeoutMs: 5e3,
583
+ confirmationTimeoutMs: 1e4,
584
+ phiThreshold: 8,
585
+ minSamples: 10,
586
+ maxSamples: 100,
587
+ initialHeartbeatIntervalMs: 1e3
588
+ };
589
+ var FailureDetector = class extends EventEmitter {
590
+ constructor(config = {}) {
591
+ super();
592
+ this.nodeStates = /* @__PURE__ */ new Map();
593
+ this.monitoringNodes = /* @__PURE__ */ new Set();
594
+ this.confirmationTimers = /* @__PURE__ */ new Map();
595
+ this.started = false;
596
+ this.config = { ...DEFAULT_FAILURE_DETECTOR_CONFIG, ...config };
597
+ }
598
+ /**
599
+ * Start the failure detector monitoring loop.
600
+ */
601
+ start() {
602
+ if (this.started) return;
603
+ this.started = true;
604
+ this.checkTimer = setInterval(() => {
605
+ this.checkAllNodes();
606
+ }, this.config.heartbeatIntervalMs);
607
+ logger.info({ config: this.config }, "FailureDetector started");
608
+ }
609
+ /**
610
+ * Stop the failure detector and clean up.
611
+ */
612
+ stop() {
613
+ if (!this.started) return;
614
+ this.started = false;
615
+ if (this.checkTimer) {
616
+ clearInterval(this.checkTimer);
617
+ this.checkTimer = void 0;
618
+ }
619
+ for (const timer of this.confirmationTimers.values()) {
620
+ clearTimeout(timer);
621
+ }
622
+ this.confirmationTimers.clear();
623
+ logger.info("FailureDetector stopped");
624
+ }
625
+ /**
626
+ * Start monitoring a node.
627
+ */
628
+ startMonitoring(nodeId) {
629
+ if (this.monitoringNodes.has(nodeId)) return;
630
+ this.monitoringNodes.add(nodeId);
631
+ this.nodeStates.set(nodeId, {
632
+ lastHeartbeat: Date.now(),
633
+ intervalHistory: [],
634
+ isSuspected: false,
635
+ isConfirmedFailed: false
636
+ });
637
+ logger.debug({ nodeId }, "Started monitoring node");
638
+ }
639
+ /**
640
+ * Stop monitoring a node.
641
+ */
642
+ stopMonitoring(nodeId) {
643
+ this.monitoringNodes.delete(nodeId);
644
+ this.nodeStates.delete(nodeId);
645
+ const timer = this.confirmationTimers.get(nodeId);
646
+ if (timer) {
647
+ clearTimeout(timer);
648
+ this.confirmationTimers.delete(nodeId);
649
+ }
650
+ logger.debug({ nodeId }, "Stopped monitoring node");
651
+ }
652
+ /**
653
+ * Record a heartbeat from a node.
654
+ * This updates the node's state and clears any suspicion.
655
+ */
656
+ recordHeartbeat(nodeId) {
657
+ const state = this.nodeStates.get(nodeId);
658
+ if (!state) {
659
+ this.startMonitoring(nodeId);
660
+ return;
661
+ }
662
+ const now = Date.now();
663
+ const interval = now - state.lastHeartbeat;
664
+ state.intervalHistory.push(interval);
665
+ if (state.intervalHistory.length > this.config.maxSamples) {
666
+ state.intervalHistory.shift();
667
+ }
668
+ state.lastHeartbeat = now;
669
+ if (state.isSuspected) {
670
+ state.isSuspected = false;
671
+ state.suspicionStartTime = void 0;
672
+ state.isConfirmedFailed = false;
673
+ const timer = this.confirmationTimers.get(nodeId);
674
+ if (timer) {
675
+ clearTimeout(timer);
676
+ this.confirmationTimers.delete(nodeId);
677
+ }
678
+ this.emit("nodeRecovered", { nodeId });
679
+ logger.info({ nodeId }, "Node recovered");
680
+ }
681
+ }
682
+ /**
683
+ * Check all monitored nodes for failure.
684
+ */
685
+ checkAllNodes() {
686
+ for (const nodeId of this.monitoringNodes) {
687
+ const phi = this.calculatePhi(nodeId);
688
+ const state = this.nodeStates.get(nodeId);
689
+ if (!state) continue;
690
+ if (phi > this.config.phiThreshold) {
691
+ if (!state.isSuspected) {
692
+ state.isSuspected = true;
693
+ state.suspicionStartTime = Date.now();
694
+ this.emit("nodeSuspected", { nodeId, phi });
695
+ logger.warn({ nodeId, phi }, "Node suspected");
696
+ this.scheduleConfirmation(nodeId);
697
+ }
698
+ }
699
+ }
700
+ }
701
+ /**
702
+ * Schedule failure confirmation after suspicion timeout.
703
+ */
704
+ scheduleConfirmation(nodeId) {
705
+ const existingTimer = this.confirmationTimers.get(nodeId);
706
+ if (existingTimer) {
707
+ clearTimeout(existingTimer);
708
+ }
709
+ const timer = setTimeout(() => {
710
+ this.confirmFailure(nodeId);
711
+ }, this.config.confirmationTimeoutMs);
712
+ this.confirmationTimers.set(nodeId, timer);
713
+ }
714
+ /**
715
+ * Confirm node failure after confirmation timeout.
716
+ */
717
+ confirmFailure(nodeId) {
718
+ const state = this.nodeStates.get(nodeId);
719
+ if (!state) return;
720
+ if (state.isSuspected && !state.isConfirmedFailed) {
721
+ state.isConfirmedFailed = true;
722
+ this.emit("nodeConfirmedFailed", { nodeId });
723
+ logger.error({ nodeId }, "Node failure confirmed");
724
+ }
725
+ this.confirmationTimers.delete(nodeId);
726
+ }
727
+ /**
728
+ * Calculate the phi value for a node using the Phi Accrual algorithm.
729
+ *
730
+ * Phi = -log10(P_later(t_now - t_last))
731
+ *
732
+ * where P_later is the probability that a heartbeat will arrive later than expected.
733
+ */
734
+ calculatePhi(nodeId) {
735
+ const state = this.nodeStates.get(nodeId);
736
+ if (!state) return 0;
737
+ const now = Date.now();
738
+ const timeSinceLastHeartbeat = now - state.lastHeartbeat;
739
+ if (state.intervalHistory.length < this.config.minSamples) {
740
+ const expectedInterval = this.config.initialHeartbeatIntervalMs;
741
+ return timeSinceLastHeartbeat / expectedInterval;
742
+ }
743
+ const mean = this.calculateMean(state.intervalHistory);
744
+ const variance = this.calculateVariance(state.intervalHistory, mean);
745
+ const stdDev = Math.sqrt(variance);
746
+ if (timeSinceLastHeartbeat <= mean) {
747
+ return 0;
748
+ }
749
+ const deviations = stdDev > 0 ? (timeSinceLastHeartbeat - mean) / stdDev : 0;
750
+ const phi = Math.max(0, deviations);
751
+ return phi;
752
+ }
753
+ /**
754
+ * Calculate mean of an array of numbers.
755
+ */
756
+ calculateMean(values) {
757
+ if (values.length === 0) return 0;
758
+ return values.reduce((sum, v) => sum + v, 0) / values.length;
759
+ }
760
+ /**
761
+ * Calculate variance of an array of numbers.
762
+ */
763
+ calculateVariance(values, mean) {
764
+ if (values.length < 2) return 0;
765
+ return values.reduce((sum, v) => sum + Math.pow(v - mean, 2), 0) / values.length;
766
+ }
767
+ /**
768
+ * Get list of currently suspected nodes.
769
+ */
770
+ getSuspectedNodes() {
771
+ const suspected = [];
772
+ for (const [nodeId, state] of this.nodeStates) {
773
+ if (state.isSuspected) {
774
+ suspected.push(nodeId);
775
+ }
776
+ }
777
+ return suspected;
778
+ }
779
+ /**
780
+ * Get list of confirmed failed nodes.
781
+ */
782
+ getConfirmedFailedNodes() {
783
+ const failed = [];
784
+ for (const [nodeId, state] of this.nodeStates) {
785
+ if (state.isConfirmedFailed) {
786
+ failed.push(nodeId);
787
+ }
788
+ }
789
+ return failed;
790
+ }
791
+ /**
792
+ * Check if a specific node is suspected.
793
+ */
794
+ isSuspected(nodeId) {
795
+ return this.nodeStates.get(nodeId)?.isSuspected ?? false;
796
+ }
797
+ /**
798
+ * Check if a specific node's failure is confirmed.
799
+ */
800
+ isConfirmedFailed(nodeId) {
801
+ return this.nodeStates.get(nodeId)?.isConfirmedFailed ?? false;
802
+ }
803
+ /**
804
+ * Get the current phi value for a node.
805
+ */
806
+ getPhi(nodeId) {
807
+ return this.calculatePhi(nodeId);
808
+ }
809
+ /**
810
+ * Get all monitored nodes.
811
+ */
812
+ getMonitoredNodes() {
813
+ return Array.from(this.monitoringNodes);
814
+ }
815
+ /**
816
+ * Get metrics for monitoring.
817
+ */
818
+ getMetrics() {
819
+ let suspectedCount = 0;
820
+ let confirmedCount = 0;
821
+ for (const state of this.nodeStates.values()) {
822
+ if (state.isSuspected) suspectedCount++;
823
+ if (state.isConfirmedFailed) confirmedCount++;
824
+ }
825
+ return {
826
+ monitoredNodes: this.monitoringNodes.size,
827
+ suspectedNodes: suspectedCount,
828
+ confirmedFailedNodes: confirmedCount
829
+ };
830
+ }
831
+ };
832
+
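The FailureDetector added above is a simplified phi accrual detector: despite the -log10 formula quoted in its doc comment, the bundled calculatePhi() returns how many standard deviations the current heartbeat gap lies above the historical mean (and falls back to gap / initialHeartbeatIntervalMs until minSamples intervals have been recorded). A worked sketch of that check, using hypothetical interval samples and the default phiThreshold of 8:

// Hypothetical heartbeat intervals (ms) recorded for one peer.
const intervals = [1000, 1010, 990, 1005, 995, 1000, 1002, 998, 1001, 999];
const mean = intervals.reduce((s, v) => s + v, 0) / intervals.length;               // 1000 ms
const variance = intervals.reduce((s, v) => s + (v - mean) ** 2, 0) / intervals.length;
const stdDev = Math.sqrt(variance);                                                 // ~5.1 ms

const gap = 1060;                                                                    // ms since the last heartbeat
const phi = gap <= mean ? 0 : (gap - mean) / stdDev;                                 // ~11.8 "deviations"

// With DEFAULT_FAILURE_DETECTOR_CONFIG.phiThreshold = 8 this peer would be
// suspected ("nodeSuspected") and, unless a new heartbeat clears the suspicion,
// confirmed failed ("nodeConfirmedFailed") confirmationTimeoutMs (10 s) later.
console.log(phi > 8); // true

With steady 1 s heartbeats and little jitter, even a modest delay trips the threshold quickly; noisier links widen the standard deviation and make the detector correspondingly more tolerant.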
833
+ // src/cluster/ClusterManager.ts
834
+ var ClusterManager = class extends EventEmitter2 {
578
835
  constructor(config) {
579
836
  super();
580
837
  this.members = /* @__PURE__ */ new Map();
@@ -582,6 +839,30 @@ var ClusterManager = class extends EventEmitter {
582
839
  this.reconnectIntervals = /* @__PURE__ */ new Map();
583
840
  this._actualPort = 0;
584
841
  this.config = config;
842
+ this.failureDetector = new FailureDetector({
843
+ ...DEFAULT_FAILURE_DETECTOR_CONFIG,
844
+ heartbeatIntervalMs: config.heartbeatIntervalMs ?? 1e3,
845
+ ...config.failureDetection
846
+ });
847
+ this.failureDetector.on("nodeSuspected", (event) => {
848
+ logger.warn({ nodeId: event.nodeId, phi: event.phi }, "Node suspected (failure detector)");
849
+ this.emit("nodeSuspected", event.nodeId, event.phi);
850
+ });
851
+ this.failureDetector.on("nodeRecovered", (event) => {
852
+ logger.info({ nodeId: event.nodeId }, "Node recovered (failure detector)");
853
+ this.emit("nodeRecovered", event.nodeId);
854
+ });
855
+ this.failureDetector.on("nodeConfirmedFailed", (event) => {
856
+ logger.error({ nodeId: event.nodeId }, "Node failure confirmed");
857
+ this.emit("nodeConfirmedFailed", event.nodeId);
858
+ this.handleNodeFailure(event.nodeId);
859
+ });
860
+ }
861
+ /**
862
+ * Get the failure detector instance.
863
+ */
864
+ getFailureDetector() {
865
+ return this.failureDetector;
585
866
  }
586
867
  /** Get the actual port the cluster is listening on */
587
868
  get port() {
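Because the constructor wiring above re-emits the detector's lifecycle events, consumers can subscribe on the ClusterManager itself instead of reaching into getFailureDetector(). A minimal sketch, assuming `cluster` is an already constructed ClusterManager instance (the event names and argument order match the emits above; the handler bodies are illustrative):

cluster.on("nodeSuspected", (nodeId, phi) => {
  // Phi crossed the threshold; the node may still recover.
  console.warn(`suspecting ${nodeId}, phi=${phi.toFixed(1)}`);
});

cluster.on("nodeRecovered", (nodeId) => {
  console.info(`${nodeId} sent a heartbeat again before confirmation`);
});

cluster.on("nodeConfirmedFailed", (nodeId) => {
  // Right after this emit, handleNodeFailure() terminates the peer socket,
  // removes the member and emits "memberLeft".
  console.error(`${nodeId} confirmed failed`);
});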
@@ -633,6 +914,8 @@ var ClusterManager = class extends EventEmitter {
633
914
  }
634
915
  stop() {
635
916
  logger.info({ port: this.config.port }, "Stopping Cluster Manager");
917
+ this.stopHeartbeat();
918
+ this.failureDetector.stop();
636
919
  for (const timeout of this.reconnectIntervals.values()) {
637
920
  clearTimeout(timeout);
638
921
  }
@@ -652,6 +935,61 @@ var ClusterManager = class extends EventEmitter {
652
935
  this.server.close();
653
936
  }
654
937
  }
938
+ /**
939
+ * Start sending heartbeats to all peers.
940
+ */
941
+ startHeartbeat() {
942
+ if (this.heartbeatTimer) return;
943
+ const intervalMs = this.config.heartbeatIntervalMs ?? 1e3;
944
+ this.heartbeatTimer = setInterval(() => {
945
+ this.sendHeartbeatToAll();
946
+ }, intervalMs);
947
+ this.failureDetector.start();
948
+ logger.debug({ intervalMs }, "Heartbeat started");
949
+ }
950
+ /**
951
+ * Stop sending heartbeats.
952
+ */
953
+ stopHeartbeat() {
954
+ if (this.heartbeatTimer) {
955
+ clearInterval(this.heartbeatTimer);
956
+ this.heartbeatTimer = void 0;
957
+ }
958
+ }
959
+ /**
960
+ * Send heartbeat to all connected peers.
961
+ */
962
+ sendHeartbeatToAll() {
963
+ for (const [nodeId, member] of this.members) {
964
+ if (member.isSelf) continue;
965
+ if (member.socket && member.socket.readyState === WebSocket.OPEN) {
966
+ this.send(nodeId, "HEARTBEAT", { timestamp: Date.now() });
967
+ }
968
+ }
969
+ }
970
+ /**
971
+ * Handle incoming heartbeat from a peer.
972
+ */
973
+ handleHeartbeat(senderId, _payload) {
974
+ this.failureDetector.recordHeartbeat(senderId);
975
+ }
976
+ /**
977
+ * Handle confirmed node failure.
978
+ */
979
+ handleNodeFailure(nodeId) {
980
+ const member = this.members.get(nodeId);
981
+ if (!member) return;
982
+ logger.warn({ nodeId }, "Removing failed node from cluster");
983
+ if (member.socket && member.socket.readyState !== WebSocket.CLOSED) {
984
+ try {
985
+ member.socket.terminate();
986
+ } catch (e) {
987
+ }
988
+ }
989
+ this.members.delete(nodeId);
990
+ this.failureDetector.stopMonitoring(nodeId);
991
+ this.emit("memberLeft", nodeId);
992
+ }
655
993
  connectToPeers() {
656
994
  for (const peer of this.config.peers) {
657
995
  this.connectToPeer(peer);
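The heartbeat loop above is started lazily (startHeartbeat() is called once a peer completes the handshake, later in this diff) and pushes a HEARTBEAT message to every open peer socket each interval, while the FailureDetector watches the arrival gaps. A hedged configuration sketch: only heartbeatIntervalMs and the failureDetection overrides correspond to options read by the constructor shown earlier; the other ClusterManager fields (nodeId, port, peers, ...) are omitted here and the variable name is hypothetical.

// Illustrative failure-detection settings inside a ClusterManager config.
const clusterConfig = {
  // ...nodeId, port, peers and the other cluster options go here...
  heartbeatIntervalMs: 1000,       // send interval; also the detector's default expectation
  failureDetection: {              // spread over DEFAULT_FAILURE_DETECTOR_CONFIG, so these win
    phiThreshold: 8,               // deviations above the mean gap before suspicion
    confirmationTimeoutMs: 10000,  // grace period between "suspected" and "confirmed failed"
    minSamples: 10,                // intervals needed before the statistical check kicks in
    maxSamples: 100,               // sliding window of interval history
  },
};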
@@ -770,7 +1108,13 @@ var ClusterManager = class extends EventEmitter {
770
1108
  socket: ws,
771
1109
  isSelf: false
772
1110
  });
1111
+ this.failureDetector.startMonitoring(remoteNodeId);
1112
+ this.startHeartbeat();
773
1113
  this.emit("memberJoined", remoteNodeId);
1114
+ } else if (msg.type === "HEARTBEAT") {
1115
+ if (remoteNodeId) {
1116
+ this.handleHeartbeat(remoteNodeId, msg.payload);
1117
+ }
774
1118
  } else {
775
1119
  this.emit("message", msg);
776
1120
  }
@@ -784,6 +1128,7 @@ var ClusterManager = class extends EventEmitter {
784
1128
  if (current && current.socket === ws) {
785
1129
  logger.info({ nodeId: remoteNodeId }, "Peer disconnected");
786
1130
  this.members.delete(remoteNodeId);
1131
+ this.failureDetector.stopMonitoring(remoteNodeId);
787
1132
  this.emit("memberLeft", remoteNodeId);
788
1133
  if (initiated && peerAddress) {
789
1134
  this.scheduleReconnect(peerAddress, 0);
@@ -837,526 +1182,1345 @@ var ClusterManager = class extends EventEmitter {
837
1182
  };
838
1183
 
839
1184
  // src/cluster/PartitionService.ts
840
- import { hashString } from "@topgunbuild/core";
841
- var PartitionService = class {
842
- // Standard Hazelcast default
843
- constructor(cluster) {
844
- // partitionId -> { owner, backups }
845
- this.partitions = /* @__PURE__ */ new Map();
846
- this.PARTITION_COUNT = 271;
847
- this.BACKUP_COUNT = 1;
848
- this.cluster = cluster;
849
- this.cluster.on("memberJoined", () => this.rebalance());
850
- this.cluster.on("memberLeft", () => this.rebalance());
851
- this.rebalance();
852
- }
853
- getPartitionId(key) {
854
- return Math.abs(hashString(key)) % this.PARTITION_COUNT;
855
- }
856
- getDistribution(key) {
857
- const pId = this.getPartitionId(key);
858
- return this.partitions.get(pId) || {
859
- owner: this.cluster.config.nodeId,
860
- backups: []
1185
+ import { EventEmitter as EventEmitter4 } from "events";
1186
+
1187
+ // src/cluster/MigrationManager.ts
1188
+ import { EventEmitter as EventEmitter3 } from "events";
1189
+ import {
1190
+ PartitionState,
1191
+ DEFAULT_MIGRATION_CONFIG
1192
+ } from "@topgunbuild/core";
1193
+ import { xxhash64AsNumber, createXxHash64State } from "@topgunbuild/native";
1194
+ var MigrationManager = class extends EventEmitter3 {
1195
+ constructor(clusterManager, partitionService, config = {}) {
1196
+ super();
1197
+ // Active outgoing migrations (this node is source)
1198
+ this.activeMigrations = /* @__PURE__ */ new Map();
1199
+ // Queue of migrations to process
1200
+ this.migrationQueue = [];
1201
+ // Incoming migrations (this node is target)
1202
+ this.incomingMigrations = /* @__PURE__ */ new Map();
1203
+ // Pending chunk acknowledgments
1204
+ this.pendingChunkAcks = /* @__PURE__ */ new Map();
1205
+ // Pending verification results
1206
+ this.pendingVerifications = /* @__PURE__ */ new Map();
1207
+ // Metrics tracking
1208
+ this.metrics = {
1209
+ migrationsStarted: 0,
1210
+ migrationsCompleted: 0,
1211
+ migrationsFailed: 0,
1212
+ chunksTransferred: 0,
1213
+ bytesTransferred: 0,
1214
+ activeMigrations: 0,
1215
+ queuedMigrations: 0
861
1216
  };
1217
+ // Batch processing timer
1218
+ this.batchTimer = null;
1219
+ // Data collection callback (injected from ServerCoordinator)
1220
+ this.dataCollector = null;
1221
+ // Data storage callback (injected from ServerCoordinator)
1222
+ this.dataStorer = null;
1223
+ this.clusterManager = clusterManager;
1224
+ this.partitionService = partitionService;
1225
+ this.config = {
1226
+ ...DEFAULT_MIGRATION_CONFIG,
1227
+ ...config
1228
+ };
1229
+ this.setupMessageHandlers();
1230
+ }
1231
+ // ============================================
1232
+ // Configuration
1233
+ // ============================================
1234
+ /**
1235
+ * Set the data collector callback
1236
+ * Called to collect all records for a partition before migration
1237
+ */
1238
+ setDataCollector(collector) {
1239
+ this.dataCollector = collector;
1240
+ }
1241
+ /**
1242
+ * Set the data storer callback
1243
+ * Called to store received records after successful migration
1244
+ */
1245
+ setDataStorer(storer) {
1246
+ this.dataStorer = storer;
1247
+ }
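The two injected callbacks above keep the MigrationManager storage-agnostic; per the comments, ServerCoordinator supplies them. A minimal sketch of the expected shapes, assuming `migrationManager` is an existing instance and that each record is an already serialized Uint8Array (which the length-prefixed chunking further down relies on); loadPartitionRecords and persistRecord are hypothetical helpers:

// Source side: hand back every serialized record owned by a partition.
migrationManager.setDataCollector(async (partitionId) => {
  return loadPartitionRecords(partitionId); // -> Promise<Uint8Array[]> (hypothetical helper)
});

// Target side: persist the records of a partition that arrived and verified cleanly.
migrationManager.setDataStorer(async (partitionId, records) => {
  for (const record of records) {
    await persistRecord(partitionId, record); // hypothetical helper; record is a Uint8Array
  }
});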
1248
+ // ============================================
1249
+ // Migration Planning
1250
+ // ============================================
1251
+ /**
1252
+ * Plan migration for topology change
1253
+ */
1254
+ planMigration(oldDistribution, newDistribution) {
1255
+ const migrations = [];
1256
+ for (const [partitionId, newDist] of newDistribution) {
1257
+ const oldDist = oldDistribution.get(partitionId);
1258
+ const oldOwner = oldDist?.owner ?? this.clusterManager.config.nodeId;
1259
+ const newOwner = newDist.owner;
1260
+ if (oldOwner !== newOwner && oldOwner === this.clusterManager.config.nodeId) {
1261
+ migrations.push({
1262
+ partitionId,
1263
+ state: PartitionState.STABLE,
1264
+ sourceNode: oldOwner,
1265
+ targetNode: newOwner,
1266
+ startTime: 0,
1267
+ bytesTransferred: 0,
1268
+ totalBytes: 0,
1269
+ retryCount: 0
1270
+ });
1271
+ }
1272
+ }
1273
+ migrations.sort((a, b) => a.partitionId - b.partitionId);
1274
+ this.migrationQueue = migrations;
1275
+ this.metrics.queuedMigrations = migrations.length;
1276
+ logger.info({ total: migrations.length }, "Migration planned");
1277
+ this.emit("migrationPlanned", { total: migrations.length });
1278
+ if (migrations.length > 0) {
1279
+ this.startBatchProcessing();
1280
+ }
862
1281
  }
863
- getOwner(key) {
864
- return this.getDistribution(key).owner;
865
- }
866
- isLocalOwner(key) {
867
- return this.getOwner(key) === this.cluster.config.nodeId;
1282
+ /**
1283
+ * Start batch processing timer
1284
+ */
1285
+ startBatchProcessing() {
1286
+ if (this.batchTimer) return;
1287
+ this.startNextBatch().catch((err) => {
1288
+ logger.error({ error: err }, "Failed to start first migration batch");
1289
+ this.emit("error", err);
1290
+ });
1291
+ this.batchTimer = setInterval(() => {
1292
+ this.startNextBatch().catch((err) => {
1293
+ logger.error({ error: err }, "Failed to start migration batch");
1294
+ this.emit("error", err);
1295
+ });
1296
+ }, this.config.batchIntervalMs);
868
1297
  }
869
- isLocalBackup(key) {
870
- const dist = this.getDistribution(key);
871
- return dist.backups.includes(this.cluster.config.nodeId);
1298
+ /**
1299
+ * Stop batch processing
1300
+ */
1301
+ stopBatchProcessing() {
1302
+ if (this.batchTimer) {
1303
+ clearInterval(this.batchTimer);
1304
+ this.batchTimer = null;
1305
+ }
872
1306
  }
873
- isRelated(key) {
874
- return this.isLocalOwner(key) || this.isLocalBackup(key);
1307
+ /**
1308
+ * Start next batch of migrations
1309
+ */
1310
+ async startNextBatch() {
1311
+ if (this.activeMigrations.size >= this.config.parallelTransfers) {
1312
+ return;
1313
+ }
1314
+ const slotsAvailable = this.config.parallelTransfers - this.activeMigrations.size;
1315
+ const batch = this.migrationQueue.splice(0, Math.min(slotsAvailable, this.config.batchSize));
1316
+ if (batch.length === 0) {
1317
+ if (this.migrationQueue.length === 0 && this.activeMigrations.size === 0) {
1318
+ this.stopBatchProcessing();
1319
+ }
1320
+ return;
1321
+ }
1322
+ for (const migration of batch) {
1323
+ migration.state = PartitionState.MIGRATING;
1324
+ migration.startTime = Date.now();
1325
+ this.activeMigrations.set(migration.partitionId, migration);
1326
+ this.metrics.migrationsStarted++;
1327
+ this.metrics.activeMigrations = this.activeMigrations.size;
1328
+ this.metrics.queuedMigrations = this.migrationQueue.length;
1329
+ this.startPartitionMigration(migration).catch((error) => {
1330
+ this.onMigrationFailed(migration.partitionId, error);
1331
+ });
1332
+ }
1333
+ logger.info({ count: batch.length, remaining: this.migrationQueue.length }, "Batch started");
1334
+ this.emit("batchStarted", { count: batch.length, remaining: this.migrationQueue.length });
875
1335
  }
876
- rebalance() {
877
- let allMembers = this.cluster.getMembers().sort();
878
- if (allMembers.length === 0) {
879
- allMembers = [this.cluster.config.nodeId];
1336
+ // ============================================
1337
+ // Migration Execution
1338
+ // ============================================
1339
+ /**
1340
+ * Start migration for a single partition
1341
+ */
1342
+ async startPartitionMigration(migration) {
1343
+ const { partitionId, targetNode } = migration;
1344
+ logger.info({ partitionId, targetNode }, "Starting partition migration");
1345
+ let records;
1346
+ if (this.dataCollector) {
1347
+ records = await this.dataCollector(partitionId);
1348
+ } else {
1349
+ records = [];
880
1350
  }
881
- logger.info({ memberCount: allMembers.length, members: allMembers }, "Rebalancing partitions");
882
- for (let i = 0; i < this.PARTITION_COUNT; i++) {
883
- const ownerIndex = i % allMembers.length;
884
- const owner = allMembers[ownerIndex];
885
- const backups = [];
886
- if (allMembers.length > 1) {
887
- for (let b = 1; b <= this.BACKUP_COUNT; b++) {
888
- const backupIndex = (ownerIndex + b) % allMembers.length;
889
- backups.push(allMembers[backupIndex]);
1351
+ migration.totalBytes = records.reduce((sum, r) => sum + r.length, 0);
1352
+ this.clusterManager.send(targetNode, "OP_FORWARD", {
1353
+ _migration: {
1354
+ type: "MIGRATION_START",
1355
+ payload: {
1356
+ partitionId,
1357
+ sourceNode: this.clusterManager.config.nodeId,
1358
+ estimatedSize: migration.totalBytes
890
1359
  }
891
1360
  }
892
- this.partitions.set(i, { owner, backups });
1361
+ });
1362
+ const chunks = this.chunkify(records);
1363
+ for (let i = 0; i < chunks.length; i++) {
1364
+ const chunk = chunks[i];
1365
+ const checksum = this.calculateChecksum(chunk);
1366
+ this.clusterManager.send(targetNode, "OP_FORWARD", {
1367
+ _migration: {
1368
+ type: "MIGRATION_CHUNK",
1369
+ payload: {
1370
+ partitionId,
1371
+ chunkIndex: i,
1372
+ totalChunks: chunks.length,
1373
+ data: Array.from(chunk),
1374
+ // Convert Uint8Array to array for JSON serialization
1375
+ checksum
1376
+ }
1377
+ }
1378
+ });
1379
+ await this.waitForChunkAck(partitionId, i);
1380
+ migration.bytesTransferred += chunk.length;
1381
+ this.metrics.chunksTransferred++;
1382
+ this.metrics.bytesTransferred += chunk.length;
1383
+ this.emit("migrationProgress", migration);
1384
+ }
1385
+ const fullChecksum = this.calculatePartitionChecksum(records);
1386
+ migration.state = PartitionState.SYNC;
1387
+ this.clusterManager.send(targetNode, "OP_FORWARD", {
1388
+ _migration: {
1389
+ type: "MIGRATION_COMPLETE",
1390
+ payload: {
1391
+ partitionId,
1392
+ totalRecords: records.length,
1393
+ checksum: fullChecksum
1394
+ }
1395
+ }
1396
+ });
1397
+ const verified = await this.waitForVerification(partitionId);
1398
+ if (verified) {
1399
+ await this.onMigrationComplete(partitionId);
1400
+ } else {
1401
+ throw new Error(`Migration verification failed for partition ${partitionId}`);
893
1402
  }
894
1403
  }
895
- };
896
-
897
- // src/cluster/LockManager.ts
898
- import { EventEmitter as EventEmitter2 } from "events";
899
- var _LockManager = class _LockManager extends EventEmitter2 {
900
- // 5 minutes
901
- constructor() {
902
- super();
903
- this.locks = /* @__PURE__ */ new Map();
904
- this.checkInterval = setInterval(() => this.cleanupExpiredLocks(), 1e3);
905
- }
906
- stop() {
907
- clearInterval(this.checkInterval);
908
- }
909
- acquire(name, clientId, requestId, ttl) {
910
- const safeTtl = Math.max(_LockManager.MIN_TTL, Math.min(ttl || _LockManager.MIN_TTL, _LockManager.MAX_TTL));
911
- let lock = this.locks.get(name);
912
- if (!lock) {
913
- lock = {
914
- name,
915
- owner: "",
916
- fencingToken: 0,
917
- expiry: 0,
918
- queue: []
919
- };
920
- this.locks.set(name, lock);
1404
+ /**
1405
+ * Split records into chunks
1406
+ */
1407
+ chunkify(records) {
1408
+ const chunks = [];
1409
+ let currentChunk = [];
1410
+ let currentSize = 0;
1411
+ for (const record of records) {
1412
+ const lengthPrefix = new Uint8Array(4);
1413
+ new DataView(lengthPrefix.buffer).setUint32(0, record.length, true);
1414
+ currentChunk.push(...lengthPrefix, ...record);
1415
+ currentSize += 4 + record.length;
1416
+ if (currentSize >= this.config.transferChunkSize) {
1417
+ chunks.push(new Uint8Array(currentChunk));
1418
+ currentChunk = [];
1419
+ currentSize = 0;
1420
+ }
921
1421
  }
922
- const now = Date.now();
923
- if (!lock.owner || lock.expiry < now) {
924
- this.grantLock(lock, clientId, safeTtl);
925
- return { granted: true, fencingToken: lock.fencingToken };
1422
+ if (currentChunk.length > 0) {
1423
+ chunks.push(new Uint8Array(currentChunk));
926
1424
  }
927
- if (lock.owner === clientId) {
928
- lock.expiry = Math.max(lock.expiry, now + safeTtl);
929
- logger.info({ name, clientId, fencingToken: lock.fencingToken }, "Lock lease extended");
930
- return { granted: true, fencingToken: lock.fencingToken };
1425
+ if (chunks.length === 0) {
1426
+ chunks.push(new Uint8Array(0));
931
1427
  }
932
- lock.queue.push({ clientId, requestId, ttl: safeTtl, timestamp: now });
933
- logger.info({ name, clientId, queueLength: lock.queue.length }, "Lock queued");
934
- return { granted: false };
1428
+ return chunks;
935
1429
  }
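chunkify() above frames every record with a 4-byte little-endian length prefix and flushes a chunk once transferChunkSize is reached; deserializeRecords(), further down in this diff, walks the prefixes back out on the receiving node. A self-contained sketch of that framing, independent of the class (the record bytes are arbitrary sample data):

// Frame two records with 4-byte little-endian length prefixes...
const records = [new Uint8Array([1, 2, 3]), new Uint8Array([4, 5])];
const bytes = [];
for (const record of records) {
  const prefix = new Uint8Array(4);
  new DataView(prefix.buffer).setUint32(0, record.length, true);
  bytes.push(...prefix, ...record);
}
const framed = new Uint8Array(bytes); // 13 bytes: 4 + 3 + 4 + 2

// ...and decode them again, mirroring deserializeRecords() below.
const decoded = [];
let offset = 0;
while (offset + 4 <= framed.length) {
  const length = new DataView(framed.buffer, framed.byteOffset + offset, 4).getUint32(0, true);
  offset += 4;
  decoded.push(framed.slice(offset, offset + length));
  offset += length;
}
console.log(decoded.length); // 2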
936
- release(name, clientId, fencingToken) {
937
- const lock = this.locks.get(name);
938
- if (!lock) return false;
939
- if (lock.owner !== clientId) {
940
- logger.warn({ name, clientId, owner: lock.owner }, "Release failed: Not owner");
941
- return false;
942
- }
943
- if (lock.fencingToken !== fencingToken) {
944
- logger.warn({ name, clientId, sentToken: fencingToken, actualToken: lock.fencingToken }, "Release failed: Token mismatch");
945
- return false;
946
- }
947
- this.processNext(lock);
948
- return true;
1430
+ /**
1431
+ * Calculate checksum for a chunk using native xxhash
1432
+ */
1433
+ calculateChecksum(data) {
1434
+ return String(xxhash64AsNumber(data));
949
1435
  }
950
- handleClientDisconnect(clientId) {
951
- for (const lock of this.locks.values()) {
952
- if (lock.owner === clientId) {
953
- logger.info({ name: lock.name, clientId }, "Releasing lock due to disconnect");
954
- this.processNext(lock);
955
- } else {
956
- const initialLen = lock.queue.length;
957
- lock.queue = lock.queue.filter((req) => req.clientId !== clientId);
958
- if (lock.queue.length < initialLen) {
959
- logger.info({ name: lock.name, clientId }, "Removed from lock queue due to disconnect");
960
- }
961
- }
1436
+ /**
1437
+ * Calculate checksum for all partition records using streaming xxhash
1438
+ */
1439
+ calculatePartitionChecksum(records) {
1440
+ const state = createXxHash64State();
1441
+ for (const record of records) {
1442
+ state.update(record);
962
1443
  }
1444
+ return String(state.digestAsNumber());
963
1445
  }
964
- grantLock(lock, clientId, ttl) {
965
- lock.owner = clientId;
966
- lock.expiry = Date.now() + ttl;
967
- lock.fencingToken++;
968
- logger.info({ name: lock.name, clientId, fencingToken: lock.fencingToken }, "Lock granted");
1446
+ /**
1447
+ * Wait for chunk acknowledgment
1448
+ */
1449
+ waitForChunkAck(partitionId, chunkIndex) {
1450
+ return new Promise((resolve, reject) => {
1451
+ const key = `${partitionId}:${chunkIndex}`;
1452
+ const timeout = setTimeout(() => {
1453
+ this.pendingChunkAcks.delete(key);
1454
+ reject(new Error(`Chunk ack timeout for partition ${partitionId}, chunk ${chunkIndex}`));
1455
+ }, this.config.syncTimeoutMs);
1456
+ this.pendingChunkAcks.set(key, { resolve, reject, timeout });
1457
+ });
969
1458
  }
970
- processNext(lock) {
971
- const now = Date.now();
972
- lock.owner = "";
973
- lock.expiry = 0;
974
- while (lock.queue.length > 0) {
975
- const next = lock.queue.shift();
976
- this.grantLock(lock, next.clientId, next.ttl);
977
- this.emit("lockGranted", {
978
- clientId: next.clientId,
979
- requestId: next.requestId,
980
- name: lock.name,
981
- fencingToken: lock.fencingToken
982
- });
1459
+ /**
1460
+ * Wait for migration verification
1461
+ */
1462
+ waitForVerification(partitionId) {
1463
+ return new Promise((resolve) => {
1464
+ const timeout = setTimeout(() => {
1465
+ this.pendingVerifications.delete(partitionId);
1466
+ resolve(false);
1467
+ }, this.config.syncTimeoutMs);
1468
+ this.pendingVerifications.set(partitionId, { resolve, timeout });
1469
+ });
1470
+ }
1471
+ // ============================================
1472
+ // Migration Completion
1473
+ // ============================================
1474
+ /**
1475
+ * Handle successful migration completion
1476
+ */
1477
+ async onMigrationComplete(partitionId) {
1478
+ const migration = this.activeMigrations.get(partitionId);
1479
+ if (!migration) return;
1480
+ migration.state = PartitionState.STABLE;
1481
+ this.activeMigrations.delete(partitionId);
1482
+ this.metrics.migrationsCompleted++;
1483
+ this.metrics.activeMigrations = this.activeMigrations.size;
1484
+ logger.info({
1485
+ partitionId,
1486
+ duration: Date.now() - migration.startTime,
1487
+ bytesTransferred: migration.bytesTransferred
1488
+ }, "Migration completed");
1489
+ this.emit("migrationComplete", partitionId);
1490
+ }
1491
+ /**
1492
+ * Handle migration failure
1493
+ */
1494
+ async onMigrationFailed(partitionId, error) {
1495
+ const migration = this.activeMigrations.get(partitionId);
1496
+ if (!migration) return;
1497
+ migration.retryCount++;
1498
+ if (migration.retryCount <= this.config.maxRetries) {
1499
+ migration.state = PartitionState.STABLE;
1500
+ migration.bytesTransferred = 0;
1501
+ this.activeMigrations.delete(partitionId);
1502
+ this.migrationQueue.unshift(migration);
1503
+ this.metrics.queuedMigrations = this.migrationQueue.length;
1504
+ this.metrics.activeMigrations = this.activeMigrations.size;
1505
+ logger.warn({
1506
+ partitionId,
1507
+ retryCount: migration.retryCount,
1508
+ error: error.message
1509
+ }, "Migration failed, will retry");
1510
+ } else {
1511
+ migration.state = PartitionState.FAILED;
1512
+ this.activeMigrations.delete(partitionId);
1513
+ this.metrics.migrationsFailed++;
1514
+ this.metrics.activeMigrations = this.activeMigrations.size;
1515
+ logger.error({
1516
+ partitionId,
1517
+ retryCount: migration.retryCount,
1518
+ error: error.message
1519
+ }, "Migration failed permanently");
1520
+ this.emit("migrationFailed", partitionId, error);
1521
+ }
1522
+ }
1523
+ // ============================================
1524
+ // Incoming Migration Handlers (Target Node)
1525
+ // ============================================
1526
+ /**
1527
+ * Handle MIGRATION_START message
1528
+ */
1529
+ handleMigrationStart(payload) {
1530
+ const { partitionId, sourceNode, estimatedSize } = payload;
1531
+ logger.info({ partitionId, sourceNode, estimatedSize }, "Receiving migration");
1532
+ this.incomingMigrations.set(partitionId, {
1533
+ sourceNode,
1534
+ chunks: [],
1535
+ expectedSize: estimatedSize,
1536
+ receivedSize: 0,
1537
+ startTime: Date.now()
1538
+ });
1539
+ }
1540
+ /**
1541
+ * Handle MIGRATION_CHUNK message
1542
+ */
1543
+ handleMigrationChunk(payload) {
1544
+ const { partitionId, chunkIndex, data, checksum } = payload;
1545
+ const incoming = this.incomingMigrations.get(partitionId);
1546
+ if (!incoming) {
1547
+ logger.warn({ partitionId, chunkIndex }, "Received chunk for unknown migration");
983
1548
  return;
984
1549
  }
985
- if (lock.queue.length === 0) {
986
- this.locks.delete(lock.name);
1550
+ const chunkData = new Uint8Array(data);
1551
+ const actualChecksum = this.calculateChecksum(chunkData);
1552
+ const success = actualChecksum === checksum;
1553
+ if (success) {
1554
+ incoming.chunks[chunkIndex] = chunkData;
1555
+ incoming.receivedSize += chunkData.length;
1556
+ } else {
1557
+ logger.warn({ partitionId, chunkIndex, expected: checksum, actual: actualChecksum }, "Chunk checksum mismatch");
987
1558
  }
988
- }
989
- cleanupExpiredLocks() {
990
- const now = Date.now();
991
- const lockNames = Array.from(this.locks.keys());
992
- for (const name of lockNames) {
993
- const lock = this.locks.get(name);
994
- if (!lock) continue;
995
- if (lock.owner && lock.expiry < now) {
996
- logger.info({ name: lock.name, owner: lock.owner }, "Lock expired, processing next");
997
- this.processNext(lock);
998
- } else if (!lock.owner && lock.queue.length === 0) {
999
- this.locks.delete(name);
1559
+ this.clusterManager.send(incoming.sourceNode, "OP_FORWARD", {
1560
+ _migration: {
1561
+ type: "MIGRATION_CHUNK_ACK",
1562
+ payload: {
1563
+ partitionId,
1564
+ chunkIndex,
1565
+ success
1566
+ }
1000
1567
  }
1001
- }
1002
- }
1003
- };
1004
- _LockManager.MIN_TTL = 1e3;
1005
- // 1 second
1006
- _LockManager.MAX_TTL = 3e5;
1007
- var LockManager = _LockManager;
1008
-
1009
- // src/security/SecurityManager.ts
1010
- var SecurityManager = class {
1011
- constructor(policies = []) {
1012
- this.policies = [];
1013
- this.policies = policies;
1014
- }
1015
- addPolicy(policy) {
1016
- this.policies.push(policy);
1568
+ });
1017
1569
  }
1018
- checkPermission(principal, mapName, action) {
1019
- if (principal.roles.includes("ADMIN")) {
1020
- return true;
1570
+ /**
1571
+ * Handle MIGRATION_COMPLETE message
1572
+ */
1573
+ async handleMigrationComplete(payload) {
1574
+ const { partitionId, totalRecords, checksum } = payload;
1575
+ const incoming = this.incomingMigrations.get(partitionId);
1576
+ if (!incoming) {
1577
+ logger.warn({ partitionId }, "Received complete for unknown migration");
1578
+ return;
1021
1579
  }
1022
- if (mapName.startsWith("$sys/")) {
1023
- logger.warn({ userId: principal.userId, mapName }, "Access Denied: System Map requires ADMIN role");
1024
- return false;
1580
+ const allData = this.reassemble(incoming.chunks);
1581
+ const records = this.deserializeRecords(allData);
1582
+ const actualChecksum = this.calculatePartitionChecksum(records);
1583
+ const checksumMatch = actualChecksum === checksum;
1584
+ const success = checksumMatch && records.length === totalRecords;
1585
+ if (success && this.dataStorer) {
1586
+ await this.dataStorer(partitionId, records);
1025
1587
  }
1026
- for (const policy of this.policies) {
1027
- const hasRole = this.hasRole(principal, policy.role);
1028
- const matchesMap = this.matchesMap(mapName, policy.mapNamePattern, principal);
1029
- if (hasRole && matchesMap) {
1030
- if (policy.actions.includes("ALL") || policy.actions.includes(action)) {
1031
- return true;
1588
+ logger.info({
1589
+ partitionId,
1590
+ duration: Date.now() - incoming.startTime,
1591
+ records: records.length,
1592
+ checksumMatch
1593
+ }, "Migration received");
1594
+ this.clusterManager.send(incoming.sourceNode, "OP_FORWARD", {
1595
+ _migration: {
1596
+ type: "MIGRATION_VERIFY",
1597
+ payload: {
1598
+ partitionId,
1599
+ success,
1600
+ checksumMatch
1032
1601
  }
1602
+ }
1603
+ });
1604
+ this.incomingMigrations.delete(partitionId);
1605
+ }
1606
+ /**
1607
+ * Handle MIGRATION_CHUNK_ACK message
1608
+ */
1609
+ handleMigrationChunkAck(payload) {
1610
+ const { partitionId, chunkIndex, success } = payload;
1611
+ const key = `${partitionId}:${chunkIndex}`;
1612
+ const pending = this.pendingChunkAcks.get(key);
1613
+ if (pending) {
1614
+ clearTimeout(pending.timeout);
1615
+ this.pendingChunkAcks.delete(key);
1616
+ if (success) {
1617
+ pending.resolve();
1033
1618
  } else {
1619
+ pending.reject(new Error(`Chunk ${chunkIndex} rejected by target`));
1034
1620
  }
1035
1621
  }
1036
- logger.warn({
1037
- userId: principal.userId,
1038
- roles: principal.roles,
1039
- mapName,
1040
- action,
1041
- policyCount: this.policies.length
1042
- }, "SecurityManager: Access Denied - No matching policy found");
1043
- return false;
1044
1622
  }
1045
- filterObject(object, principal, mapName) {
1046
- if (!object || typeof object !== "object") return object;
1047
- if (principal.roles.includes("ADMIN")) return object;
1048
- if (Array.isArray(object)) {
1049
- return object.map((item) => this.filterObject(item, principal, mapName));
1623
+ /**
1624
+ * Handle MIGRATION_VERIFY message
1625
+ */
1626
+ handleMigrationVerify(payload) {
1627
+ const { partitionId, success } = payload;
1628
+ const pending = this.pendingVerifications.get(partitionId);
1629
+ if (pending) {
1630
+ clearTimeout(pending.timeout);
1631
+ this.pendingVerifications.delete(partitionId);
1632
+ pending.resolve(success);
1050
1633
  }
1051
- let allowedFields = null;
1052
- let accessGranted = false;
1053
- for (const policy of this.policies) {
1054
- if (this.hasRole(principal, policy.role) && this.matchesMap(mapName, policy.mapNamePattern, principal)) {
1055
- if (policy.actions.includes("ALL") || policy.actions.includes("READ")) {
1056
- accessGranted = true;
1057
- if (!policy.allowedFields || policy.allowedFields.length === 0 || policy.allowedFields.includes("*")) {
1058
- return object;
1059
- }
1060
- if (allowedFields === null) allowedFields = /* @__PURE__ */ new Set();
1061
- policy.allowedFields.forEach((f) => allowedFields.add(f));
1062
- }
1634
+ }
1635
+ /**
1636
+ * Reassemble chunks into continuous data
1637
+ */
1638
+ reassemble(chunks) {
1639
+ const totalLength = chunks.reduce((sum, c) => sum + (c?.length ?? 0), 0);
1640
+ const result = new Uint8Array(totalLength);
1641
+ let offset = 0;
1642
+ for (const chunk of chunks) {
1643
+ if (chunk) {
1644
+ result.set(chunk, offset);
1645
+ offset += chunk.length;
1063
1646
  }
1064
1647
  }
1065
- if (!accessGranted) return null;
1066
- if (allowedFields === null) return object;
1067
- const filtered = {};
1068
- for (const key of Object.keys(object)) {
1069
- if (allowedFields.has(key)) {
1070
- filtered[key] = object[key];
1071
- }
1648
+ return result;
1649
+ }
1650
+ /**
1651
+ * Deserialize records from chunk data
1652
+ */
1653
+ deserializeRecords(data) {
1654
+ const records = [];
1655
+ let offset = 0;
1656
+ while (offset < data.length) {
1657
+ if (offset + 4 > data.length) break;
1658
+ const length = new DataView(data.buffer, data.byteOffset + offset, 4).getUint32(0, true);
1659
+ offset += 4;
1660
+ if (offset + length > data.length) break;
1661
+ records.push(data.slice(offset, offset + length));
1662
+ offset += length;
1072
1663
  }
1073
- return filtered;
1664
+ return records;
1074
1665
  }
1075
- hasRole(principal, role) {
1076
- return principal.roles.includes(role);
1666
+ // ============================================
1667
+ // Message Handling
1668
+ // ============================================
1669
+ /**
1670
+ * Setup cluster message handlers
1671
+ */
1672
+ setupMessageHandlers() {
1673
+ this.clusterManager.on("message", (msg) => {
1674
+ if (msg.payload?._migration) {
1675
+ const migration = msg.payload._migration;
1676
+ switch (migration.type) {
1677
+ case "MIGRATION_START":
1678
+ this.handleMigrationStart(migration.payload);
1679
+ break;
1680
+ case "MIGRATION_CHUNK":
1681
+ this.handleMigrationChunk(migration.payload);
1682
+ break;
1683
+ case "MIGRATION_COMPLETE":
1684
+ this.handleMigrationComplete(migration.payload).catch((err) => {
1685
+ logger.error({ error: err }, "Error handling migration complete");
1686
+ });
1687
+ break;
1688
+ case "MIGRATION_CHUNK_ACK":
1689
+ this.handleMigrationChunkAck(migration.payload);
1690
+ break;
1691
+ case "MIGRATION_VERIFY":
1692
+ this.handleMigrationVerify(migration.payload);
1693
+ break;
1694
+ }
1695
+ }
1696
+ });
1077
1697
  }
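All migration control traffic rides inside ordinary cluster OP_FORWARD messages under a _migration key, which is what the handler above demultiplexes into the five handleMigration* methods. A sketch of one such envelope, as built by startPartitionMigration() earlier in this diff (node IDs and sizes are illustrative):

// What a MIGRATION_START control message looks like between nodes.
const migrationStart = {
  _migration: {
    type: "MIGRATION_START", // or MIGRATION_CHUNK / MIGRATION_COMPLETE / MIGRATION_CHUNK_ACK / MIGRATION_VERIFY
    payload: {
      partitionId: 42,
      sourceNode: "node-a",  // illustrative node id
      estimatedSize: 2048,   // bytes, summed from the collected records
    },
  },
};
// clusterManager.send(targetNode, "OP_FORWARD", migrationStart);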
1078
- matchesMap(mapName, pattern, principal) {
1079
- let finalPattern = pattern;
1080
- if (pattern.includes("{userId}") && principal) {
1081
- finalPattern = pattern.replace("{userId}", principal.userId);
1698
+ // ============================================
1699
+ // Status and Metrics
1700
+ // ============================================
1701
+ /**
1702
+ * Check if a partition is currently migrating
1703
+ */
1704
+ isActive(partitionId) {
1705
+ return this.activeMigrations.has(partitionId) || this.incomingMigrations.has(partitionId);
1706
+ }
1707
+ /**
1708
+ * Get migration status
1709
+ */
1710
+ getStatus() {
1711
+ const avgMigrationTime = this.metrics.migrationsCompleted > 0 ? Date.now() - (this.activeMigrations.values().next().value?.startTime ?? Date.now()) : 0;
1712
+ const estimatedTimeRemainingMs = (this.migrationQueue.length + this.activeMigrations.size) * (avgMigrationTime || 1e3);
1713
+ return {
1714
+ inProgress: this.activeMigrations.size > 0 || this.migrationQueue.length > 0,
1715
+ active: Array.from(this.activeMigrations.values()),
1716
+ queued: this.migrationQueue.length,
1717
+ completed: this.metrics.migrationsCompleted,
1718
+ failed: this.metrics.migrationsFailed,
1719
+ estimatedTimeRemainingMs
1720
+ };
1721
+ }
1722
+ /**
1723
+ * Get migration metrics
1724
+ */
1725
+ getMetrics() {
1726
+ return { ...this.metrics };
1727
+ }
1728
+ /**
1729
+ * Cancel all active and queued migrations
1730
+ */
1731
+ async cancelAll() {
1732
+ this.stopBatchProcessing();
1733
+ this.migrationQueue = [];
1734
+ this.metrics.queuedMigrations = 0;
1735
+ for (const [partitionId, migration] of this.activeMigrations) {
1736
+ migration.state = PartitionState.FAILED;
1737
+ this.metrics.migrationsFailed++;
1738
+ this.emit("migrationFailed", partitionId, new Error("Migration cancelled"));
1082
1739
  }
1083
- if (finalPattern === "*") return true;
1084
- if (finalPattern === mapName) return true;
1085
- if (finalPattern.endsWith("*")) {
1086
- const prefix = finalPattern.slice(0, -1);
1087
- return mapName.startsWith(prefix);
1740
+ this.activeMigrations.clear();
1741
+ this.metrics.activeMigrations = 0;
1742
+ for (const pending of this.pendingChunkAcks.values()) {
1743
+ clearTimeout(pending.timeout);
1744
+ pending.reject(new Error("Migration cancelled"));
1088
1745
  }
1089
- return false;
1746
+ this.pendingChunkAcks.clear();
1747
+ for (const pending of this.pendingVerifications.values()) {
1748
+ clearTimeout(pending.timeout);
1749
+ pending.resolve(false);
1750
+ }
1751
+ this.pendingVerifications.clear();
1752
+ this.incomingMigrations.clear();
1753
+ logger.info("All migrations cancelled");
1754
+ }
1755
+ /**
1756
+ * Cleanup resources (sync version for backwards compatibility)
1757
+ */
1758
+ close() {
1759
+ this.cancelAll();
1760
+ }
1761
+ /**
1762
+ * Async cleanup - waits for cancellation to complete
1763
+ */
1764
+ async closeAsync() {
1765
+ await this.cancelAll();
1766
+ this.removeAllListeners();
1090
1767
  }
1091
1768
  };
1092
1769
 
1093
- // src/monitoring/MetricsService.ts
1094
- import { Registry, Gauge, Counter, Summary, collectDefaultMetrics } from "prom-client";
1095
- var MetricsService = class {
1096
- constructor() {
1097
- this.registry = new Registry();
1098
- collectDefaultMetrics({ register: this.registry, prefix: "topgun_" });
1099
- this.connectedClients = new Gauge({
1100
- name: "topgun_connected_clients",
1101
- help: "Number of currently connected clients",
1102
- registers: [this.registry]
1103
- });
1104
- this.mapSizeItems = new Gauge({
1105
- name: "topgun_map_size_items",
1106
- help: "Number of items in a map",
1107
- labelNames: ["map"],
1108
- registers: [this.registry]
1109
- });
1110
- this.opsTotal = new Counter({
1111
- name: "topgun_ops_total",
1112
- help: "Total number of operations",
1113
- labelNames: ["type", "map"],
1114
- registers: [this.registry]
1115
- });
1116
- this.memoryUsage = new Gauge({
1117
- name: "topgun_memory_usage_bytes",
1118
- help: "Current memory usage in bytes",
1119
- registers: [this.registry],
1120
- collect() {
1121
- this.set(process.memoryUsage().heapUsed);
1122
- }
1123
- });
1124
- this.clusterMembers = new Gauge({
1125
- name: "topgun_cluster_members",
1126
- help: "Number of active cluster members",
1127
- registers: [this.registry]
1128
- });
1129
- this.eventsRoutedTotal = new Counter({
1130
- name: "topgun_events_routed_total",
1131
- help: "Total number of events processed for routing",
1132
- registers: [this.registry]
1133
- });
1134
- this.eventsFilteredBySubscription = new Counter({
1135
- name: "topgun_events_filtered_by_subscription",
1136
- help: "Events NOT sent due to no active subscriptions",
1137
- registers: [this.registry]
1138
- });
1139
- this.subscribersPerEvent = new Summary({
1140
- name: "topgun_subscribers_per_event",
1141
- help: "Distribution of subscribers per event",
1142
- percentiles: [0.5, 0.9, 0.99],
1143
- registers: [this.registry]
1144
- });
1145
- this.eventQueueSize = new Gauge({
1146
- name: "topgun_event_queue_size",
1147
- help: "Current size of the event queue across all stripes",
1148
- labelNames: ["stripe"],
1149
- registers: [this.registry]
1150
- });
1151
- this.eventQueueEnqueued = new Counter({
1152
- name: "topgun_event_queue_enqueued_total",
1153
- help: "Total number of events enqueued",
1154
- registers: [this.registry]
1155
- });
1156
- this.eventQueueDequeued = new Counter({
1157
- name: "topgun_event_queue_dequeued_total",
1158
- help: "Total number of events dequeued",
1159
- registers: [this.registry]
1160
- });
1161
- this.eventQueueRejected = new Counter({
1162
- name: "topgun_event_queue_rejected_total",
1163
- help: "Total number of events rejected due to queue capacity",
1164
- registers: [this.registry]
1165
- });
1166
- this.backpressureSyncForcedTotal = new Counter({
1167
- name: "topgun_backpressure_sync_forced_total",
1168
- help: "Total number of times sync processing was forced",
1169
- registers: [this.registry]
1170
- });
1171
- this.backpressurePendingOps = new Gauge({
1172
- name: "topgun_backpressure_pending_ops",
1173
- help: "Current number of pending async operations",
1174
- registers: [this.registry]
1175
- });
1176
- this.backpressureWaitsTotal = new Counter({
1177
- name: "topgun_backpressure_waits_total",
1178
- help: "Total number of times had to wait for capacity",
1179
- registers: [this.registry]
1180
- });
1181
- this.backpressureTimeoutsTotal = new Counter({
1182
- name: "topgun_backpressure_timeouts_total",
1183
- help: "Total number of backpressure timeouts",
1184
- registers: [this.registry]
1185
- });
1186
- this.connectionsAcceptedTotal = new Counter({
1187
- name: "topgun_connections_accepted_total",
1188
- help: "Total number of connections accepted",
1189
- registers: [this.registry]
1190
- });
1191
- this.connectionsRejectedTotal = new Counter({
1192
- name: "topgun_connections_rejected_total",
1193
- help: "Total number of connections rejected due to rate limiting",
1194
- registers: [this.registry]
1195
- });
1196
- this.connectionsPending = new Gauge({
1197
- name: "topgun_connections_pending",
1198
- help: "Number of connections currently pending (handshake in progress)",
1199
- registers: [this.registry]
1200
- });
1201
- this.connectionRatePerSecond = new Gauge({
1202
- name: "topgun_connection_rate_per_second",
1203
- help: "Current connection rate per second",
1204
- registers: [this.registry]
1205
- });
1770
+ // src/cluster/PartitionService.ts
1771
+ import {
1772
+ hashString,
1773
+ PARTITION_COUNT,
1774
+ DEFAULT_BACKUP_COUNT,
1775
+ DEFAULT_MIGRATION_CONFIG as DEFAULT_MIGRATION_CONFIG2
1776
+ } from "@topgunbuild/core";
1777
+ var DEFAULT_PARTITION_SERVICE_CONFIG = {
1778
+ gradualRebalancing: false,
1779
+ migration: DEFAULT_MIGRATION_CONFIG2
1780
+ };
1781
+ var PartitionService = class extends EventEmitter4 {
1782
+ constructor(cluster, config = {}) {
1783
+ super();
1784
+ // partitionId -> { owner, backups }
1785
+ this.partitions = /* @__PURE__ */ new Map();
1786
+ this.PARTITION_COUNT = PARTITION_COUNT;
1787
+ this.BACKUP_COUNT = DEFAULT_BACKUP_COUNT;
1788
+ // Phase 4: Version tracking for partition map
1789
+ this.mapVersion = 0;
1790
+ this.lastRebalanceTime = 0;
1791
+ this.migrationManager = null;
1792
+ this.cluster = cluster;
1793
+ this.config = {
1794
+ ...DEFAULT_PARTITION_SERVICE_CONFIG,
1795
+ ...config
1796
+ };
1797
+ if (this.config.gradualRebalancing) {
1798
+ this.migrationManager = new MigrationManager(
1799
+ cluster,
1800
+ this,
1801
+ this.config.migration
1802
+ );
1803
+ this.migrationManager.on("migrationComplete", (partitionId) => {
1804
+ logger.info({ partitionId }, "Migration completed, updating ownership");
1805
+ });
1806
+ this.migrationManager.on("migrationFailed", (partitionId, error) => {
1807
+ logger.error({ partitionId, error: error.message }, "Migration failed");
1808
+ });
1809
+ }
1810
+ this.cluster.on("memberJoined", (nodeId) => this.onMembershipChange("JOIN", nodeId));
1811
+ this.cluster.on("memberLeft", (nodeId) => this.onMembershipChange("LEAVE", nodeId));
1812
+ this.rebalance("REBALANCE");
1206
1813
  }
1207
- destroy() {
1208
- this.registry.clear();
1814
+ /**
1815
+ * Handle membership change
1816
+ */
1817
+ onMembershipChange(reason, nodeId) {
1818
+ if (this.config.gradualRebalancing && this.migrationManager) {
1819
+ this.rebalanceGradual(reason, nodeId);
1820
+ } else {
1821
+ this.rebalance(reason, nodeId);
1822
+ }
1209
1823
  }
1210
- setConnectedClients(count) {
1211
- this.connectedClients.set(count);
1824
+ getPartitionId(key) {
1825
+ return Math.abs(hashString(key)) % this.PARTITION_COUNT;
1212
1826
  }
1213
- setMapSize(mapName, size) {
1214
- this.mapSizeItems.set({ map: mapName }, size);
1827
+ getDistribution(key) {
1828
+ const pId = this.getPartitionId(key);
1829
+ return this.partitions.get(pId) || {
1830
+ owner: this.cluster.config.nodeId,
1831
+ backups: []
1832
+ };
1215
1833
  }
1216
- incOp(type, mapName) {
1217
- this.opsTotal.inc({ type, map: mapName });
1834
+ getOwner(key) {
1835
+ return this.getDistribution(key).owner;
1218
1836
  }
1219
- setClusterMembers(count) {
1220
- this.clusterMembers.set(count);
1837
+ isLocalOwner(key) {
1838
+ return this.getOwner(key) === this.cluster.config.nodeId;
1221
1839
  }
1222
- // === Subscription-based routing metric methods ===
1223
- /**
1224
- * Increment counter for total events processed for routing.
1225
- */
1226
- incEventsRouted() {
1227
- this.eventsRoutedTotal.inc();
1840
+ isLocalBackup(key) {
1841
+ const dist = this.getDistribution(key);
1842
+ return dist.backups.includes(this.cluster.config.nodeId);
1228
1843
  }
1229
- /**
1230
- * Increment counter for events filtered out due to no subscribers.
1231
- */
1232
- incEventsFilteredBySubscription() {
1233
- this.eventsFilteredBySubscription.inc();
1844
+ isRelated(key) {
1845
+ return this.isLocalOwner(key) || this.isLocalBackup(key);
1234
1846
  }
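Key routing is deterministic: a key hashes to one of PARTITION_COUNT partitions, and rebalance() (further down in this diff) assigns each partition's owner as the sorted member list indexed by partitionId % memberCount, with the next BACKUP_COUNT members as backups. A small routing sketch, assuming `partitionService` is an existing instance (the key is arbitrary):

const key = "users/42"; // illustrative key
const partitionId = partitionService.getPartitionId(key); // Math.abs(hashString(key)) % PARTITION_COUNT
const owner = partitionService.getOwner(key);              // node id from the current partition table

if (partitionService.isLocalOwner(key)) {
  // apply the operation locally
} else if (partitionService.isLocalBackup(key)) {
  // this node holds a backup replica of the key's partition
} else {
  // the operation belongs on `owner`; forward it over the cluster transport
}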
1847
+ // ============================================
1848
+ // Phase 4: Partition Map Methods
1849
+ // ============================================
1235
1850
  /**
1236
- * Record the number of subscribers for an event (for average calculation).
1851
+ * Get current partition map version
1237
1852
  */
1238
- recordSubscribersPerEvent(count) {
1239
- this.subscribersPerEvent.observe(count);
1853
+ getMapVersion() {
1854
+ return this.mapVersion;
1240
1855
  }
1241
- // === Bounded event queue metric methods ===
1242
1856
  /**
1243
- * Set the current size of a specific queue stripe.
1857
+ * Generate full PartitionMap for client consumption
1244
1858
  */
1245
- setEventQueueSize(stripe, size) {
1246
- this.eventQueueSize.set({ stripe: String(stripe) }, size);
1859
+ getPartitionMap() {
1860
+ const nodes = [];
1861
+ const partitions = [];
1862
+ for (const nodeId of this.cluster.getMembers()) {
1863
+ const isSelf = nodeId === this.cluster.config.nodeId;
1864
+ const host = isSelf ? this.cluster.config.host : "unknown";
1865
+ const port = isSelf ? this.cluster.port : 0;
1866
+ nodes.push({
1867
+ nodeId,
1868
+ endpoints: {
1869
+ websocket: `ws://${host}:${port}`
1870
+ },
1871
+ status: "ACTIVE"
1872
+ });
1873
+ }
1874
+ for (let i = 0; i < this.PARTITION_COUNT; i++) {
1875
+ const dist = this.partitions.get(i);
1876
+ if (dist) {
1877
+ partitions.push({
1878
+ partitionId: i,
1879
+ ownerNodeId: dist.owner,
1880
+ backupNodeIds: dist.backups
1881
+ });
1882
+ }
1883
+ }
1884
+ return {
1885
+ version: this.mapVersion,
1886
+ partitionCount: this.PARTITION_COUNT,
1887
+ nodes,
1888
+ partitions,
1889
+ generatedAt: Date.now()
1890
+ };
1247
1891
  }
1248
1892
  /**
1249
- * Increment counter for events enqueued.
1893
+ * Get partition info by ID
1250
1894
  */
1251
- incEventQueueEnqueued() {
1252
- this.eventQueueEnqueued.inc();
1895
+ getPartitionInfo(partitionId) {
1896
+ const dist = this.partitions.get(partitionId);
1897
+ if (!dist) return null;
1898
+ return {
1899
+ partitionId,
1900
+ ownerNodeId: dist.owner,
1901
+ backupNodeIds: dist.backups
1902
+ };
1253
1903
  }
1254
1904
  /**
1255
- * Increment counter for events dequeued.
1905
+ * Get owner node for a partition ID
1256
1906
  */
1257
- incEventQueueDequeued() {
1258
- this.eventQueueDequeued.inc();
1907
+ getPartitionOwner(partitionId) {
1908
+ const dist = this.partitions.get(partitionId);
1909
+ return dist?.owner ?? null;
1259
1910
  }
1260
- /**
1261
- * Increment counter for events rejected due to queue capacity.
1262
- */
1263
- incEventQueueRejected() {
1264
- this.eventQueueRejected.inc();
1911
+ rebalance(reason = "REBALANCE", triggerNodeId) {
1912
+ const oldPartitions = new Map(this.partitions);
1913
+ let allMembers = this.cluster.getMembers().sort();
1914
+ if (allMembers.length === 0) {
1915
+ allMembers = [this.cluster.config.nodeId];
1916
+ }
1917
+ logger.info({ memberCount: allMembers.length, members: allMembers, reason }, "Rebalancing partitions");
1918
+ const changes = [];
1919
+ for (let i = 0; i < this.PARTITION_COUNT; i++) {
1920
+ const ownerIndex = i % allMembers.length;
1921
+ const owner = allMembers[ownerIndex];
1922
+ const backups = [];
1923
+ if (allMembers.length > 1) {
1924
+ for (let b = 1; b <= this.BACKUP_COUNT; b++) {
1925
+ const backupIndex = (ownerIndex + b) % allMembers.length;
1926
+ backups.push(allMembers[backupIndex]);
1927
+ }
1928
+ }
1929
+ const oldDist = oldPartitions.get(i);
1930
+ if (oldDist && oldDist.owner !== owner) {
1931
+ changes.push({
1932
+ partitionId: i,
1933
+ previousOwner: oldDist.owner,
1934
+ newOwner: owner,
1935
+ reason
1936
+ });
1937
+ }
1938
+ this.partitions.set(i, { owner, backups });
1939
+ }
1940
+ if (changes.length > 0 || this.mapVersion === 0) {
1941
+ this.mapVersion++;
1942
+ this.lastRebalanceTime = Date.now();
1943
+ logger.info({
1944
+ version: this.mapVersion,
1945
+ changesCount: changes.length,
1946
+ reason
1947
+ }, "Partition map updated");
1948
+ this.emit("rebalanced", this.getPartitionMap(), changes);
1949
+ }
1265
1950
  }
1266
- // === Backpressure metric methods ===
1951
+ // ============================================
1952
+ // Phase 4 Task 03: Gradual Rebalancing
1953
+ // ============================================
1267
1954
  /**
1268
- * Increment counter for forced sync operations.
1955
+ * Perform gradual rebalancing using MigrationManager
1269
1956
  */
1270
- incBackpressureSyncForced() {
1271
- this.backpressureSyncForcedTotal.inc();
1957
+ rebalanceGradual(reason, triggerNodeId) {
1958
+ if (!this.migrationManager) {
1959
+ this.rebalance(reason, triggerNodeId);
1960
+ return;
1961
+ }
1962
+ const oldDistribution = new Map(this.partitions);
1963
+ let allMembers = this.cluster.getMembers().sort();
1964
+ if (allMembers.length === 0) {
1965
+ allMembers = [this.cluster.config.nodeId];
1966
+ }
1967
+ const newDistribution = /* @__PURE__ */ new Map();
1968
+ for (let i = 0; i < this.PARTITION_COUNT; i++) {
1969
+ const ownerIndex = i % allMembers.length;
1970
+ const owner = allMembers[ownerIndex];
1971
+ const backups = [];
1972
+ if (allMembers.length > 1) {
1973
+ for (let b = 1; b <= this.BACKUP_COUNT; b++) {
1974
+ const backupIndex = (ownerIndex + b) % allMembers.length;
1975
+ backups.push(allMembers[backupIndex]);
1976
+ }
1977
+ }
1978
+ newDistribution.set(i, { owner, backups });
1979
+ }
1980
+ logger.info({ memberCount: allMembers.length, reason, triggerNodeId }, "Planning gradual rebalance");
1981
+ this.migrationManager.planMigration(oldDistribution, newDistribution);
1982
+ for (const [partitionId, dist] of newDistribution) {
1983
+ this.partitions.set(partitionId, dist);
1984
+ }
1985
+ this.mapVersion++;
1986
+ this.lastRebalanceTime = Date.now();
1987
+ const changes = [];
1988
+ for (const [partitionId, newDist] of newDistribution) {
1989
+ const oldDist = oldDistribution.get(partitionId);
1990
+ if (oldDist && oldDist.owner !== newDist.owner) {
1991
+ changes.push({
1992
+ partitionId,
1993
+ previousOwner: oldDist.owner,
1994
+ newOwner: newDist.owner,
1995
+ reason
1996
+ });
1997
+ }
1998
+ }
1999
+ this.emit("rebalanced", this.getPartitionMap(), changes);
1272
2000
  }
1273
2001
  /**
1274
- * Set the current number of pending async operations.
2002
+ * Set partition owner (called after migration completes)
1275
2003
  */
1276
- setBackpressurePendingOps(count) {
1277
- this.backpressurePendingOps.set(count);
2004
+ setOwner(partitionId, nodeId) {
2005
+ const partition = this.partitions.get(partitionId);
2006
+ if (!partition) return;
2007
+ const previousOwner = partition.owner;
2008
+ if (previousOwner === nodeId) return;
2009
+ partition.owner = nodeId;
2010
+ this.mapVersion++;
2011
+ logger.info({ partitionId, previousOwner, newOwner: nodeId, version: this.mapVersion }, "Partition owner updated");
2012
+ this.emit("partitionMoved", {
2013
+ partitionId,
2014
+ previousOwner,
2015
+ newOwner: nodeId,
2016
+ version: this.mapVersion
2017
+ });
1278
2018
  }
1279
2019
  /**
1280
- * Increment counter for times had to wait for capacity.
2020
+ * Get backups for a partition
1281
2021
  */
1282
- incBackpressureWaits() {
1283
- this.backpressureWaitsTotal.inc();
2022
+ getBackups(partitionId) {
2023
+ const dist = this.partitions.get(partitionId);
2024
+ return dist?.backups ?? [];
1284
2025
  }
1285
2026
  /**
1286
- * Increment counter for backpressure timeouts.
2027
+ * Get migration status
1287
2028
  */
1288
- incBackpressureTimeouts() {
1289
- this.backpressureTimeoutsTotal.inc();
2029
+ getMigrationStatus() {
2030
+ return this.migrationManager?.getStatus() ?? null;
1290
2031
  }
1291
- // === Connection scaling metric methods ===
1292
2032
  /**
1293
- * Increment counter for accepted connections.
2033
+ * Check if partition is currently migrating
1294
2034
  */
1295
- incConnectionsAccepted() {
1296
- this.connectionsAcceptedTotal.inc();
2035
+ isMigrating(partitionId) {
2036
+ return this.migrationManager?.isActive(partitionId) ?? false;
1297
2037
  }
1298
2038
  /**
1299
- * Increment counter for rejected connections.
2039
+ * Check if any partition is currently migrating
1300
2040
  */
1301
- incConnectionsRejected() {
1302
- this.connectionsRejectedTotal.inc();
2041
+ isRebalancing() {
2042
+ const status = this.getMigrationStatus();
2043
+ return status?.inProgress ?? false;
1303
2044
  }
1304
2045
  /**
1305
- * Set the current number of pending connections.
2046
+ * Get MigrationManager for configuration
1306
2047
  */
1307
- setConnectionsPending(count) {
1308
- this.connectionsPending.set(count);
2048
+ getMigrationManager() {
2049
+ return this.migrationManager;
1309
2050
  }
1310
2051
  /**
1311
- * Set the current connection rate per second.
2052
+ * Cancel all migrations
1312
2053
  */
1313
- setConnectionRatePerSecond(rate) {
1314
- this.connectionRatePerSecond.set(rate);
1315
- }
1316
- async getMetrics() {
1317
- return this.registry.metrics();
1318
- }
1319
- async getMetricsJson() {
1320
- const metrics = await this.registry.getMetricsAsJSON();
1321
- const result = {};
1322
- for (const metric of metrics) {
1323
- if (metric.values.length === 1) {
1324
- result[metric.name] = metric.values[0].value;
1325
- } else {
1326
- result[metric.name] = metric.values;
1327
- }
2054
+ async cancelMigrations() {
2055
+ if (this.migrationManager) {
2056
+ await this.migrationManager.cancelAll();
1328
2057
  }
1329
- return result;
1330
- }
1331
- getContentType() {
1332
- return this.registry.contentType;
1333
2058
  }
1334
2059
  };
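The class closed above owns the partition map: rebalance() sorts the current member list, picks each partition's owner round-robin by partition index, and uses the next BACKUP_COUNT members as backups, while rebalanceGradual() computes the same target layout but hands the delta to the MigrationManager instead of applying it in one step. A standalone sketch of that assignment rule follows; the partition and backup counts here are illustrative placeholders, not values taken from the package.

// Sketch of the round-robin assignment used by rebalance()/rebalanceGradual() above.
// PARTITION_COUNT and BACKUP_COUNT are illustrative; the real values live on the class.
const PARTITION_COUNT = 16;
const BACKUP_COUNT = 1;
function planDistribution(members, selfNodeId) {
  // Mirrors the code above: fall back to this node when the member list is empty.
  const sorted = members.length > 0 ? [...members].sort() : [selfNodeId];
  const plan = new Map();
  for (let i = 0; i < PARTITION_COUNT; i++) {
    const ownerIndex = i % sorted.length;            // owner picked round-robin
    const backups = [];
    if (sorted.length > 1) {
      for (let b = 1; b <= BACKUP_COUNT; b++) {
        backups.push(sorted[(ownerIndex + b) % sorted.length]);  // next members act as backups
      }
    }
    plan.set(i, { owner: sorted[ownerIndex], backups });
  }
  return plan;
}
// planDistribution(["node-a", "node-b", "node-c"], "node-a").get(4)
// -> { owner: "node-b", backups: ["node-c"] }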
1335
2060
 
1336
- // src/system/SystemManager.ts
1337
- var SystemManager = class {
1338
- constructor(cluster, metrics, getMap) {
1339
- this.cluster = cluster;
1340
- this.metrics = metrics;
1341
- this.getMap = getMap;
1342
- }
1343
- start() {
1344
- this.setupClusterMap();
1345
- this.setupStatsMap();
1346
- this.setupMapsMap();
1347
- this.statsInterval = setInterval(() => this.updateStats(), 5e3);
1348
- this.cluster.on("memberJoined", () => this.updateClusterMap());
1349
- this.cluster.on("memberLeft", () => this.updateClusterMap());
1350
- this.updateClusterMap();
1351
- this.updateStats();
2061
+ // src/cluster/LockManager.ts
2062
+ import { EventEmitter as EventEmitter5 } from "events";
2063
+ var _LockManager = class _LockManager extends EventEmitter5 {
2064
+ // 5 minutes
2065
+ constructor() {
2066
+ super();
2067
+ this.locks = /* @__PURE__ */ new Map();
2068
+ this.checkInterval = setInterval(() => this.cleanupExpiredLocks(), 1e3);
1352
2069
  }
1353
2070
  stop() {
1354
- if (this.statsInterval) {
1355
- clearInterval(this.statsInterval);
1356
- }
2071
+ clearInterval(this.checkInterval);
1357
2072
  }
1358
- notifyMapCreated(mapName) {
1359
- if (mapName.startsWith("$sys/")) return;
2073
+ acquire(name, clientId, requestId, ttl) {
2074
+ const safeTtl = Math.max(_LockManager.MIN_TTL, Math.min(ttl || _LockManager.MIN_TTL, _LockManager.MAX_TTL));
2075
+ let lock = this.locks.get(name);
2076
+ if (!lock) {
2077
+ lock = {
2078
+ name,
2079
+ owner: "",
2080
+ fencingToken: 0,
2081
+ expiry: 0,
2082
+ queue: []
2083
+ };
2084
+ this.locks.set(name, lock);
2085
+ }
2086
+ const now = Date.now();
2087
+ if (!lock.owner || lock.expiry < now) {
2088
+ this.grantLock(lock, clientId, safeTtl);
2089
+ return { granted: true, fencingToken: lock.fencingToken };
2090
+ }
2091
+ if (lock.owner === clientId) {
2092
+ lock.expiry = Math.max(lock.expiry, now + safeTtl);
2093
+ logger.info({ name, clientId, fencingToken: lock.fencingToken }, "Lock lease extended");
2094
+ return { granted: true, fencingToken: lock.fencingToken };
2095
+ }
2096
+ lock.queue.push({ clientId, requestId, ttl: safeTtl, timestamp: now });
2097
+ logger.info({ name, clientId, queueLength: lock.queue.length }, "Lock queued");
2098
+ return { granted: false };
2099
+ }
2100
+ release(name, clientId, fencingToken) {
2101
+ const lock = this.locks.get(name);
2102
+ if (!lock) return false;
2103
+ if (lock.owner !== clientId) {
2104
+ logger.warn({ name, clientId, owner: lock.owner }, "Release failed: Not owner");
2105
+ return false;
2106
+ }
2107
+ if (lock.fencingToken !== fencingToken) {
2108
+ logger.warn({ name, clientId, sentToken: fencingToken, actualToken: lock.fencingToken }, "Release failed: Token mismatch");
2109
+ return false;
2110
+ }
2111
+ this.processNext(lock);
2112
+ return true;
2113
+ }
2114
+ handleClientDisconnect(clientId) {
2115
+ for (const lock of this.locks.values()) {
2116
+ if (lock.owner === clientId) {
2117
+ logger.info({ name: lock.name, clientId }, "Releasing lock due to disconnect");
2118
+ this.processNext(lock);
2119
+ } else {
2120
+ const initialLen = lock.queue.length;
2121
+ lock.queue = lock.queue.filter((req) => req.clientId !== clientId);
2122
+ if (lock.queue.length < initialLen) {
2123
+ logger.info({ name: lock.name, clientId }, "Removed from lock queue due to disconnect");
2124
+ }
2125
+ }
2126
+ }
2127
+ }
2128
+ grantLock(lock, clientId, ttl) {
2129
+ lock.owner = clientId;
2130
+ lock.expiry = Date.now() + ttl;
2131
+ lock.fencingToken++;
2132
+ logger.info({ name: lock.name, clientId, fencingToken: lock.fencingToken }, "Lock granted");
2133
+ }
2134
+ processNext(lock) {
2135
+ const now = Date.now();
2136
+ lock.owner = "";
2137
+ lock.expiry = 0;
2138
+ while (lock.queue.length > 0) {
2139
+ const next = lock.queue.shift();
2140
+ this.grantLock(lock, next.clientId, next.ttl);
2141
+ this.emit("lockGranted", {
2142
+ clientId: next.clientId,
2143
+ requestId: next.requestId,
2144
+ name: lock.name,
2145
+ fencingToken: lock.fencingToken
2146
+ });
2147
+ return;
2148
+ }
2149
+ if (lock.queue.length === 0) {
2150
+ this.locks.delete(lock.name);
2151
+ }
2152
+ }
2153
+ cleanupExpiredLocks() {
2154
+ const now = Date.now();
2155
+ const lockNames = Array.from(this.locks.keys());
2156
+ for (const name of lockNames) {
2157
+ const lock = this.locks.get(name);
2158
+ if (!lock) continue;
2159
+ if (lock.owner && lock.expiry < now) {
2160
+ logger.info({ name: lock.name, owner: lock.owner }, "Lock expired, processing next");
2161
+ this.processNext(lock);
2162
+ } else if (!lock.owner && lock.queue.length === 0) {
2163
+ this.locks.delete(name);
2164
+ }
2165
+ }
2166
+ }
2167
+ };
2168
+ _LockManager.MIN_TTL = 1e3;
2169
+ // 1 second
2170
+ _LockManager.MAX_TTL = 3e5;
2171
+ var LockManager = _LockManager;
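LockManager above hands out leases with monotonically increasing fencing tokens, clamps requested TTLs into [MIN_TTL, MAX_TTL] (1 s to 5 min), extends the lease when the current owner re-acquires, queues other clients, and reassigns or drops locks on expiry and client disconnect. A usage sketch against the class as bundled; whether and how it is exported from the package entry point is not shown in this diff, so construction here is direct.

// Sketch only: LockManager is assumed to be in scope; adjust to however your build exposes it.
const locks = new LockManager();

locks.on("lockGranted", (grant) => {
  // Fired when a queued request is promoted to owner after a release or expiry.
  console.log("granted", grant.name, "to", grant.clientId, "token", grant.fencingToken);
});

// client-a asks for "orders" with a 10s lease (clamped into the 1s..5min window).
const a = locks.acquire("orders", "client-a", "req-1", 10_000);
// -> { granted: true, fencingToken: 1 }

// client-b is queued while client-a holds the lease.
const b = locks.acquire("orders", "client-b", "req-2", 10_000);
// -> { granted: false }

// Release must present the matching fencing token; client-b is then granted via the event above.
locks.release("orders", "client-a", a.fencingToken); // -> true

locks.handleClientDisconnect("client-b"); // drops client-b's lock/queue entries on disconnect
locks.stop();                             // clears the 1s expiry sweep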
2172
+
2173
+ // src/security/SecurityManager.ts
2174
+ var SecurityManager = class {
2175
+ constructor(policies = []) {
2176
+ this.policies = [];
2177
+ this.policies = policies;
2178
+ }
2179
+ addPolicy(policy) {
2180
+ this.policies.push(policy);
2181
+ }
2182
+ checkPermission(principal, mapName, action) {
2183
+ if (principal.roles.includes("ADMIN")) {
2184
+ return true;
2185
+ }
2186
+ if (mapName.startsWith("$sys/")) {
2187
+ logger.warn({ userId: principal.userId, mapName }, "Access Denied: System Map requires ADMIN role");
2188
+ return false;
2189
+ }
2190
+ for (const policy of this.policies) {
2191
+ const hasRole = this.hasRole(principal, policy.role);
2192
+ const matchesMap = this.matchesMap(mapName, policy.mapNamePattern, principal);
2193
+ if (hasRole && matchesMap) {
2194
+ if (policy.actions.includes("ALL") || policy.actions.includes(action)) {
2195
+ return true;
2196
+ }
2197
+ } else {
2198
+ }
2199
+ }
2200
+ logger.warn({
2201
+ userId: principal.userId,
2202
+ roles: principal.roles,
2203
+ mapName,
2204
+ action,
2205
+ policyCount: this.policies.length
2206
+ }, "SecurityManager: Access Denied - No matching policy found");
2207
+ return false;
2208
+ }
2209
+ filterObject(object, principal, mapName) {
2210
+ if (!object || typeof object !== "object") return object;
2211
+ if (principal.roles.includes("ADMIN")) return object;
2212
+ if (Array.isArray(object)) {
2213
+ return object.map((item) => this.filterObject(item, principal, mapName));
2214
+ }
2215
+ let allowedFields = null;
2216
+ let accessGranted = false;
2217
+ for (const policy of this.policies) {
2218
+ if (this.hasRole(principal, policy.role) && this.matchesMap(mapName, policy.mapNamePattern, principal)) {
2219
+ if (policy.actions.includes("ALL") || policy.actions.includes("READ")) {
2220
+ accessGranted = true;
2221
+ if (!policy.allowedFields || policy.allowedFields.length === 0 || policy.allowedFields.includes("*")) {
2222
+ return object;
2223
+ }
2224
+ if (allowedFields === null) allowedFields = /* @__PURE__ */ new Set();
2225
+ policy.allowedFields.forEach((f) => allowedFields.add(f));
2226
+ }
2227
+ }
2228
+ }
2229
+ if (!accessGranted) return null;
2230
+ if (allowedFields === null) return object;
2231
+ const filtered = {};
2232
+ for (const key of Object.keys(object)) {
2233
+ if (allowedFields.has(key)) {
2234
+ filtered[key] = object[key];
2235
+ }
2236
+ }
2237
+ return filtered;
2238
+ }
2239
+ hasRole(principal, role) {
2240
+ return principal.roles.includes(role);
2241
+ }
2242
+ matchesMap(mapName, pattern, principal) {
2243
+ let finalPattern = pattern;
2244
+ if (pattern.includes("{userId}") && principal) {
2245
+ finalPattern = pattern.replace("{userId}", principal.userId);
2246
+ }
2247
+ if (finalPattern === "*") return true;
2248
+ if (finalPattern === mapName) return true;
2249
+ if (finalPattern.endsWith("*")) {
2250
+ const prefix = finalPattern.slice(0, -1);
2251
+ return mapName.startsWith(prefix);
2252
+ }
2253
+ return false;
2254
+ }
2255
+ };
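checkPermission() above short-circuits for the ADMIN role, denies non-admin access to "$sys/" maps, and otherwise needs one policy whose role matches, whose mapNamePattern matches (with "{userId}" substitution and a trailing-"*" prefix wildcard), and whose actions contain the requested action or "ALL"; filterObject() additionally projects READ results down to allowedFields. An illustrative policy set follows; the object shape is inferred from the code above, and action names other than READ and ALL are assumptions.

// Policies are illustrative; only the fields read by the code above are used.
const security = new SecurityManager([
  // A USER may do anything inside their own namespace, e.g. user/alice/* for userId "alice".
  { role: "USER", mapNamePattern: "user/{userId}/*", actions: ["ALL"] },
  // A VIEWER may read "products" but only sees two fields.
  { role: "VIEWER", mapNamePattern: "products", actions: ["READ"], allowedFields: ["name", "price"] }
]);

const alice = { userId: "alice", roles: ["USER"] };
security.checkPermission(alice, "user/alice/profile", "WRITE"); // true  ("ALL" covers it)
security.checkPermission(alice, "user/bob/profile", "WRITE");   // false (pattern resolves to user/alice/*)
security.checkPermission(alice, "$sys/cluster", "READ");        // false (system maps require ADMIN)

const viewer = { userId: "v1", roles: ["VIEWER"] };
security.filterObject({ name: "Mug", price: 5, cost: 2 }, viewer, "products");
// -> { name: "Mug", price: 5 }  (cost is stripped by allowedFields)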
2256
+
2257
+ // src/monitoring/MetricsService.ts
2258
+ import { Registry, Gauge, Counter, Summary, collectDefaultMetrics } from "prom-client";
2259
+ var MetricsService = class {
2260
+ constructor() {
2261
+ this.registry = new Registry();
2262
+ collectDefaultMetrics({ register: this.registry, prefix: "topgun_" });
2263
+ this.connectedClients = new Gauge({
2264
+ name: "topgun_connected_clients",
2265
+ help: "Number of currently connected clients",
2266
+ registers: [this.registry]
2267
+ });
2268
+ this.mapSizeItems = new Gauge({
2269
+ name: "topgun_map_size_items",
2270
+ help: "Number of items in a map",
2271
+ labelNames: ["map"],
2272
+ registers: [this.registry]
2273
+ });
2274
+ this.opsTotal = new Counter({
2275
+ name: "topgun_ops_total",
2276
+ help: "Total number of operations",
2277
+ labelNames: ["type", "map"],
2278
+ registers: [this.registry]
2279
+ });
2280
+ this.memoryUsage = new Gauge({
2281
+ name: "topgun_memory_usage_bytes",
2282
+ help: "Current memory usage in bytes",
2283
+ registers: [this.registry],
2284
+ collect() {
2285
+ this.set(process.memoryUsage().heapUsed);
2286
+ }
2287
+ });
2288
+ this.clusterMembers = new Gauge({
2289
+ name: "topgun_cluster_members",
2290
+ help: "Number of active cluster members",
2291
+ registers: [this.registry]
2292
+ });
2293
+ this.eventsRoutedTotal = new Counter({
2294
+ name: "topgun_events_routed_total",
2295
+ help: "Total number of events processed for routing",
2296
+ registers: [this.registry]
2297
+ });
2298
+ this.eventsFilteredBySubscription = new Counter({
2299
+ name: "topgun_events_filtered_by_subscription",
2300
+ help: "Events NOT sent due to no active subscriptions",
2301
+ registers: [this.registry]
2302
+ });
2303
+ this.subscribersPerEvent = new Summary({
2304
+ name: "topgun_subscribers_per_event",
2305
+ help: "Distribution of subscribers per event",
2306
+ percentiles: [0.5, 0.9, 0.99],
2307
+ registers: [this.registry]
2308
+ });
2309
+ this.eventQueueSize = new Gauge({
2310
+ name: "topgun_event_queue_size",
2311
+ help: "Current size of the event queue across all stripes",
2312
+ labelNames: ["stripe"],
2313
+ registers: [this.registry]
2314
+ });
2315
+ this.eventQueueEnqueued = new Counter({
2316
+ name: "topgun_event_queue_enqueued_total",
2317
+ help: "Total number of events enqueued",
2318
+ registers: [this.registry]
2319
+ });
2320
+ this.eventQueueDequeued = new Counter({
2321
+ name: "topgun_event_queue_dequeued_total",
2322
+ help: "Total number of events dequeued",
2323
+ registers: [this.registry]
2324
+ });
2325
+ this.eventQueueRejected = new Counter({
2326
+ name: "topgun_event_queue_rejected_total",
2327
+ help: "Total number of events rejected due to queue capacity",
2328
+ registers: [this.registry]
2329
+ });
2330
+ this.backpressureSyncForcedTotal = new Counter({
2331
+ name: "topgun_backpressure_sync_forced_total",
2332
+ help: "Total number of times sync processing was forced",
2333
+ registers: [this.registry]
2334
+ });
2335
+ this.backpressurePendingOps = new Gauge({
2336
+ name: "topgun_backpressure_pending_ops",
2337
+ help: "Current number of pending async operations",
2338
+ registers: [this.registry]
2339
+ });
2340
+ this.backpressureWaitsTotal = new Counter({
2341
+ name: "topgun_backpressure_waits_total",
2342
+ help: "Total number of times had to wait for capacity",
2343
+ registers: [this.registry]
2344
+ });
2345
+ this.backpressureTimeoutsTotal = new Counter({
2346
+ name: "topgun_backpressure_timeouts_total",
2347
+ help: "Total number of backpressure timeouts",
2348
+ registers: [this.registry]
2349
+ });
2350
+ this.connectionsAcceptedTotal = new Counter({
2351
+ name: "topgun_connections_accepted_total",
2352
+ help: "Total number of connections accepted",
2353
+ registers: [this.registry]
2354
+ });
2355
+ this.connectionsRejectedTotal = new Counter({
2356
+ name: "topgun_connections_rejected_total",
2357
+ help: "Total number of connections rejected due to rate limiting",
2358
+ registers: [this.registry]
2359
+ });
2360
+ this.connectionsPending = new Gauge({
2361
+ name: "topgun_connections_pending",
2362
+ help: "Number of connections currently pending (handshake in progress)",
2363
+ registers: [this.registry]
2364
+ });
2365
+ this.connectionRatePerSecond = new Gauge({
2366
+ name: "topgun_connection_rate_per_second",
2367
+ help: "Current connection rate per second",
2368
+ registers: [this.registry]
2369
+ });
2370
+ }
2371
+ destroy() {
2372
+ this.registry.clear();
2373
+ }
2374
+ setConnectedClients(count) {
2375
+ this.connectedClients.set(count);
2376
+ }
2377
+ setMapSize(mapName, size) {
2378
+ this.mapSizeItems.set({ map: mapName }, size);
2379
+ }
2380
+ incOp(type, mapName) {
2381
+ this.opsTotal.inc({ type, map: mapName });
2382
+ }
2383
+ setClusterMembers(count) {
2384
+ this.clusterMembers.set(count);
2385
+ }
2386
+ // === Subscription-based routing metric methods ===
2387
+ /**
2388
+ * Increment counter for total events processed for routing.
2389
+ */
2390
+ incEventsRouted() {
2391
+ this.eventsRoutedTotal.inc();
2392
+ }
2393
+ /**
2394
+ * Increment counter for events filtered out due to no subscribers.
2395
+ */
2396
+ incEventsFilteredBySubscription() {
2397
+ this.eventsFilteredBySubscription.inc();
2398
+ }
2399
+ /**
2400
+ * Record the number of subscribers for an event (for average calculation).
2401
+ */
2402
+ recordSubscribersPerEvent(count) {
2403
+ this.subscribersPerEvent.observe(count);
2404
+ }
2405
+ // === Bounded event queue metric methods ===
2406
+ /**
2407
+ * Set the current size of a specific queue stripe.
2408
+ */
2409
+ setEventQueueSize(stripe, size) {
2410
+ this.eventQueueSize.set({ stripe: String(stripe) }, size);
2411
+ }
2412
+ /**
2413
+ * Increment counter for events enqueued.
2414
+ */
2415
+ incEventQueueEnqueued() {
2416
+ this.eventQueueEnqueued.inc();
2417
+ }
2418
+ /**
2419
+ * Increment counter for events dequeued.
2420
+ */
2421
+ incEventQueueDequeued() {
2422
+ this.eventQueueDequeued.inc();
2423
+ }
2424
+ /**
2425
+ * Increment counter for events rejected due to queue capacity.
2426
+ */
2427
+ incEventQueueRejected() {
2428
+ this.eventQueueRejected.inc();
2429
+ }
2430
+ // === Backpressure metric methods ===
2431
+ /**
2432
+ * Increment counter for forced sync operations.
2433
+ */
2434
+ incBackpressureSyncForced() {
2435
+ this.backpressureSyncForcedTotal.inc();
2436
+ }
2437
+ /**
2438
+ * Set the current number of pending async operations.
2439
+ */
2440
+ setBackpressurePendingOps(count) {
2441
+ this.backpressurePendingOps.set(count);
2442
+ }
2443
+ /**
2444
+ * Increment counter for times had to wait for capacity.
2445
+ */
2446
+ incBackpressureWaits() {
2447
+ this.backpressureWaitsTotal.inc();
2448
+ }
2449
+ /**
2450
+ * Increment counter for backpressure timeouts.
2451
+ */
2452
+ incBackpressureTimeouts() {
2453
+ this.backpressureTimeoutsTotal.inc();
2454
+ }
2455
+ // === Connection scaling metric methods ===
2456
+ /**
2457
+ * Increment counter for accepted connections.
2458
+ */
2459
+ incConnectionsAccepted() {
2460
+ this.connectionsAcceptedTotal.inc();
2461
+ }
2462
+ /**
2463
+ * Increment counter for rejected connections.
2464
+ */
2465
+ incConnectionsRejected() {
2466
+ this.connectionsRejectedTotal.inc();
2467
+ }
2468
+ /**
2469
+ * Set the current number of pending connections.
2470
+ */
2471
+ setConnectionsPending(count) {
2472
+ this.connectionsPending.set(count);
2473
+ }
2474
+ /**
2475
+ * Set the current connection rate per second.
2476
+ */
2477
+ setConnectionRatePerSecond(rate) {
2478
+ this.connectionRatePerSecond.set(rate);
2479
+ }
2480
+ async getMetrics() {
2481
+ return this.registry.metrics();
2482
+ }
2483
+ async getMetricsJson() {
2484
+ const metrics = await this.registry.getMetricsAsJSON();
2485
+ const result = {};
2486
+ for (const metric of metrics) {
2487
+ if (metric.values.length === 1) {
2488
+ result[metric.name] = metric.values[0].value;
2489
+ } else {
2490
+ result[metric.name] = metric.values;
2491
+ }
2492
+ }
2493
+ return result;
2494
+ }
2495
+ getContentType() {
2496
+ return this.registry.contentType;
2497
+ }
2498
+ };
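MetricsService above registers prom-client default metrics under the topgun_ prefix plus the gauges, counters and summaries shown, and exposes them either as Prometheus text (getMetrics() with getContentType()) or as flattened JSON (getMetricsJson(), which collapses single-sample metrics to plain numbers). A minimal scrape-endpoint sketch showing the accessor shapes; the HTTP wiring here is illustrative only and is not how the server itself exposes metrics.

import { createServer } from "http";

// Assumes a MetricsService instance is in scope (e.g. the one the server constructs).
const metrics = new MetricsService();
metrics.setConnectedClients(3);
metrics.incOp("put", "orders");

createServer(async (req, res) => {
  if (req.url === "/metrics") {
    res.setHeader("Content-Type", metrics.getContentType()); // prom-client registry content type
    res.end(await metrics.getMetrics());                     // Prometheus text exposition format
  } else if (req.url === "/metrics.json") {
    res.setHeader("Content-Type", "application/json");
    res.end(JSON.stringify(await metrics.getMetricsJson())); // single-sample metrics flattened to numbers
  } else {
    res.statusCode = 404;
    res.end();
  }
}).listen(9464); // port chosen arbitrarily for the example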
2499
+
2500
+ // src/system/SystemManager.ts
2501
+ var SystemManager = class {
2502
+ constructor(cluster, metrics, getMap) {
2503
+ this.cluster = cluster;
2504
+ this.metrics = metrics;
2505
+ this.getMap = getMap;
2506
+ }
2507
+ start() {
2508
+ this.setupClusterMap();
2509
+ this.setupStatsMap();
2510
+ this.setupMapsMap();
2511
+ this.statsInterval = setInterval(() => this.updateStats(), 5e3);
2512
+ this.cluster.on("memberJoined", () => this.updateClusterMap());
2513
+ this.cluster.on("memberLeft", () => this.updateClusterMap());
2514
+ this.updateClusterMap();
2515
+ this.updateStats();
2516
+ }
2517
+ stop() {
2518
+ if (this.statsInterval) {
2519
+ clearInterval(this.statsInterval);
2520
+ }
2521
+ }
2522
+ notifyMapCreated(mapName) {
2523
+ if (mapName.startsWith("$sys/")) return;
1360
2524
  this.updateMapsMap(mapName);
1361
2525
  }
1362
2526
  setupClusterMap() {
@@ -1411,8 +2575,8 @@ var SystemManager = class {
1411
2575
  };
1412
2576
 
1413
2577
  // src/utils/BoundedEventQueue.ts
1414
- import { EventEmitter as EventEmitter3 } from "events";
1415
- var BoundedEventQueue = class extends EventEmitter3 {
2578
+ import { EventEmitter as EventEmitter6 } from "events";
2579
+ var BoundedEventQueue = class extends EventEmitter6 {
1416
2580
  constructor(options) {
1417
2581
  super();
1418
2582
  this.queue = [];
@@ -1844,7 +3008,7 @@ var BackpressureRegulator = class {
1844
3008
 
1845
3009
  // src/utils/CoalescingWriter.ts
1846
3010
  import { WebSocket as WebSocket2 } from "ws";
1847
- import { serialize as serialize2 } from "@topgunbuild/core";
3011
+ import { serialize as serialize3 } from "@topgunbuild/core";
1848
3012
 
1849
3013
  // src/memory/BufferPool.ts
1850
3014
  var DEFAULT_CONFIG2 = {
@@ -2375,7 +3539,7 @@ var CoalescingWriter = class {
2375
3539
  if (this.closed) {
2376
3540
  return;
2377
3541
  }
2378
- const data = serialize2(message);
3542
+ const data = serialize3(message);
2379
3543
  this.writeRaw(data, urgent);
2380
3544
  }
2381
3545
  /**
@@ -2559,7 +3723,7 @@ var CoalescingWriter = class {
2559
3723
  offset += msg.data.length;
2560
3724
  }
2561
3725
  const usedBatch = batch.subarray(0, totalSize);
2562
- const batchEnvelope = serialize2({
3726
+ const batchEnvelope = serialize3({
2563
3727
  type: "BATCH",
2564
3728
  count: messages.length,
2565
3729
  data: usedBatch
@@ -2574,13 +3738,23 @@ var CoalescingWriter = class {
2574
3738
  // src/utils/coalescingPresets.ts
2575
3739
  var coalescingPresets = {
2576
3740
  /**
2577
- * Conservative defaults - good for low-latency workloads.
2578
- * Minimizes batching delay at the cost of more network calls.
2579
- * Use for: gaming, real-time chat, interactive applications.
3741
+ * Low latency - optimized for minimal response time.
3742
+ * Best for: gaming, real-time chat, interactive applications.
3743
+ * Benchmark: p50=2ms, ~18K ops/sec
3744
+ */
3745
+ lowLatency: {
3746
+ maxBatchSize: 100,
3747
+ maxDelayMs: 1,
3748
+ maxBatchBytes: 65536
3749
+ // 64KB
3750
+ },
3751
+ /**
3752
+ * Conservative - good balance of latency and batching.
3753
+ * Use for: general purpose with latency sensitivity.
2580
3754
  */
2581
3755
  conservative: {
2582
3756
  maxBatchSize: 100,
2583
- maxDelayMs: 5,
3757
+ maxDelayMs: 2,
2584
3758
  maxBatchBytes: 65536
2585
3759
  // 64KB
2586
3760
  },
@@ -2591,7 +3765,7 @@ var coalescingPresets = {
2591
3765
  */
2592
3766
  balanced: {
2593
3767
  maxBatchSize: 300,
2594
- maxDelayMs: 8,
3768
+ maxDelayMs: 2,
2595
3769
  maxBatchBytes: 131072
2596
3770
  // 128KB
2597
3771
  },
@@ -2599,10 +3773,11 @@ var coalescingPresets = {
2599
3773
  * High throughput - optimized for write-heavy workloads.
2600
3774
  * Higher batching for better network utilization.
2601
3775
  * Use for: data ingestion, logging, IoT data streams.
3776
+ * Benchmark: p50=7ms, ~18K ops/sec
2602
3777
  */
2603
3778
  highThroughput: {
2604
3779
  maxBatchSize: 500,
2605
- maxDelayMs: 10,
3780
+ maxDelayMs: 2,
2606
3781
  maxBatchBytes: 262144
2607
3782
  // 256KB
2608
3783
  },
@@ -2613,7 +3788,7 @@ var coalescingPresets = {
2613
3788
  */
2614
3789
  aggressive: {
2615
3790
  maxBatchSize: 1e3,
2616
- maxDelayMs: 15,
3791
+ maxDelayMs: 5,
2617
3792
  maxBatchBytes: 524288
2618
3793
  // 512KB
2619
3794
  }
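This hunk adds a lowLatency preset and tightens maxDelayMs on the existing ones (conservative 5→2 ms, balanced 8→2 ms, highThroughput 10→2 ms, aggressive 15→5 ms), so batches flush sooner across the board. The presets are plain option bags; how they are fed into the coalescing writer is not shown in this hunk, so the following is only a sketch of choosing one.

// Illustrative selection between two of the presets defined above.
const { lowLatency, highThroughput } = coalescingPresets;

// lowLatency:     flush after <=1ms, <=100 msgs or 64KB  - interactive / chat / gaming traffic
// highThroughput: flush after <=2ms, <=500 msgs or 256KB - ingestion / logging / IoT streams
const coalescing = process.env.WORKLOAD === "ingest" ? highThroughput : lowLatency;
console.log(coalescing); // e.g. { maxBatchSize: 100, maxDelayMs: 1, maxBatchBytes: 65536 }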
@@ -3144,6 +4319,7 @@ var WorkerPool = class {
3144
4319
 
3145
4320
  // src/workers/MerkleWorker.ts
3146
4321
  import { join as join2 } from "path";
4322
+ import { hashString as coreHashString } from "@topgunbuild/core";
3147
4323
  var WORKER_THRESHOLD = 10;
3148
4324
  var taskIdCounter = 0;
3149
4325
  function generateTaskId() {
@@ -3353,12 +4529,7 @@ var MerkleWorker = class {
3353
4529
  }
3354
4530
  // ============ Hash utilities ============
3355
4531
  hashString(str) {
3356
- let hash = 2166136261;
3357
- for (let i = 0; i < str.length; i++) {
3358
- hash ^= str.charCodeAt(i);
3359
- hash = Math.imul(hash, 16777619);
3360
- }
3361
- return hash >>> 0;
4532
+ return coreHashString(str);
3362
4533
  }
3363
4534
  buildTree(entries, depth) {
3364
4535
  const root = { hash: 0, children: {} };
@@ -3857,726 +5028,1387 @@ var SharedMemoryManager = class {
3857
5028
  return true;
3858
5029
  }
3859
5030
  /**
3860
- * Read data length from a slot.
5031
+ * Read data length from a slot.
5032
+ */
5033
+ getDataLength(slotIndex) {
5034
+ const lengthView = new DataView(
5035
+ this.buffer,
5036
+ this.getLengthOffset(slotIndex),
5037
+ 4
5038
+ );
5039
+ return lengthView.getUint32(0, true);
5040
+ }
5041
+ /**
5042
+ * Get data view for a slot (for reading).
5043
+ */
5044
+ getDataView(slotIndex) {
5045
+ const length = this.getDataLength(slotIndex);
5046
+ const dataOffset = this.getDataOffset(slotIndex);
5047
+ return new Uint8Array(this.buffer, dataOffset, length);
5048
+ }
5049
+ /**
5050
+ * Get slot status.
5051
+ */
5052
+ getStatus(slotIndex) {
5053
+ return Atomics.load(this.statusArray, this.getStatusOffset(slotIndex));
5054
+ }
5055
+ /**
5056
+ * Wait for a specific status with timeout.
5057
+ * Returns the actual status (may differ if timeout occurred).
5058
+ */
5059
+ waitForStatus(slotIndex, expectedStatus, timeoutMs = 5e3) {
5060
+ const statusOffset = this.getStatusOffset(slotIndex);
5061
+ const deadline = Date.now() + timeoutMs;
5062
+ while (Date.now() < deadline) {
5063
+ const status = Atomics.load(this.statusArray, statusOffset);
5064
+ if (status === expectedStatus || status === 255 /* ERROR */) {
5065
+ return status;
5066
+ }
5067
+ const remaining = deadline - Date.now();
5068
+ if (remaining > 0) {
5069
+ Atomics.wait(
5070
+ this.statusArray,
5071
+ statusOffset,
5072
+ status,
5073
+ Math.min(remaining, 100)
5074
+ );
5075
+ }
5076
+ }
5077
+ return Atomics.load(this.statusArray, statusOffset);
5078
+ }
5079
+ /**
5080
+ * Wait for result and read it.
5081
+ * Returns null on timeout or error.
5082
+ */
5083
+ waitForResult(slot, timeoutMs = 5e3) {
5084
+ const status = this.waitForStatus(
5085
+ slot.index,
5086
+ 4 /* RESULT_READY */,
5087
+ timeoutMs
5088
+ );
5089
+ if (status === 4 /* RESULT_READY */) {
5090
+ const length = this.getDataLength(slot.index);
5091
+ const result = new Uint8Array(length);
5092
+ result.set(slot.dataView.subarray(0, length));
5093
+ return result;
5094
+ }
5095
+ return null;
5096
+ }
5097
+ /**
5098
+ * Get the SharedArrayBuffer for passing to workers.
5099
+ */
5100
+ getBuffer() {
5101
+ return this.buffer;
5102
+ }
5103
+ /**
5104
+ * Get configuration needed by workers.
5105
+ */
5106
+ getWorkerConfig() {
5107
+ return {
5108
+ sharedBuffer: this.buffer,
5109
+ slotSize: this.slotSize,
5110
+ slotCount: this.slotCount,
5111
+ metadataSize: this.metadataSize
5112
+ };
5113
+ }
5114
+ /**
5115
+ * Get statistics.
5116
+ */
5117
+ getStats() {
5118
+ return {
5119
+ totalSize: this.buffer.byteLength,
5120
+ slotCount: this.slotCount,
5121
+ slotSize: this.slotSize,
5122
+ allocatedSlots: this.allocatedCount,
5123
+ availableSlots: this.freeSlots.size,
5124
+ peakUsage: this.peakUsage,
5125
+ totalAllocations: this.totalAllocations,
5126
+ totalReleases: this.totalReleases
5127
+ };
5128
+ }
5129
+ /**
5130
+ * Check if SharedArrayBuffer is available in current environment.
5131
+ */
5132
+ static isAvailable() {
5133
+ try {
5134
+ new SharedArrayBuffer(1);
5135
+ return true;
5136
+ } catch {
5137
+ return false;
5138
+ }
5139
+ }
5140
+ /**
5141
+ * Shutdown and release resources.
5142
+ * Resets all slots to FREE status.
5143
+ */
5144
+ shutdown() {
5145
+ for (let i = 0; i < this.slotCount; i++) {
5146
+ Atomics.store(this.statusArray, this.getStatusOffset(i), 0 /* FREE */);
5147
+ }
5148
+ this.freeSlots.clear();
5149
+ for (let i = 0; i < this.slotCount; i++) {
5150
+ this.freeSlots.add(i);
5151
+ }
5152
+ this.allocatedCount = 0;
5153
+ }
5154
+ };
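The slot protocol above keeps one status byte per slot and polls it with Atomics.load/Atomics.wait until RESULT_READY (4) or ERROR (255) appears within the deadline, then copies the result out of the slot's data view. A reduced sketch of the same wait loop over a single Int32Array cell makes the Atomics pattern explicit; slot offsets and sizes are simplified away.

// Reduced illustration of waitForStatus() above, on one shared status cell.
// Status codes mirror the bundle: 4 = RESULT_READY, 255 = ERROR.
const sab = new SharedArrayBuffer(4);
const status = new Int32Array(sab);

function waitForStatus(expected, timeoutMs = 5000) {
  const deadline = Date.now() + timeoutMs;
  while (Date.now() < deadline) {
    const current = Atomics.load(status, 0);
    if (current === expected || current === 255) return current;
    const remaining = deadline - Date.now();
    if (remaining > 0) {
      // Block until the value changes or 100ms passes, whichever comes first.
      Atomics.wait(status, 0, current, Math.min(remaining, 100));
    }
  }
  return Atomics.load(status, 0);
}

// A worker holding the same buffer would signal completion with:
//   Atomics.store(status, 0, 4); Atomics.notify(status, 0);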
5155
+
5156
+ // src/tasklet/TaskletScheduler.ts
5157
+ var DEFAULT_CONFIG4 = {
5158
+ defaultTimeBudgetMs: 5,
5159
+ maxConcurrent: 10,
5160
+ tickIntervalMs: 1,
5161
+ metricsEnabled: true
5162
+ };
5163
+ var TaskletScheduler = class {
5164
+ constructor(config) {
5165
+ this.activeTasklets = /* @__PURE__ */ new Map();
5166
+ this.tickTimer = null;
5167
+ this.isRunning = false;
5168
+ this.isShuttingDown = false;
5169
+ // Metrics
5170
+ this.totalScheduled = 0;
5171
+ this.completedTasklets = 0;
5172
+ this.cancelledTasklets = 0;
5173
+ this.totalIterations = 0;
5174
+ this.singleIterationCompletions = 0;
5175
+ this.totalExecutionTimeMs = 0;
5176
+ this.config = { ...DEFAULT_CONFIG4, ...config };
5177
+ }
5178
+ /**
5179
+ * Schedule a tasklet for execution.
5180
+ * Returns a promise that resolves when the tasklet completes.
5181
+ */
5182
+ schedule(tasklet) {
5183
+ if (this.isShuttingDown) {
5184
+ return Promise.reject(new Error("Scheduler is shutting down"));
5185
+ }
5186
+ return new Promise((resolve, reject) => {
5187
+ const taskletId = `${tasklet.name}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
5188
+ if (this.activeTasklets.size >= this.config.maxConcurrent) {
5189
+ reject(new Error(`Max concurrent tasklets (${this.config.maxConcurrent}) reached`));
5190
+ return;
5191
+ }
5192
+ const state = {
5193
+ tasklet,
5194
+ resolve,
5195
+ reject,
5196
+ iterations: 0,
5197
+ startTime: Date.now(),
5198
+ lastProgressTime: Date.now()
5199
+ };
5200
+ this.activeTasklets.set(taskletId, state);
5201
+ this.totalScheduled++;
5202
+ if (!this.isRunning) {
5203
+ this.startScheduler();
5204
+ }
5205
+ });
5206
+ }
5207
+ /**
5208
+ * Run a tasklet synchronously (blocking).
5209
+ * Useful for small operations or when cooperative scheduling isn't needed.
5210
+ */
5211
+ runSync(tasklet) {
5212
+ let state;
5213
+ let iterations = 0;
5214
+ const startTime = Date.now();
5215
+ do {
5216
+ state = tasklet.call();
5217
+ iterations++;
5218
+ } while (state === "MADE_PROGRESS");
5219
+ if (state === "NO_PROGRESS") {
5220
+ throw new Error(`Tasklet ${tasklet.name} made no progress`);
5221
+ }
5222
+ if (this.config.metricsEnabled) {
5223
+ this.totalIterations += iterations;
5224
+ this.totalExecutionTimeMs += Date.now() - startTime;
5225
+ if (iterations === 1) {
5226
+ this.singleIterationCompletions++;
5227
+ }
5228
+ }
5229
+ return tasklet.getResult();
5230
+ }
5231
+ /**
5232
+ * Cancel a specific tasklet by name pattern.
5233
+ * Returns the number of tasklets cancelled.
5234
+ */
5235
+ cancel(namePattern) {
5236
+ let cancelled = 0;
5237
+ const pattern = typeof namePattern === "string" ? new RegExp(`^${namePattern}`) : namePattern;
5238
+ for (const [id, state] of this.activeTasklets) {
5239
+ if (pattern.test(state.tasklet.name)) {
5240
+ this.cancelTasklet(id, state);
5241
+ cancelled++;
5242
+ }
5243
+ }
5244
+ return cancelled;
5245
+ }
5246
+ /**
5247
+ * Cancel all running tasklets.
5248
+ */
5249
+ cancelAll() {
5250
+ let cancelled = 0;
5251
+ for (const [id, state] of this.activeTasklets) {
5252
+ this.cancelTasklet(id, state);
5253
+ cancelled++;
5254
+ }
5255
+ return cancelled;
5256
+ }
5257
+ /**
5258
+ * Get scheduler statistics.
5259
+ */
5260
+ getStats() {
5261
+ return {
5262
+ totalScheduled: this.totalScheduled,
5263
+ activeTasklets: this.activeTasklets.size,
5264
+ completedTasklets: this.completedTasklets,
5265
+ cancelledTasklets: this.cancelledTasklets,
5266
+ totalIterations: this.totalIterations,
5267
+ avgIterationsPerTasklet: this.completedTasklets > 0 ? this.totalIterations / this.completedTasklets : 0,
5268
+ singleIterationCompletions: this.singleIterationCompletions,
5269
+ totalExecutionTimeMs: this.totalExecutionTimeMs
5270
+ };
5271
+ }
5272
+ /**
5273
+ * Reset statistics.
3861
5274
  */
3862
- getDataLength(slotIndex) {
3863
- const lengthView = new DataView(
3864
- this.buffer,
3865
- this.getLengthOffset(slotIndex),
3866
- 4
3867
- );
3868
- return lengthView.getUint32(0, true);
5275
+ resetStats() {
5276
+ this.totalScheduled = 0;
5277
+ this.completedTasklets = 0;
5278
+ this.cancelledTasklets = 0;
5279
+ this.totalIterations = 0;
5280
+ this.singleIterationCompletions = 0;
5281
+ this.totalExecutionTimeMs = 0;
3869
5282
  }
3870
5283
  /**
3871
- * Get data view for a slot (for reading).
5284
+ * Shutdown the scheduler.
5285
+ * Cancels all running tasklets and stops the tick timer.
3872
5286
  */
3873
- getDataView(slotIndex) {
3874
- const length = this.getDataLength(slotIndex);
3875
- const dataOffset = this.getDataOffset(slotIndex);
3876
- return new Uint8Array(this.buffer, dataOffset, length);
5287
+ shutdown() {
5288
+ this.isShuttingDown = true;
5289
+ this.cancelAll();
5290
+ this.stopScheduler();
3877
5291
  }
3878
5292
  /**
3879
- * Get slot status.
5293
+ * Check if scheduler is running.
3880
5294
  */
3881
- getStatus(slotIndex) {
3882
- return Atomics.load(this.statusArray, this.getStatusOffset(slotIndex));
5295
+ get running() {
5296
+ return this.isRunning;
3883
5297
  }
3884
5298
  /**
3885
- * Wait for a specific status with timeout.
3886
- * Returns the actual status (may differ if timeout occurred).
5299
+ * Get number of active tasklets.
3887
5300
  */
3888
- waitForStatus(slotIndex, expectedStatus, timeoutMs = 5e3) {
3889
- const statusOffset = this.getStatusOffset(slotIndex);
3890
- const deadline = Date.now() + timeoutMs;
3891
- while (Date.now() < deadline) {
3892
- const status = Atomics.load(this.statusArray, statusOffset);
3893
- if (status === expectedStatus || status === 255 /* ERROR */) {
3894
- return status;
5301
+ get activeCount() {
5302
+ return this.activeTasklets.size;
5303
+ }
5304
+ startScheduler() {
5305
+ if (this.isRunning) return;
5306
+ this.isRunning = true;
5307
+ this.scheduleTick();
5308
+ }
5309
+ stopScheduler() {
5310
+ this.isRunning = false;
5311
+ if (this.tickTimer) {
5312
+ clearImmediate(this.tickTimer);
5313
+ this.tickTimer = null;
5314
+ }
5315
+ }
5316
+ scheduleTick() {
5317
+ if (!this.isRunning) return;
5318
+ this.tickTimer = setImmediate(() => {
5319
+ this.tick();
5320
+ });
5321
+ }
5322
+ tick() {
5323
+ if (!this.isRunning || this.activeTasklets.size === 0) {
5324
+ this.stopScheduler();
5325
+ return;
5326
+ }
5327
+ const tickStart = Date.now();
5328
+ const taskletIds = Array.from(this.activeTasklets.keys());
5329
+ for (const id of taskletIds) {
5330
+ const state = this.activeTasklets.get(id);
5331
+ if (!state) continue;
5332
+ try {
5333
+ const iterationStart = Date.now();
5334
+ const result = state.tasklet.call();
5335
+ const iterationTime = Date.now() - iterationStart;
5336
+ state.iterations++;
5337
+ state.lastProgressTime = Date.now();
5338
+ if (this.config.metricsEnabled) {
5339
+ this.totalIterations++;
5340
+ this.totalExecutionTimeMs += iterationTime;
5341
+ }
5342
+ if (result === "DONE") {
5343
+ this.completeTasklet(id, state);
5344
+ } else if (result === "NO_PROGRESS") {
5345
+ }
5346
+ } catch (error) {
5347
+ this.failTasklet(id, state, error);
3895
5348
  }
3896
- const remaining = deadline - Date.now();
3897
- if (remaining > 0) {
3898
- Atomics.wait(
3899
- this.statusArray,
3900
- statusOffset,
3901
- status,
3902
- Math.min(remaining, 100)
3903
- );
5349
+ if (Date.now() - tickStart > this.config.defaultTimeBudgetMs * 2) {
5350
+ break;
3904
5351
  }
3905
5352
  }
3906
- return Atomics.load(this.statusArray, statusOffset);
5353
+ if (this.activeTasklets.size > 0) {
5354
+ this.scheduleTick();
5355
+ } else {
5356
+ this.stopScheduler();
5357
+ }
3907
5358
  }
3908
- /**
3909
- * Wait for result and read it.
3910
- * Returns null on timeout or error.
3911
- */
3912
- waitForResult(slot, timeoutMs = 5e3) {
3913
- const status = this.waitForStatus(
3914
- slot.index,
3915
- 4 /* RESULT_READY */,
3916
- timeoutMs
3917
- );
3918
- if (status === 4 /* RESULT_READY */) {
3919
- const length = this.getDataLength(slot.index);
3920
- const result = new Uint8Array(length);
3921
- result.set(slot.dataView.subarray(0, length));
3922
- return result;
5359
+ completeTasklet(id, state) {
5360
+ this.activeTasklets.delete(id);
5361
+ this.completedTasklets++;
5362
+ if (state.iterations === 1) {
5363
+ this.singleIterationCompletions++;
3923
5364
  }
3924
- return null;
5365
+ try {
5366
+ const result = state.tasklet.getResult();
5367
+ state.resolve(result);
5368
+ } catch (error) {
5369
+ state.reject(error);
5370
+ }
5371
+ }
5372
+ failTasklet(id, state, error) {
5373
+ this.activeTasklets.delete(id);
5374
+ state.reject(error);
5375
+ }
5376
+ cancelTasklet(id, state) {
5377
+ this.activeTasklets.delete(id);
5378
+ this.cancelledTasklets++;
5379
+ if (state.tasklet.onCancel) {
5380
+ try {
5381
+ state.tasklet.onCancel();
5382
+ } catch {
5383
+ }
5384
+ }
5385
+ state.reject(new Error(`Tasklet ${state.tasklet.name} was cancelled`));
5386
+ }
5387
+ };
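TaskletScheduler.schedule() above runs tasklets cooperatively on setImmediate ticks: each tick calls tasklet.call(), which must return "MADE_PROGRESS", "DONE" or "NO_PROGRESS"; on "DONE" the promise resolves with tasklet.getResult(), while cancellation, a thrown error, or exceeding maxConcurrent rejects it. A sketch of a hand-written tasklet doing bounded work per tick; the { name, call, getResult, onCancel } shape is inferred from how the scheduler invokes it, not from a published interface.

// Counts to `limit` in chunks so each call() stays within the scheduler's time budget.
const scheduler = new TaskletScheduler({ defaultTimeBudgetMs: 5, maxConcurrent: 4 });

function makeCounterTasklet(limit, chunk = 10_000) {
  let n = 0;
  return {
    name: "counter",
    call() {
      for (let i = 0; i < chunk && n < limit; i++) n++;   // bounded work per tick
      return n >= limit ? "DONE" : "MADE_PROGRESS";
    },
    getResult() { return n; },
    onCancel() { /* nothing to clean up in this sketch */ }
  };
}

scheduler.schedule(makeCounterTasklet(1_000_000))
  .then((total) => console.log("counted", total, scheduler.getStats()))
  .catch((err) => console.error("tasklet failed or was cancelled", err));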
5388
+
5389
+ // src/tasklet/tasklets/IteratorTasklet.ts
5390
+ var DEFAULT_CONFIG5 = {
5391
+ timeBudgetMs: 5,
5392
+ maxItemsPerIteration: 1e3
5393
+ };
5394
+ var IteratorTasklet = class {
5395
+ constructor(iterator, config) {
5396
+ this.itemsProcessed = 0;
5397
+ this.isDone = false;
5398
+ this.iterator = iterator;
5399
+ this.config = { ...DEFAULT_CONFIG5, ...config };
3925
5400
  }
3926
5401
  /**
3927
- * Get the SharedArrayBuffer for passing to workers.
5402
+ * Execute one chunk of iteration.
3928
5403
  */
3929
- getBuffer() {
3930
- return this.buffer;
5404
+ call() {
5405
+ if (this.isDone) {
5406
+ return "DONE";
5407
+ }
5408
+ const deadline = Date.now() + this.config.timeBudgetMs;
5409
+ let processedThisIteration = 0;
5410
+ while (Date.now() < deadline && processedThisIteration < this.config.maxItemsPerIteration) {
5411
+ const { value, done } = this.iterator.next();
5412
+ if (done) {
5413
+ this.isDone = true;
5414
+ return "DONE";
5415
+ }
5416
+ this.processItem(value);
5417
+ this.itemsProcessed++;
5418
+ processedThisIteration++;
5419
+ }
5420
+ return "MADE_PROGRESS";
3931
5421
  }
3932
5422
  /**
3933
- * Get configuration needed by workers.
5423
+ * Called when tasklet is cancelled.
3934
5424
  */
3935
- getWorkerConfig() {
3936
- return {
3937
- sharedBuffer: this.buffer,
3938
- slotSize: this.slotSize,
3939
- slotCount: this.slotCount,
3940
- metadataSize: this.metadataSize
3941
- };
5425
+ onCancel() {
3942
5426
  }
3943
5427
  /**
3944
- * Get statistics.
5428
+ * Get number of items processed so far.
3945
5429
  */
3946
- getStats() {
3947
- return {
3948
- totalSize: this.buffer.byteLength,
3949
- slotCount: this.slotCount,
3950
- slotSize: this.slotSize,
3951
- allocatedSlots: this.allocatedCount,
3952
- availableSlots: this.freeSlots.size,
3953
- peakUsage: this.peakUsage,
3954
- totalAllocations: this.totalAllocations,
3955
- totalReleases: this.totalReleases
3956
- };
5430
+ get processed() {
5431
+ return this.itemsProcessed;
5432
+ }
5433
+ };
5434
+ var FilterTasklet = class extends IteratorTasklet {
5435
+ constructor(name, iterator, predicate, config) {
5436
+ super(iterator, config);
5437
+ this.results = [];
5438
+ this.name = name;
5439
+ this.predicate = predicate;
5440
+ }
5441
+ processItem(item) {
5442
+ if (this.predicate(item)) {
5443
+ this.results.push(item);
5444
+ }
5445
+ }
5446
+ getResult() {
5447
+ return this.results;
5448
+ }
5449
+ };
5450
+ var MapTasklet = class extends IteratorTasklet {
5451
+ constructor(name, iterator, mapper, config) {
5452
+ super(iterator, config);
5453
+ this.results = [];
5454
+ this.name = name;
5455
+ this.mapper = mapper;
5456
+ }
5457
+ processItem(item) {
5458
+ this.results.push(this.mapper(item));
5459
+ }
5460
+ getResult() {
5461
+ return this.results;
5462
+ }
5463
+ };
5464
+ var ForEachTasklet = class extends IteratorTasklet {
5465
+ constructor(name, iterator, action, config) {
5466
+ super(iterator, config);
5467
+ this.name = name;
5468
+ this.action = action;
5469
+ }
5470
+ processItem(item) {
5471
+ this.action(item);
5472
+ }
5473
+ getResult() {
5474
+ return this.itemsProcessed;
3957
5475
  }
3958
- /**
3959
- * Check if SharedArrayBuffer is available in current environment.
3960
- */
3961
- static isAvailable() {
3962
- try {
3963
- new SharedArrayBuffer(1);
3964
- return true;
3965
- } catch {
3966
- return false;
3967
- }
5476
+ };
5477
+ var ReduceTasklet = class extends IteratorTasklet {
5478
+ constructor(name, iterator, initialValue, reducer, config) {
5479
+ super(iterator, config);
5480
+ this.name = name;
5481
+ this.accumulator = initialValue;
5482
+ this.reducer = reducer;
3968
5483
  }
3969
- /**
3970
- * Shutdown and release resources.
3971
- * Resets all slots to FREE status.
3972
- */
3973
- shutdown() {
3974
- for (let i = 0; i < this.slotCount; i++) {
3975
- Atomics.store(this.statusArray, this.getStatusOffset(i), 0 /* FREE */);
3976
- }
3977
- this.freeSlots.clear();
3978
- for (let i = 0; i < this.slotCount; i++) {
3979
- this.freeSlots.add(i);
3980
- }
3981
- this.allocatedCount = 0;
5484
+ processItem(item) {
5485
+ this.accumulator = this.reducer(this.accumulator, item);
5486
+ }
5487
+ getResult() {
5488
+ return this.accumulator;
3982
5489
  }
3983
5490
  };
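The iterator tasklets above wrap a plain iterator and process at most maxItemsPerIteration items or timeBudgetMs per call(); FilterTasklet, MapTasklet and ReduceTasklet accumulate results while ForEachTasklet just counts items. A small synchronous sketch using runSync(); constructor arguments follow the definitions above.

// FilterTasklet(name, iterator, predicate) driven synchronously for a small data set.
const users = new Map([
  ["u1", { name: "alice", active: true }],
  ["u2", { name: "bob", active: false }],
  ["u3", { name: "carol", active: true }]
]);

const scheduler = new TaskletScheduler();
const active = scheduler.runSync(
  new FilterTasklet("active-users", users.values(), (u) => u.active)
);
// -> [{ name: "alice", active: true }, { name: "carol", active: true }]

// For large iterators, scheduler.schedule(tasklet) spreads the same work across ticks instead.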
3984
5491
 
3985
- // src/tasklet/TaskletScheduler.ts
3986
- var DEFAULT_CONFIG4 = {
3987
- defaultTimeBudgetMs: 5,
3988
- maxConcurrent: 10,
3989
- tickIntervalMs: 1,
3990
- metricsEnabled: true
3991
- };
3992
- var TaskletScheduler = class {
5492
+ // src/ack/WriteAckManager.ts
5493
+ import { EventEmitter as EventEmitter7 } from "events";
5494
+ import {
5495
+ WriteConcern,
5496
+ DEFAULT_WRITE_CONCERN_TIMEOUT,
5497
+ isWriteConcernAchieved,
5498
+ getHighestWriteConcernLevel
5499
+ } from "@topgunbuild/core";
5500
+ var WriteAckManager = class extends EventEmitter7 {
3993
5501
  constructor(config) {
3994
- this.activeTasklets = /* @__PURE__ */ new Map();
3995
- this.tickTimer = null;
3996
- this.isRunning = false;
3997
- this.isShuttingDown = false;
3998
- // Metrics
3999
- this.totalScheduled = 0;
4000
- this.completedTasklets = 0;
4001
- this.cancelledTasklets = 0;
4002
- this.totalIterations = 0;
4003
- this.singleIterationCompletions = 0;
4004
- this.totalExecutionTimeMs = 0;
4005
- this.config = { ...DEFAULT_CONFIG4, ...config };
5502
+ super();
5503
+ this.pending = /* @__PURE__ */ new Map();
5504
+ this.defaultTimeout = config?.defaultTimeout ?? DEFAULT_WRITE_CONCERN_TIMEOUT;
4006
5505
  }
4007
5506
  /**
4008
- * Schedule a tasklet for execution.
4009
- * Returns a promise that resolves when the tasklet completes.
5507
+ * Register a pending write operation.
5508
+ * Returns a promise that resolves when target Write Concern is achieved.
5509
+ *
5510
+ * @param opId - Operation ID
5511
+ * @param writeConcern - Target Write Concern level
5512
+ * @param timeout - Optional timeout in ms (defaults to config or 5000ms)
5513
+ * @returns Promise that resolves with WriteResult
4010
5514
  */
4011
- schedule(tasklet) {
4012
- if (this.isShuttingDown) {
4013
- return Promise.reject(new Error("Scheduler is shutting down"));
5515
+ registerPending(opId, writeConcern, timeout) {
5516
+ if (writeConcern === WriteConcern.FIRE_AND_FORGET) {
5517
+ return Promise.resolve({
5518
+ success: true,
5519
+ opId,
5520
+ achievedLevel: WriteConcern.FIRE_AND_FORGET,
5521
+ latencyMs: 0
5522
+ });
4014
5523
  }
4015
5524
  return new Promise((resolve, reject) => {
4016
- const taskletId = `${tasklet.name}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
4017
- if (this.activeTasklets.size >= this.config.maxConcurrent) {
4018
- reject(new Error(`Max concurrent tasklets (${this.config.maxConcurrent}) reached`));
4019
- return;
4020
- }
4021
- const state = {
4022
- tasklet,
5525
+ const effectiveTimeout = timeout ?? this.defaultTimeout;
5526
+ const timestamp = Date.now();
5527
+ const pendingWrite = {
5528
+ opId,
5529
+ writeConcern,
5530
+ timestamp,
5531
+ timeout: effectiveTimeout,
4023
5532
  resolve,
4024
5533
  reject,
4025
- iterations: 0,
4026
- startTime: Date.now(),
4027
- lastProgressTime: Date.now()
5534
+ achievedLevels: /* @__PURE__ */ new Set([WriteConcern.FIRE_AND_FORGET])
4028
5535
  };
4029
- this.activeTasklets.set(taskletId, state);
4030
- this.totalScheduled++;
4031
- if (!this.isRunning) {
4032
- this.startScheduler();
5536
+ pendingWrite.timeoutHandle = setTimeout(() => {
5537
+ this.handleTimeout(opId);
5538
+ }, effectiveTimeout);
5539
+ this.pending.set(opId, pendingWrite);
5540
+ logger.debug(
5541
+ { opId, writeConcern, timeout: effectiveTimeout },
5542
+ "Registered pending write"
5543
+ );
5544
+ if (writeConcern === WriteConcern.MEMORY) {
5545
+ this.notifyLevel(opId, WriteConcern.MEMORY);
4033
5546
  }
4034
5547
  });
4035
5548
  }
4036
5549
  /**
4037
- * Run a tasklet synchronously (blocking).
4038
- * Useful for small operations or when cooperative scheduling isn't needed.
5550
+ * Notify that a Write Concern level has been achieved for an operation.
5551
+ *
5552
+ * @param opId - Operation ID
5553
+ * @param level - Write Concern level that was achieved
4039
5554
  */
4040
- runSync(tasklet) {
4041
- let state;
4042
- let iterations = 0;
4043
- const startTime = Date.now();
4044
- do {
4045
- state = tasklet.call();
4046
- iterations++;
4047
- } while (state === "MADE_PROGRESS");
4048
- if (state === "NO_PROGRESS") {
4049
- throw new Error(`Tasklet ${tasklet.name} made no progress`);
5555
+ notifyLevel(opId, level) {
5556
+ const pending = this.pending.get(opId);
5557
+ if (!pending) {
5558
+ return;
4050
5559
  }
4051
- if (this.config.metricsEnabled) {
4052
- this.totalIterations += iterations;
4053
- this.totalExecutionTimeMs += Date.now() - startTime;
4054
- if (iterations === 1) {
4055
- this.singleIterationCompletions++;
4056
- }
5560
+ pending.achievedLevels.add(level);
5561
+ logger.debug(
5562
+ { opId, level, target: pending.writeConcern },
5563
+ "Write Concern level achieved"
5564
+ );
5565
+ if (isWriteConcernAchieved(pending.achievedLevels, pending.writeConcern)) {
5566
+ this.resolvePending(opId, level);
4057
5567
  }
4058
- return tasklet.getResult();
4059
5568
  }
4060
5569
  /**
4061
- * Cancel a specific tasklet by name pattern.
4062
- * Returns the number of tasklets cancelled.
5570
+ * Notify multiple operations that a Write Concern level has been achieved.
5571
+ * Useful for batch operations.
5572
+ *
5573
+ * @param opIds - Array of operation IDs
5574
+ * @param level - Write Concern level that was achieved
4063
5575
  */
4064
- cancel(namePattern) {
4065
- let cancelled = 0;
4066
- const pattern = typeof namePattern === "string" ? new RegExp(`^${namePattern}`) : namePattern;
4067
- for (const [id, state] of this.activeTasklets) {
4068
- if (pattern.test(state.tasklet.name)) {
4069
- this.cancelTasklet(id, state);
4070
- cancelled++;
4071
- }
5576
+ notifyLevelBatch(opIds, level) {
5577
+ for (const opId of opIds) {
5578
+ this.notifyLevel(opId, level);
4072
5579
  }
4073
- return cancelled;
4074
5580
  }
4075
5581
  /**
4076
- * Cancel all running tasklets.
5582
+ * Check if an operation is still pending.
5583
+ *
5584
+ * @param opId - Operation ID
5585
+ * @returns true if operation is pending
4077
5586
  */
4078
- cancelAll() {
4079
- let cancelled = 0;
4080
- for (const [id, state] of this.activeTasklets) {
4081
- this.cancelTasklet(id, state);
4082
- cancelled++;
5587
+ isPending(opId) {
5588
+ return this.pending.has(opId);
5589
+ }
5590
+ /**
5591
+ * Get the target Write Concern level for a pending operation.
5592
+ *
5593
+ * @param opId - Operation ID
5594
+ * @returns Target Write Concern level or undefined if not pending
5595
+ */
5596
+ getTargetLevel(opId) {
5597
+ return this.pending.get(opId)?.writeConcern;
5598
+ }
5599
+ /**
5600
+ * Get the highest achieved level for a pending operation.
5601
+ *
5602
+ * @param opId - Operation ID
5603
+ * @returns Highest achieved level or undefined if not pending
5604
+ */
5605
+ getAchievedLevel(opId) {
5606
+ const pending = this.pending.get(opId);
5607
+ if (!pending) return void 0;
5608
+ return getHighestWriteConcernLevel(pending.achievedLevels);
5609
+ }
5610
+ /**
5611
+ * Resolve a pending write with success.
5612
+ */
5613
+ resolvePending(opId, achievedLevel) {
5614
+ const pending = this.pending.get(opId);
5615
+ if (!pending) return;
5616
+ if (pending.timeoutHandle) {
5617
+ clearTimeout(pending.timeoutHandle);
4083
5618
  }
4084
- return cancelled;
5619
+ const latencyMs = Date.now() - pending.timestamp;
5620
+ const result = {
5621
+ success: true,
5622
+ opId,
5623
+ achievedLevel,
5624
+ latencyMs
5625
+ };
5626
+ pending.resolve(result);
5627
+ this.pending.delete(opId);
5628
+ logger.debug(
5629
+ { opId, achievedLevel, latencyMs },
5630
+ "Write resolved successfully"
5631
+ );
5632
+ this.emit("resolved", result);
4085
5633
  }
4086
5634
  /**
4087
- * Get scheduler statistics.
5635
+ * Handle timeout for a pending write.
4088
5636
  */
4089
- getStats() {
4090
- return {
4091
- totalScheduled: this.totalScheduled,
4092
- activeTasklets: this.activeTasklets.size,
4093
- completedTasklets: this.completedTasklets,
4094
- cancelledTasklets: this.cancelledTasklets,
4095
- totalIterations: this.totalIterations,
4096
- avgIterationsPerTasklet: this.completedTasklets > 0 ? this.totalIterations / this.completedTasklets : 0,
4097
- singleIterationCompletions: this.singleIterationCompletions,
4098
- totalExecutionTimeMs: this.totalExecutionTimeMs
5637
+ handleTimeout(opId) {
5638
+ const pending = this.pending.get(opId);
5639
+ if (!pending) return;
5640
+ const highestAchieved = getHighestWriteConcernLevel(pending.achievedLevels);
5641
+ const latencyMs = Date.now() - pending.timestamp;
5642
+ const result = {
5643
+ success: false,
5644
+ opId,
5645
+ achievedLevel: highestAchieved,
5646
+ latencyMs,
5647
+ error: `Timeout: achieved ${highestAchieved}, requested ${pending.writeConcern}`
4099
5648
  };
5649
+ pending.resolve(result);
5650
+ this.pending.delete(opId);
5651
+ logger.warn(
5652
+ { opId, requested: pending.writeConcern, achieved: highestAchieved, latencyMs },
5653
+ "Write timed out"
5654
+ );
5655
+ this.emit("timeout", {
5656
+ opId,
5657
+ requested: pending.writeConcern,
5658
+ achieved: highestAchieved,
5659
+ latencyMs
5660
+ });
4100
5661
  }
4101
5662
  /**
4102
- * Reset statistics.
5663
+ * Fail a pending write with an error.
5664
+ *
5665
+ * @param opId - Operation ID
5666
+ * @param error - Error message
4103
5667
  */
4104
- resetStats() {
4105
- this.totalScheduled = 0;
4106
- this.completedTasklets = 0;
4107
- this.cancelledTasklets = 0;
4108
- this.totalIterations = 0;
4109
- this.singleIterationCompletions = 0;
4110
- this.totalExecutionTimeMs = 0;
5668
+ failPending(opId, error) {
5669
+ const pending = this.pending.get(opId);
5670
+ if (!pending) return;
5671
+ if (pending.timeoutHandle) {
5672
+ clearTimeout(pending.timeoutHandle);
5673
+ }
5674
+ const latencyMs = Date.now() - pending.timestamp;
5675
+ const highestAchieved = getHighestWriteConcernLevel(pending.achievedLevels);
5676
+ const result = {
5677
+ success: false,
5678
+ opId,
5679
+ achievedLevel: highestAchieved,
5680
+ latencyMs,
5681
+ error
5682
+ };
5683
+ pending.resolve(result);
5684
+ this.pending.delete(opId);
5685
+ logger.error({ opId, error, latencyMs }, "Write failed");
5686
+ this.emit("failed", result);
4111
5687
  }
4112
5688
  /**
4113
- * Shutdown the scheduler.
4114
- * Cancels all running tasklets and stops the tick timer.
5689
+ * Get pending writes statistics.
4115
5690
  */
4116
- shutdown() {
4117
- this.isShuttingDown = true;
4118
- this.cancelAll();
4119
- this.stopScheduler();
5691
+ getStats() {
5692
+ const byLevel = {
5693
+ [WriteConcern.FIRE_AND_FORGET]: 0,
5694
+ [WriteConcern.MEMORY]: 0,
5695
+ [WriteConcern.APPLIED]: 0,
5696
+ [WriteConcern.REPLICATED]: 0,
5697
+ [WriteConcern.PERSISTED]: 0
5698
+ };
5699
+ for (const pending of this.pending.values()) {
5700
+ byLevel[pending.writeConcern]++;
5701
+ }
5702
+ return { pending: this.pending.size, byLevel };
4120
5703
  }
4121
5704
  /**
4122
- * Check if scheduler is running.
5705
+ * Get all pending operation IDs.
4123
5706
  */
4124
- get running() {
4125
- return this.isRunning;
5707
+ getPendingIds() {
5708
+ return Array.from(this.pending.keys());
4126
5709
  }
4127
5710
  /**
4128
- * Get number of active tasklets.
5711
+ * Clear all pending writes (for shutdown).
5712
+ * Rejects all pending promises with an error.
4129
5713
  */
4130
- get activeCount() {
4131
- return this.activeTasklets.size;
4132
- }
4133
- startScheduler() {
4134
- if (this.isRunning) return;
4135
- this.isRunning = true;
4136
- this.scheduleTick();
4137
- }
4138
- stopScheduler() {
4139
- this.isRunning = false;
4140
- if (this.tickTimer) {
4141
- clearImmediate(this.tickTimer);
4142
- this.tickTimer = null;
4143
- }
4144
- }
4145
- scheduleTick() {
4146
- if (!this.isRunning) return;
4147
- this.tickTimer = setImmediate(() => {
4148
- this.tick();
4149
- });
4150
- }
4151
- tick() {
4152
- if (!this.isRunning || this.activeTasklets.size === 0) {
4153
- this.stopScheduler();
4154
- return;
4155
- }
4156
- const tickStart = Date.now();
4157
- const taskletIds = Array.from(this.activeTasklets.keys());
4158
- for (const id of taskletIds) {
4159
- const state = this.activeTasklets.get(id);
4160
- if (!state) continue;
4161
- try {
4162
- const iterationStart = Date.now();
4163
- const result = state.tasklet.call();
4164
- const iterationTime = Date.now() - iterationStart;
4165
- state.iterations++;
4166
- state.lastProgressTime = Date.now();
4167
- if (this.config.metricsEnabled) {
4168
- this.totalIterations++;
4169
- this.totalExecutionTimeMs += iterationTime;
4170
- }
4171
- if (result === "DONE") {
4172
- this.completeTasklet(id, state);
4173
- } else if (result === "NO_PROGRESS") {
4174
- }
4175
- } catch (error) {
4176
- this.failTasklet(id, state, error);
4177
- }
4178
- if (Date.now() - tickStart > this.config.defaultTimeBudgetMs * 2) {
4179
- break;
5714
+ clear() {
5715
+ const count = this.pending.size;
5716
+ for (const pending of this.pending.values()) {
5717
+ if (pending.timeoutHandle) {
5718
+ clearTimeout(pending.timeoutHandle);
4180
5719
  }
5720
+ pending.reject(new Error("WriteAckManager cleared"));
4181
5721
  }
4182
- if (this.activeTasklets.size > 0) {
4183
- this.scheduleTick();
4184
- } else {
4185
- this.stopScheduler();
4186
- }
4187
- }
4188
- completeTasklet(id, state) {
4189
- this.activeTasklets.delete(id);
4190
- this.completedTasklets++;
4191
- if (state.iterations === 1) {
4192
- this.singleIterationCompletions++;
4193
- }
4194
- try {
4195
- const result = state.tasklet.getResult();
4196
- state.resolve(result);
4197
- } catch (error) {
4198
- state.reject(error);
5722
+ this.pending.clear();
5723
+ if (count > 0) {
5724
+ logger.info({ count }, "WriteAckManager cleared");
4199
5725
  }
4200
5726
  }
4201
- failTasklet(id, state, error) {
4202
- this.activeTasklets.delete(id);
4203
- state.reject(error);
4204
- }
4205
- cancelTasklet(id, state) {
4206
- this.activeTasklets.delete(id);
4207
- this.cancelledTasklets++;
4208
- if (state.tasklet.onCancel) {
4209
- try {
4210
- state.tasklet.onCancel();
4211
- } catch {
5727
+ /**
5728
+ * Graceful shutdown - resolves all pending writes with their current achieved level.
5729
+ */
5730
+ shutdown() {
5731
+ const count = this.pending.size;
5732
+ for (const [opId, pending] of this.pending.entries()) {
5733
+ if (pending.timeoutHandle) {
5734
+ clearTimeout(pending.timeoutHandle);
4212
5735
  }
5736
+ const highestAchieved = getHighestWriteConcernLevel(pending.achievedLevels);
5737
+ const latencyMs = Date.now() - pending.timestamp;
5738
+ const result = {
5739
+ success: highestAchieved === pending.writeConcern,
5740
+ opId,
5741
+ achievedLevel: highestAchieved,
5742
+ latencyMs,
5743
+ error: highestAchieved !== pending.writeConcern ? `Shutdown: achieved ${highestAchieved}, requested ${pending.writeConcern}` : void 0
5744
+ };
5745
+ pending.resolve(result);
5746
+ }
5747
+ this.pending.clear();
5748
+ if (count > 0) {
5749
+ logger.info({ count }, "WriteAckManager shutdown");
4213
5750
  }
4214
- state.reject(new Error(`Tasklet ${state.tasklet.name} was cancelled`));
4215
5751
  }
4216
5752
  };
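The clear() and shutdown() paths added above sit on top of WriteAckManager's pending-write tracking. A minimal sketch of the intended flow, assuming the registerPending/notifyLevel methods that appear elsewhere in this diff and the WriteConcern levels exported by @topgunbuild/core; the operation ID and timeout are placeholders:

import { WriteConcern } from "@topgunbuild/core";

const acks = new WriteAckManager({ defaultTimeout: 5000 });
// Ask for replication-level durability for this write.
const pendingResult = acks.registerPending("op-1", WriteConcern.REPLICATED);
// As the write progresses, the server reports each achieved level.
acks.notifyLevel("op-1", WriteConcern.MEMORY);
acks.notifyLevel("op-1", WriteConcern.APPLIED);
acks.notifyLevel("op-1", WriteConcern.REPLICATED); // target reached, promise resolves
const result = await pendingResult; // { success, opId, achievedLevel, latencyMs }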
4217
5753
 
4218
- // src/tasklet/tasklets/IteratorTasklet.ts
4219
- var DEFAULT_CONFIG5 = {
4220
- timeBudgetMs: 5,
4221
- maxItemsPerIteration: 1e3
5754
+ // src/cluster/ReplicationPipeline.ts
5755
+ import { EventEmitter as EventEmitter8 } from "events";
5756
+ import {
5757
+ ConsistencyLevel,
5758
+ DEFAULT_REPLICATION_CONFIG
5759
+ } from "@topgunbuild/core";
5760
+
5761
+ // src/cluster/LagTracker.ts
5762
+ var DEFAULT_LAG_TRACKER_CONFIG = {
5763
+ historySize: 100,
5764
+ laggyThresholdMs: 5e3,
5765
+ unhealthyThresholdMs: 3e4
4222
5766
  };
4223
- var IteratorTasklet = class {
4224
- constructor(iterator, config) {
4225
- this.itemsProcessed = 0;
4226
- this.isDone = false;
4227
- this.iterator = iterator;
4228
- this.config = { ...DEFAULT_CONFIG5, ...config };
5767
+ var LagTracker = class {
5768
+ constructor(config = {}) {
5769
+ this.lagByNode = /* @__PURE__ */ new Map();
5770
+ this.config = {
5771
+ ...DEFAULT_LAG_TRACKER_CONFIG,
5772
+ ...config
5773
+ };
4229
5774
  }
4230
5775
  /**
4231
- * Execute one chunk of iteration.
5776
+ * Update lag measurement for a node
4232
5777
  */
4233
- call() {
4234
- if (this.isDone) {
4235
- return "DONE";
5778
+ update(nodeId, lagMs) {
5779
+ let info = this.lagByNode.get(nodeId);
5780
+ if (!info) {
5781
+ info = {
5782
+ current: 0,
5783
+ history: [],
5784
+ lastUpdate: Date.now(),
5785
+ pendingOps: 0
5786
+ };
5787
+ this.lagByNode.set(nodeId, info);
4236
5788
  }
4237
- const deadline = Date.now() + this.config.timeBudgetMs;
4238
- let processedThisIteration = 0;
4239
- while (Date.now() < deadline && processedThisIteration < this.config.maxItemsPerIteration) {
4240
- const { value, done } = this.iterator.next();
4241
- if (done) {
4242
- this.isDone = true;
4243
- return "DONE";
4244
- }
4245
- this.processItem(value);
4246
- this.itemsProcessed++;
4247
- processedThisIteration++;
5789
+ info.current = lagMs;
5790
+ info.history.push(lagMs);
5791
+ if (info.history.length > this.config.historySize) {
5792
+ info.history.shift();
4248
5793
  }
4249
- return "MADE_PROGRESS";
5794
+ info.lastUpdate = Date.now();
4250
5795
  }
4251
5796
  /**
4252
- * Called when tasklet is cancelled.
5797
+ * Record acknowledgment from a node (lag effectively becomes 0)
4253
5798
  */
4254
- onCancel() {
5799
+ recordAck(nodeId) {
5800
+ const info = this.lagByNode.get(nodeId);
5801
+ if (info) {
5802
+ info.current = 0;
5803
+ info.lastUpdate = Date.now();
5804
+ if (info.pendingOps > 0) {
5805
+ info.pendingOps--;
5806
+ }
5807
+ }
4255
5808
  }
4256
5809
  /**
4257
- * Get number of items processed so far.
5810
+ * Increment pending operations counter for a node
4258
5811
  */
4259
- get processed() {
4260
- return this.itemsProcessed;
4261
- }
4262
- };
4263
- var FilterTasklet = class extends IteratorTasklet {
4264
- constructor(name, iterator, predicate, config) {
4265
- super(iterator, config);
4266
- this.results = [];
4267
- this.name = name;
4268
- this.predicate = predicate;
4269
- }
4270
- processItem(item) {
4271
- if (this.predicate(item)) {
4272
- this.results.push(item);
5812
+ incrementPending(nodeId) {
5813
+ let info = this.lagByNode.get(nodeId);
5814
+ if (!info) {
5815
+ info = {
5816
+ current: 0,
5817
+ history: [],
5818
+ lastUpdate: Date.now(),
5819
+ pendingOps: 0
5820
+ };
5821
+ this.lagByNode.set(nodeId, info);
4273
5822
  }
5823
+ info.pendingOps++;
4274
5824
  }
4275
- getResult() {
4276
- return this.results;
5825
+ /**
5826
+ * Get lag statistics for a specific node
5827
+ */
5828
+ getLag(nodeId) {
5829
+ const info = this.lagByNode.get(nodeId);
5830
+ if (!info || info.history.length === 0) {
5831
+ return { current: 0, avg: 0, max: 0, percentile99: 0 };
5832
+ }
5833
+ const sorted = [...info.history].sort((a, b) => a - b);
5834
+ const avg = sorted.reduce((a, b) => a + b, 0) / sorted.length;
5835
+ const max = sorted[sorted.length - 1] || 0;
5836
+ const p99Index = Math.floor(sorted.length * 0.99);
5837
+ const percentile99 = sorted[p99Index] || max;
5838
+ return {
5839
+ current: info.current,
5840
+ avg: Math.round(avg * 100) / 100,
5841
+ // Round to 2 decimal places
5842
+ max,
5843
+ percentile99
5844
+ };
4277
5845
  }
4278
- };
4279
- var MapTasklet = class extends IteratorTasklet {
4280
- constructor(name, iterator, mapper, config) {
4281
- super(iterator, config);
4282
- this.results = [];
4283
- this.name = name;
4284
- this.mapper = mapper;
5846
+ /**
5847
+ * Get pending operations count for a node
5848
+ */
5849
+ getPendingOps(nodeId) {
5850
+ const info = this.lagByNode.get(nodeId);
5851
+ return info?.pendingOps ?? 0;
4285
5852
  }
4286
- processItem(item) {
4287
- this.results.push(this.mapper(item));
5853
+ /**
5854
+ * Get overall replication health status
5855
+ */
5856
+ getHealth() {
5857
+ const unhealthyNodes = [];
5858
+ const laggyNodes = [];
5859
+ let totalLag = 0;
5860
+ let nodeCount = 0;
5861
+ const now = Date.now();
5862
+ for (const [nodeId, info] of this.lagByNode) {
5863
+ const timeSinceUpdate = now - info.lastUpdate;
5864
+ if (timeSinceUpdate > this.config.unhealthyThresholdMs) {
5865
+ unhealthyNodes.push(nodeId);
5866
+ } else if (info.current > this.config.laggyThresholdMs) {
5867
+ laggyNodes.push(nodeId);
5868
+ }
5869
+ totalLag += info.current;
5870
+ nodeCount++;
5871
+ }
5872
+ const avgLagMs = nodeCount > 0 ? totalLag / nodeCount : 0;
5873
+ return {
5874
+ healthy: unhealthyNodes.length === 0,
5875
+ unhealthyNodes,
5876
+ laggyNodes,
5877
+ avgLagMs: Math.round(avgLagMs * 100) / 100
5878
+ };
4288
5879
  }
4289
- getResult() {
4290
- return this.results;
5880
+ /**
5881
+ * Get average lag across all tracked nodes
5882
+ */
5883
+ getAverageLag() {
5884
+ let total = 0;
5885
+ let count = 0;
5886
+ for (const info of this.lagByNode.values()) {
5887
+ total += info.current;
5888
+ count++;
5889
+ }
5890
+ return count > 0 ? total / count : 0;
4291
5891
  }
4292
- };
4293
- var ForEachTasklet = class extends IteratorTasklet {
4294
- constructor(name, iterator, action, config) {
4295
- super(iterator, config);
4296
- this.name = name;
4297
- this.action = action;
5892
+ /**
5893
+ * Check if a specific node is considered healthy
5894
+ */
5895
+ isNodeHealthy(nodeId) {
5896
+ const info = this.lagByNode.get(nodeId);
5897
+ if (!info) return true;
5898
+ const timeSinceUpdate = Date.now() - info.lastUpdate;
5899
+ return timeSinceUpdate < this.config.unhealthyThresholdMs;
4298
5900
  }
4299
- processItem(item) {
4300
- this.action(item);
5901
+ /**
5902
+ * Check if a specific node is considered laggy
5903
+ */
5904
+ isNodeLaggy(nodeId) {
5905
+ const info = this.lagByNode.get(nodeId);
5906
+ if (!info) return false;
5907
+ return info.current > this.config.laggyThresholdMs;
4301
5908
  }
4302
- getResult() {
4303
- return this.itemsProcessed;
5909
+ /**
5910
+ * Remove a node from tracking
5911
+ */
5912
+ removeNode(nodeId) {
5913
+ this.lagByNode.delete(nodeId);
4304
5914
  }
4305
- };
4306
- var ReduceTasklet = class extends IteratorTasklet {
4307
- constructor(name, iterator, initialValue, reducer, config) {
4308
- super(iterator, config);
4309
- this.name = name;
4310
- this.accumulator = initialValue;
4311
- this.reducer = reducer;
5915
+ /**
5916
+ * Get all tracked node IDs
5917
+ */
5918
+ getTrackedNodes() {
5919
+ return Array.from(this.lagByNode.keys());
4312
5920
  }
4313
- processItem(item) {
4314
- this.accumulator = this.reducer(this.accumulator, item);
5921
+ /**
5922
+ * Get raw lag info for a node (for advanced monitoring)
5923
+ */
5924
+ getRawLagInfo(nodeId) {
5925
+ return this.lagByNode.get(nodeId);
4315
5926
  }
4316
- getResult() {
4317
- return this.accumulator;
5927
+ /**
5928
+ * Clear all tracking data
5929
+ */
5930
+ clear() {
5931
+ this.lagByNode.clear();
5932
+ }
5933
+ /**
5934
+ * Export metrics in Prometheus format
5935
+ */
5936
+ toPrometheusMetrics() {
5937
+ const lines = [
5938
+ "# HELP topgun_replication_lag_ms Current replication lag in milliseconds",
5939
+ "# TYPE topgun_replication_lag_ms gauge"
5940
+ ];
5941
+ for (const [nodeId, info] of this.lagByNode) {
5942
+ lines.push(`topgun_replication_lag_ms{node="${nodeId}"} ${info.current}`);
5943
+ }
5944
+ lines.push("");
5945
+ lines.push("# HELP topgun_replication_pending_ops Pending replication operations");
5946
+ lines.push("# TYPE topgun_replication_pending_ops gauge");
5947
+ for (const [nodeId, info] of this.lagByNode) {
5948
+ lines.push(`topgun_replication_pending_ops{node="${nodeId}"} ${info.pendingOps}`);
5949
+ }
5950
+ const health = this.getHealth();
5951
+ lines.push("");
5952
+ lines.push("# HELP topgun_replication_healthy Cluster replication health (1=healthy, 0=unhealthy)");
5953
+ lines.push("# TYPE topgun_replication_healthy gauge");
5954
+ lines.push(`topgun_replication_healthy ${health.healthy ? 1 : 0}`);
5955
+ lines.push("");
5956
+ lines.push("# HELP topgun_replication_avg_lag_ms Average replication lag across all nodes");
5957
+ lines.push("# TYPE topgun_replication_avg_lag_ms gauge");
5958
+ lines.push(`topgun_replication_avg_lag_ms ${health.avgLagMs}`);
5959
+ return lines.join("\n");
4318
5960
  }
4319
5961
  };
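LagTracker keeps a bounded lag history per node and derives health from it. A short usage sketch using only the methods defined above; node IDs, lag values, and thresholds are illustrative:

const tracker = new LagTracker({ laggyThresholdMs: 5000, unhealthyThresholdMs: 30000 });
tracker.update("node-b", 120);  // 120 ms behind
tracker.update("node-c", 8000); // above the laggy threshold
tracker.recordAck("node-b");    // ack observed, current lag drops to 0
tracker.getLag("node-c");       // { current, avg, max, percentile99 }
tracker.isNodeLaggy("node-c");  // true
tracker.getHealth();            // { healthy, unhealthyNodes, laggyNodes, avgLagMs }
console.log(tracker.toPrometheusMetrics());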
4320
5962
 
4321
- // src/ack/WriteAckManager.ts
4322
- import { EventEmitter as EventEmitter4 } from "events";
4323
- import {
4324
- WriteConcern,
4325
- DEFAULT_WRITE_CONCERN_TIMEOUT,
4326
- isWriteConcernAchieved,
4327
- getHighestWriteConcernLevel
4328
- } from "@topgunbuild/core";
4329
- var WriteAckManager = class extends EventEmitter4 {
4330
- constructor(config) {
5963
+ // src/cluster/ReplicationPipeline.ts
5964
+ var ReplicationTimeoutError = class extends Error {
5965
+ constructor(opId, targetNodes, ackedNodes) {
5966
+ super(
5967
+ `Replication timeout for operation ${opId}. Expected: ${targetNodes.join(", ")}, Acked: ${ackedNodes.join(", ")}`
5968
+ );
5969
+ this.opId = opId;
5970
+ this.targetNodes = targetNodes;
5971
+ this.ackedNodes = ackedNodes;
5972
+ this.name = "ReplicationTimeoutError";
5973
+ }
5974
+ };
5975
+ var ReplicationPipeline = class extends EventEmitter8 {
5976
+ constructor(clusterManager, partitionService, config = {}) {
4331
5977
  super();
4332
- this.pending = /* @__PURE__ */ new Map();
4333
- this.defaultTimeout = config?.defaultTimeout ?? DEFAULT_WRITE_CONCERN_TIMEOUT;
5978
+ // Replication queues per node (for EVENTUAL mode)
5979
+ this.replicationQueue = /* @__PURE__ */ new Map();
5980
+ // Pending acknowledgments (for STRONG/QUORUM mode)
5981
+ this.pendingAcks = /* @__PURE__ */ new Map();
5982
+ // Queue processor timer
5983
+ this.queueProcessorTimer = null;
5984
+ // Operation applier callback (injected by ServerCoordinator)
5985
+ this.operationApplier = null;
5986
+ this.clusterManager = clusterManager;
5987
+ this.partitionService = partitionService;
5988
+ this.nodeId = clusterManager.config.nodeId;
5989
+ this.config = {
5990
+ ...DEFAULT_REPLICATION_CONFIG,
5991
+ ...config
5992
+ };
5993
+ this.lagTracker = new LagTracker();
5994
+ this.setupMessageHandlers();
5995
+ this.startQueueProcessor();
4334
5996
  }
5997
+ // ============================================
5998
+ // Configuration
5999
+ // ============================================
4335
6000
  /**
4336
- * Register a pending write operation.
4337
- * Returns a promise that resolves when target Write Concern is achieved.
4338
- *
4339
- * @param opId - Operation ID
4340
- * @param writeConcern - Target Write Concern level
4341
- * @param timeout - Optional timeout in ms (defaults to config or 5000ms)
4342
- * @returns Promise that resolves with WriteResult
6001
+ * Set the operation applier callback
6002
+ * This is called when replicated operations are received from other nodes
4343
6003
  */
4344
- registerPending(opId, writeConcern, timeout) {
4345
- if (writeConcern === WriteConcern.FIRE_AND_FORGET) {
4346
- return Promise.resolve({
4347
- success: true,
4348
- opId,
4349
- achievedLevel: WriteConcern.FIRE_AND_FORGET,
4350
- latencyMs: 0
4351
- });
6004
+ setOperationApplier(applier) {
6005
+ this.operationApplier = applier;
6006
+ }
6007
+ // ============================================
6008
+ // Replication API
6009
+ // ============================================
6010
+ /**
6011
+ * Replicate operation to backup nodes
6012
+ */
6013
+ async replicate(operation, opId, key, options = {}) {
6014
+ const consistency = options.consistency ?? this.config.defaultConsistency;
6015
+ const partitionId = this.partitionService.getPartitionId(key);
6016
+ const backups = this.partitionService.getBackups(partitionId);
6017
+ if (backups.length === 0) {
6018
+ return { success: true, ackedBy: [this.nodeId] };
6019
+ }
6020
+ switch (consistency) {
6021
+ case ConsistencyLevel.STRONG:
6022
+ return this.replicateStrong(operation, opId, backups, options.timeout);
6023
+ case ConsistencyLevel.QUORUM:
6024
+ return this.replicateQuorum(operation, opId, backups, options.timeout);
6025
+ case ConsistencyLevel.EVENTUAL:
6026
+ return this.replicateEventual(operation, opId, backups);
4352
6027
  }
6028
+ }
6029
+ /**
6030
+ * STRONG: Wait for all replicas to acknowledge
6031
+ */
6032
+ async replicateStrong(operation, opId, backups, timeout) {
6033
+ const targetNodes = backups;
4353
6034
  return new Promise((resolve, reject) => {
4354
- const effectiveTimeout = timeout ?? this.defaultTimeout;
4355
- const timestamp = Date.now();
4356
- const pendingWrite = {
6035
+ const pending = {
4357
6036
  opId,
4358
- writeConcern,
4359
- timestamp,
4360
- timeout: effectiveTimeout,
4361
- resolve,
4362
- reject,
4363
- achievedLevels: /* @__PURE__ */ new Set([WriteConcern.FIRE_AND_FORGET])
6037
+ consistency: ConsistencyLevel.STRONG,
6038
+ targetNodes,
6039
+ ackedNodes: /* @__PURE__ */ new Set(),
6040
+ resolve: () => resolve({
6041
+ success: true,
6042
+ ackedBy: [this.nodeId, ...targetNodes]
6043
+ }),
6044
+ reject: (error) => reject(error),
6045
+ timeout: setTimeout(() => {
6046
+ this.pendingAcks.delete(opId);
6047
+ const ackedList = Array.from(pending.ackedNodes);
6048
+ reject(new ReplicationTimeoutError(opId, targetNodes, ackedList));
6049
+ }, timeout ?? this.config.ackTimeoutMs),
6050
+ startTime: Date.now()
4364
6051
  };
4365
- pendingWrite.timeoutHandle = setTimeout(() => {
4366
- this.handleTimeout(opId);
4367
- }, effectiveTimeout);
4368
- this.pending.set(opId, pendingWrite);
4369
- logger.debug(
4370
- { opId, writeConcern, timeout: effectiveTimeout },
4371
- "Registered pending write"
4372
- );
4373
- if (writeConcern === WriteConcern.MEMORY) {
4374
- this.notifyLevel(opId, WriteConcern.MEMORY);
6052
+ this.pendingAcks.set(opId, pending);
6053
+ for (const nodeId of targetNodes) {
6054
+ this.lagTracker.incrementPending(nodeId);
6055
+ }
6056
+ for (const nodeId of targetNodes) {
6057
+ this.sendReplication(nodeId, operation, opId, ConsistencyLevel.STRONG);
4375
6058
  }
4376
6059
  });
4377
6060
  }
4378
6061
  /**
4379
- * Notify that a Write Concern level has been achieved for an operation.
4380
- *
4381
- * @param opId - Operation ID
4382
- * @param level - Write Concern level that was achieved
6062
+ * QUORUM: Wait for majority of replicas
4383
6063
  */
4384
- notifyLevel(opId, level) {
4385
- const pending = this.pending.get(opId);
4386
- if (!pending) {
4387
- return;
6064
+ async replicateQuorum(operation, opId, backups, timeout) {
6065
+ const targetNodes = backups;
6066
+ const quorumSize = Math.floor(targetNodes.length / 2) + 1;
6067
+ return new Promise((resolve, reject) => {
6068
+ const ackedNodes = /* @__PURE__ */ new Set();
6069
+ const pending = {
6070
+ opId,
6071
+ consistency: ConsistencyLevel.QUORUM,
6072
+ targetNodes,
6073
+ ackedNodes,
6074
+ resolve: () => {
6075
+ const ackedSnapshot = Array.from(ackedNodes);
6076
+ const ackedBy = [this.nodeId, ...ackedSnapshot];
6077
+ resolve({ success: true, ackedBy });
6078
+ },
6079
+ reject: (error) => reject(error),
6080
+ timeout: setTimeout(() => {
6081
+ this.pendingAcks.delete(opId);
6082
+ const ackedList = Array.from(ackedNodes);
6083
+ reject(new ReplicationTimeoutError(opId, targetNodes, ackedList));
6084
+ }, timeout ?? this.config.ackTimeoutMs),
6085
+ startTime: Date.now()
6086
+ };
6087
+ this.pendingAcks.set(opId, pending);
6088
+ for (const nodeId of targetNodes) {
6089
+ this.lagTracker.incrementPending(nodeId);
6090
+ }
6091
+ for (const nodeId of targetNodes) {
6092
+ this.sendReplication(nodeId, operation, opId, ConsistencyLevel.QUORUM);
6093
+ }
6094
+ });
6095
+ }
6096
+ /**
6097
+ * EVENTUAL: Fire-and-forget with queue
6098
+ */
6099
+ async replicateEventual(operation, opId, backups) {
6100
+ for (const nodeId of backups) {
6101
+ this.enqueue(nodeId, {
6102
+ opId,
6103
+ operation,
6104
+ consistency: ConsistencyLevel.EVENTUAL,
6105
+ timestamp: Date.now(),
6106
+ retryCount: 0
6107
+ });
4388
6108
  }
4389
- pending.achievedLevels.add(level);
4390
- logger.debug(
4391
- { opId, level, target: pending.writeConcern },
4392
- "Write Concern level achieved"
4393
- );
4394
- if (isWriteConcernAchieved(pending.achievedLevels, pending.writeConcern)) {
4395
- this.resolvePending(opId, level);
6109
+ return { success: true, ackedBy: [this.nodeId] };
6110
+ }
6111
+ // ============================================
6112
+ // Queue Management
6113
+ // ============================================
6114
+ /**
6115
+ * Add task to replication queue
6116
+ */
6117
+ enqueue(nodeId, task) {
6118
+ let queue = this.replicationQueue.get(nodeId);
6119
+ if (!queue) {
6120
+ queue = [];
6121
+ this.replicationQueue.set(nodeId, queue);
4396
6122
  }
6123
+ if (queue.length >= this.config.queueSizeLimit) {
6124
+ this.emit("queueOverflow", nodeId);
6125
+ logger.warn({ nodeId, queueSize: queue.length }, "Replication queue overflow, dropping oldest");
6126
+ queue.shift();
6127
+ }
6128
+ queue.push(task);
6129
+ this.lagTracker.incrementPending(nodeId);
4397
6130
  }
4398
6131
  /**
4399
- * Notify multiple operations that a Write Concern level has been achieved.
4400
- * Useful for batch operations.
4401
- *
4402
- * @param opIds - Array of operation IDs
4403
- * @param level - Write Concern level that was achieved
6132
+ * Start queue processor
4404
6133
  */
4405
- notifyLevelBatch(opIds, level) {
4406
- for (const opId of opIds) {
4407
- this.notifyLevel(opId, level);
6134
+ startQueueProcessor() {
6135
+ if (this.queueProcessorTimer) return;
6136
+ this.queueProcessorTimer = setInterval(() => {
6137
+ for (const nodeId of this.replicationQueue.keys()) {
6138
+ this.processQueue(nodeId).catch((err) => {
6139
+ logger.error({ nodeId, error: err }, "Error processing replication queue");
6140
+ this.emit("error", err);
6141
+ });
6142
+ }
6143
+ }, this.config.batchIntervalMs);
6144
+ }
6145
+ /**
6146
+ * Stop queue processor
6147
+ */
6148
+ stopQueueProcessor() {
6149
+ if (this.queueProcessorTimer) {
6150
+ clearInterval(this.queueProcessorTimer);
6151
+ this.queueProcessorTimer = null;
4408
6152
  }
4409
6153
  }
4410
6154
  /**
4411
- * Check if an operation is still pending.
4412
- *
4413
- * @param opId - Operation ID
4414
- * @returns true if operation is pending
6155
+ * Process replication queue for a node
4415
6156
  */
4416
- isPending(opId) {
4417
- return this.pending.has(opId);
6157
+ async processQueue(nodeId) {
6158
+ const queue = this.replicationQueue.get(nodeId);
6159
+ if (!queue || queue.length === 0) return;
6160
+ const batch = queue.splice(0, this.config.batchSize);
6161
+ try {
6162
+ this.clusterManager.send(nodeId, "OP_FORWARD", {
6163
+ _replication: {
6164
+ type: "REPLICATION_BATCH",
6165
+ payload: {
6166
+ operations: batch.map((t) => t.operation),
6167
+ opIds: batch.map((t) => t.opId)
6168
+ }
6169
+ }
6170
+ });
6171
+ const oldestTimestamp = Math.min(...batch.map((t) => t.timestamp));
6172
+ this.lagTracker.update(nodeId, Date.now() - oldestTimestamp);
6173
+ logger.debug({ nodeId, batchSize: batch.length }, "Sent replication batch");
6174
+ } catch (error) {
6175
+ for (const task of batch) {
6176
+ task.retryCount++;
6177
+ if (task.retryCount <= this.config.maxRetries) {
6178
+ queue.unshift(task);
6179
+ } else {
6180
+ logger.warn({ nodeId, opId: task.opId, retries: task.retryCount }, "Replication task exceeded max retries");
6181
+ this.emit("replicationFailed", task.opId, new Error("Max retries exceeded"));
6182
+ }
6183
+ }
6184
+ }
4418
6185
  }
6186
+ // ============================================
6187
+ // Message Handling
6188
+ // ============================================
4419
6189
  /**
4420
- * Get the target Write Concern level for a pending operation.
4421
- *
4422
- * @param opId - Operation ID
4423
- * @returns Target Write Concern level or undefined if not pending
6190
+ * Send replication message to a node
4424
6191
  */
4425
- getTargetLevel(opId) {
4426
- return this.pending.get(opId)?.writeConcern;
6192
+ sendReplication(nodeId, operation, opId, consistency) {
6193
+ this.clusterManager.send(nodeId, "OP_FORWARD", {
6194
+ _replication: {
6195
+ type: "REPLICATION",
6196
+ payload: {
6197
+ opId,
6198
+ operation,
6199
+ consistency
6200
+ }
6201
+ }
6202
+ });
4427
6203
  }
4428
6204
  /**
4429
- * Get the highest achieved level for a pending operation.
4430
- *
4431
- * @param opId - Operation ID
4432
- * @returns Highest achieved level or undefined if not pending
6205
+ * Setup cluster message handlers
4433
6206
  */
4434
- getAchievedLevel(opId) {
4435
- const pending = this.pending.get(opId);
4436
- if (!pending) return void 0;
4437
- return getHighestWriteConcernLevel(pending.achievedLevels);
6207
+ setupMessageHandlers() {
6208
+ this.clusterManager.on("message", (msg) => {
6209
+ if (msg.payload?._replication) {
6210
+ const replication = msg.payload._replication;
6211
+ switch (replication.type) {
6212
+ case "REPLICATION":
6213
+ this.handleReplication(msg.senderId, replication.payload);
6214
+ break;
6215
+ case "REPLICATION_BATCH":
6216
+ this.handleReplicationBatch(msg.senderId, replication.payload);
6217
+ break;
6218
+ case "REPLICATION_ACK":
6219
+ this.handleReplicationAck(msg.senderId, replication.payload);
6220
+ break;
6221
+ case "REPLICATION_BATCH_ACK":
6222
+ this.handleReplicationBatchAck(msg.senderId, replication.payload);
6223
+ break;
6224
+ }
6225
+ }
6226
+ });
4438
6227
  }
4439
6228
  /**
4440
- * Resolve a pending write with success.
6229
+ * Handle incoming replication request (on backup node)
4441
6230
  */
4442
- resolvePending(opId, achievedLevel) {
4443
- const pending = this.pending.get(opId);
4444
- if (!pending) return;
4445
- if (pending.timeoutHandle) {
4446
- clearTimeout(pending.timeoutHandle);
6231
+ async handleReplication(sourceNode, payload) {
6232
+ const { opId, operation, consistency } = payload;
6233
+ logger.debug({ sourceNode, opId, consistency }, "Received replication");
6234
+ let success = true;
6235
+ if (this.operationApplier) {
6236
+ try {
6237
+ success = await this.operationApplier(operation, opId, sourceNode);
6238
+ } catch (error) {
6239
+ logger.error({ sourceNode, opId, error }, "Failed to apply replicated operation");
6240
+ success = false;
6241
+ }
6242
+ } else {
6243
+ logger.warn({ sourceNode, opId }, "No operation applier set, operation not applied");
6244
+ }
6245
+ if (consistency === ConsistencyLevel.STRONG || consistency === ConsistencyLevel.QUORUM) {
6246
+ this.clusterManager.send(sourceNode, "OP_FORWARD", {
6247
+ _replication: {
6248
+ type: "REPLICATION_ACK",
6249
+ payload: {
6250
+ opId,
6251
+ success,
6252
+ timestamp: Date.now()
6253
+ }
6254
+ }
6255
+ });
4447
6256
  }
4448
- const latencyMs = Date.now() - pending.timestamp;
4449
- const result = {
4450
- success: true,
4451
- opId,
4452
- achievedLevel,
4453
- latencyMs
4454
- };
4455
- pending.resolve(result);
4456
- this.pending.delete(opId);
4457
- logger.debug(
4458
- { opId, achievedLevel, latencyMs },
4459
- "Write resolved successfully"
4460
- );
4461
- this.emit("resolved", result);
4462
6257
  }
4463
6258
  /**
4464
- * Handle timeout for a pending write.
6259
+ * Handle incoming batch replication (on backup node)
4465
6260
  */
4466
- handleTimeout(opId) {
4467
- const pending = this.pending.get(opId);
4468
- if (!pending) return;
4469
- const highestAchieved = getHighestWriteConcernLevel(pending.achievedLevels);
4470
- const latencyMs = Date.now() - pending.timestamp;
4471
- const result = {
4472
- success: false,
4473
- opId,
4474
- achievedLevel: highestAchieved,
4475
- latencyMs,
4476
- error: `Timeout: achieved ${highestAchieved}, requested ${pending.writeConcern}`
4477
- };
4478
- pending.resolve(result);
4479
- this.pending.delete(opId);
4480
- logger.warn(
4481
- { opId, requested: pending.writeConcern, achieved: highestAchieved, latencyMs },
4482
- "Write timed out"
4483
- );
4484
- this.emit("timeout", {
4485
- opId,
4486
- requested: pending.writeConcern,
4487
- achieved: highestAchieved,
4488
- latencyMs
6261
+ async handleReplicationBatch(sourceNode, payload) {
6262
+ const { operations, opIds } = payload;
6263
+ logger.debug({ sourceNode, count: operations.length }, "Received replication batch");
6264
+ let allSuccess = true;
6265
+ if (this.operationApplier) {
6266
+ for (let i = 0; i < operations.length; i++) {
6267
+ try {
6268
+ const success = await this.operationApplier(operations[i], opIds[i], sourceNode);
6269
+ if (!success) {
6270
+ allSuccess = false;
6271
+ }
6272
+ } catch (error) {
6273
+ logger.error({ sourceNode, opId: opIds[i], error }, "Failed to apply replicated operation in batch");
6274
+ allSuccess = false;
6275
+ }
6276
+ }
6277
+ } else {
6278
+ logger.warn({ sourceNode, count: operations.length }, "No operation applier set, batch not applied");
6279
+ }
6280
+ this.clusterManager.send(sourceNode, "OP_FORWARD", {
6281
+ _replication: {
6282
+ type: "REPLICATION_BATCH_ACK",
6283
+ payload: {
6284
+ opIds,
6285
+ success: allSuccess,
6286
+ timestamp: Date.now()
6287
+ }
6288
+ }
4489
6289
  });
4490
6290
  }
4491
6291
  /**
4492
- * Fail a pending write with an error.
4493
- *
4494
- * @param opId - Operation ID
4495
- * @param error - Error message
6292
+ * Handle replication acknowledgment (on owner node)
6293
+ */
6294
+ handleReplicationAck(sourceNode, payload) {
6295
+ const { opId, success } = payload;
6296
+ this.lagTracker.recordAck(sourceNode);
6297
+ const pending = this.pendingAcks.get(opId);
6298
+ if (!pending) return;
6299
+ if (!success) {
6300
+ logger.warn({ sourceNode, opId }, "Replication rejected by backup");
6301
+ return;
6302
+ }
6303
+ pending.ackedNodes.add(sourceNode);
6304
+ const lag = Date.now() - pending.startTime;
6305
+ this.lagTracker.update(sourceNode, lag);
6306
+ const ackedCount = pending.ackedNodes.size;
6307
+ const targetCount = pending.targetNodes.length;
6308
+ switch (pending.consistency) {
6309
+ case ConsistencyLevel.STRONG:
6310
+ if (ackedCount === targetCount) {
6311
+ clearTimeout(pending.timeout);
6312
+ this.pendingAcks.delete(opId);
6313
+ pending.resolve();
6314
+ this.emit("replicationComplete", opId, [this.nodeId, ...pending.ackedNodes]);
6315
+ }
6316
+ break;
6317
+ case ConsistencyLevel.QUORUM:
6318
+ const quorumSize = Math.floor(targetCount / 2) + 1;
6319
+ if (ackedCount >= quorumSize) {
6320
+ clearTimeout(pending.timeout);
6321
+ this.pendingAcks.delete(opId);
6322
+ pending.resolve();
6323
+ this.emit("replicationComplete", opId, [this.nodeId, ...pending.ackedNodes]);
6324
+ }
6325
+ break;
6326
+ }
6327
+ }
6328
+ /**
6329
+ * Handle batch acknowledgment (on owner node)
6330
+ */
6331
+ handleReplicationBatchAck(sourceNode, payload) {
6332
+ const { success } = payload;
6333
+ this.lagTracker.recordAck(sourceNode);
6334
+ if (!success) {
6335
+ logger.warn({ sourceNode, count: payload.opIds.length }, "Batch replication rejected");
6336
+ }
6337
+ }
6338
+ // ============================================
6339
+ // Status and Metrics
6340
+ // ============================================
6341
+ /**
6342
+ * Get replication lag for a specific node
4496
6343
  */
4497
- failPending(opId, error) {
4498
- const pending = this.pending.get(opId);
4499
- if (!pending) return;
4500
- if (pending.timeoutHandle) {
4501
- clearTimeout(pending.timeoutHandle);
4502
- }
4503
- const latencyMs = Date.now() - pending.timestamp;
4504
- const highestAchieved = getHighestWriteConcernLevel(pending.achievedLevels);
4505
- const result = {
4506
- success: false,
4507
- opId,
4508
- achievedLevel: highestAchieved,
4509
- latencyMs,
4510
- error
4511
- };
4512
- pending.resolve(result);
4513
- this.pending.delete(opId);
4514
- logger.error({ opId, error, latencyMs }, "Write failed");
4515
- this.emit("failed", result);
6344
+ getLag(nodeId) {
6345
+ return this.lagTracker.getLag(nodeId);
4516
6346
  }
4517
6347
  /**
4518
- * Get pending writes statistics.
6348
+ * Get overall replication health
4519
6349
  */
4520
- getStats() {
4521
- const byLevel = {
4522
- [WriteConcern.FIRE_AND_FORGET]: 0,
4523
- [WriteConcern.MEMORY]: 0,
4524
- [WriteConcern.APPLIED]: 0,
4525
- [WriteConcern.REPLICATED]: 0,
4526
- [WriteConcern.PERSISTED]: 0
4527
- };
4528
- for (const pending of this.pending.values()) {
4529
- byLevel[pending.writeConcern]++;
4530
- }
4531
- return { pending: this.pending.size, byLevel };
6350
+ getHealth() {
6351
+ return this.lagTracker.getHealth();
4532
6352
  }
4533
6353
  /**
4534
- * Get all pending operation IDs.
6354
+ * Get queue size for a specific node
4535
6355
  */
4536
- getPendingIds() {
4537
- return Array.from(this.pending.keys());
6356
+ getQueueSize(nodeId) {
6357
+ return this.replicationQueue.get(nodeId)?.length ?? 0;
4538
6358
  }
4539
6359
  /**
4540
- * Clear all pending writes (for shutdown).
4541
- * Rejects all pending promises with an error.
6360
+ * Get total pending operations across all nodes
4542
6361
  */
4543
- clear() {
4544
- const count = this.pending.size;
4545
- for (const pending of this.pending.values()) {
4546
- if (pending.timeoutHandle) {
4547
- clearTimeout(pending.timeoutHandle);
4548
- }
4549
- pending.reject(new Error("WriteAckManager cleared"));
4550
- }
4551
- this.pending.clear();
4552
- if (count > 0) {
4553
- logger.info({ count }, "WriteAckManager cleared");
6362
+ getTotalPending() {
6363
+ let total = 0;
6364
+ for (const queue of this.replicationQueue.values()) {
6365
+ total += queue.length;
4554
6366
  }
6367
+ return total + this.pendingAcks.size;
4555
6368
  }
4556
6369
  /**
4557
- * Graceful shutdown - resolves all pending writes with their current achieved level.
6370
+ * Check if a node is considered synced (low lag)
4558
6371
  */
4559
- shutdown() {
4560
- const count = this.pending.size;
4561
- for (const [opId, pending] of this.pending.entries()) {
4562
- if (pending.timeoutHandle) {
4563
- clearTimeout(pending.timeoutHandle);
4564
- }
4565
- const highestAchieved = getHighestWriteConcernLevel(pending.achievedLevels);
4566
- const latencyMs = Date.now() - pending.timestamp;
4567
- const result = {
4568
- success: highestAchieved === pending.writeConcern,
4569
- opId,
4570
- achievedLevel: highestAchieved,
4571
- latencyMs,
4572
- error: highestAchieved !== pending.writeConcern ? `Shutdown: achieved ${highestAchieved}, requested ${pending.writeConcern}` : void 0
4573
- };
4574
- pending.resolve(result);
6372
+ isSynced(nodeId, maxLagMs = 1e3) {
6373
+ const lag = this.lagTracker.getLag(nodeId);
6374
+ return lag.current < maxLagMs;
6375
+ }
6376
+ /**
6377
+ * Get LagTracker for advanced monitoring
6378
+ */
6379
+ getLagTracker() {
6380
+ return this.lagTracker;
6381
+ }
6382
+ /**
6383
+ * Export metrics in Prometheus format
6384
+ */
6385
+ toPrometheusMetrics() {
6386
+ const lines = [];
6387
+ lines.push("# HELP topgun_replication_queue_size Pending operations in replication queue");
6388
+ lines.push("# TYPE topgun_replication_queue_size gauge");
6389
+ for (const [nodeId, queue] of this.replicationQueue) {
6390
+ lines.push(`topgun_replication_queue_size{node="${nodeId}"} ${queue.length}`);
4575
6391
  }
4576
- this.pending.clear();
4577
- if (count > 0) {
4578
- logger.info({ count }, "WriteAckManager shutdown");
6392
+ lines.push("");
6393
+ lines.push("# HELP topgun_replication_pending_acks Pending synchronous acknowledgments");
6394
+ lines.push("# TYPE topgun_replication_pending_acks gauge");
6395
+ lines.push(`topgun_replication_pending_acks ${this.pendingAcks.size}`);
6396
+ lines.push("");
6397
+ lines.push(this.lagTracker.toPrometheusMetrics());
6398
+ return lines.join("\n");
6399
+ }
6400
+ /**
6401
+ * Cleanup resources
6402
+ */
6403
+ close() {
6404
+ this.stopQueueProcessor();
6405
+ for (const [opId, pending] of this.pendingAcks) {
6406
+ clearTimeout(pending.timeout);
6407
+ pending.reject(new Error("ReplicationPipeline closed"));
4579
6408
  }
6409
+ this.pendingAcks.clear();
6410
+ this.replicationQueue.clear();
6411
+ this.lagTracker.clear();
4580
6412
  }
4581
6413
  };
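ReplicationPipeline picks between per-operation acknowledgments (STRONG, QUORUM) and the batched queue (EVENTUAL). A hedged sketch of how an owner node might drive it, assuming clusterManager, partitionService, and a map operation op are already in scope as they are elsewhere in this file:

import { ConsistencyLevel } from "@topgunbuild/core";

const pipeline = new ReplicationPipeline(clusterManager, partitionService, { ackTimeoutMs: 2000 });
// Backup nodes apply incoming operations through this callback.
pipeline.setOperationApplier(async (operation, opId, sourceNode) => {
  // apply the operation to local state; returning false rejects the ack
  return true;
});
try {
  const res = await pipeline.replicate(op, "op-42", op.key, { consistency: ConsistencyLevel.QUORUM });
  // res.ackedBy: the local node plus the backups that acknowledged in time
} catch (err) {
  if (err instanceof ReplicationTimeoutError) {
    // quorum was not reached before ackTimeoutMs
  }
}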
4582
6414
 
@@ -4741,6 +6573,22 @@ var ServerCoordinator = class {
4741
6573
  tls: config.clusterTls
4742
6574
  });
4743
6575
  this.partitionService = new PartitionService(this.cluster);
6576
+ if (config.replicationEnabled !== false) {
6577
+ this.replicationPipeline = new ReplicationPipeline(
6578
+ this.cluster,
6579
+ this.partitionService,
6580
+ {
6581
+ ...DEFAULT_REPLICATION_CONFIG2,
6582
+ defaultConsistency: config.defaultConsistency ?? ConsistencyLevel2.EVENTUAL,
6583
+ ...config.replicationConfig
6584
+ }
6585
+ );
6586
+ this.replicationPipeline.setOperationApplier(this.applyReplicatedOperation.bind(this));
6587
+ logger.info({ nodeId: config.nodeId }, "ReplicationPipeline initialized");
6588
+ }
6589
+ this.partitionService.on("rebalanced", (partitionMap, changes) => {
6590
+ this.broadcastPartitionMap(partitionMap);
6591
+ });
4744
6592
  this.lockManager = new LockManager();
4745
6593
  this.lockManager.on("lockGranted", (evt) => this.handleLockGranted(evt));
4746
6594
  this.topicManager = new TopicManager({
@@ -4857,7 +6705,7 @@ var ServerCoordinator = class {
4857
6705
  this.metricsService.destroy();
4858
6706
  this.wss.close();
4859
6707
  logger.info(`Closing ${this.clients.size} client connections...`);
4860
- const shutdownMsg = serialize3({ type: "SHUTDOWN_PENDING", retryAfter: 5e3 });
6708
+ const shutdownMsg = serialize4({ type: "SHUTDOWN_PENDING", retryAfter: 5e3 });
4861
6709
  for (const client of this.clients.values()) {
4862
6710
  try {
4863
6711
  if (client.socket.readyState === WebSocket3.OPEN) {
@@ -4879,6 +6727,9 @@ var ServerCoordinator = class {
4879
6727
  await this.workerPool.shutdown(5e3);
4880
6728
  logger.info("Worker pool shutdown complete.");
4881
6729
  }
6730
+ if (this.replicationPipeline) {
6731
+ this.replicationPipeline.close();
6732
+ }
4882
6733
  if (this.cluster) {
4883
6734
  this.cluster.stop();
4884
6735
  }
@@ -5026,7 +6877,7 @@ var ServerCoordinator = class {
5026
6877
  this.clients.delete(clientId);
5027
6878
  this.metricsService.setConnectedClients(this.clients.size);
5028
6879
  });
5029
- ws.send(serialize3({ type: "AUTH_REQUIRED" }));
6880
+ ws.send(serialize4({ type: "AUTH_REQUIRED" }));
5030
6881
  }
5031
6882
  async handleMessage(client, rawMessage) {
5032
6883
  const parseResult = MessageSchema.safeParse(rawMessage);
@@ -5436,6 +7287,23 @@ var ServerCoordinator = class {
5436
7287
  }
5437
7288
  break;
5438
7289
  }
7290
+ // ============ Phase 4: Partition Map Request Handler ============
7291
+ case "PARTITION_MAP_REQUEST": {
7292
+ const clientVersion = message.payload?.currentVersion ?? 0;
7293
+ const currentMap = this.partitionService.getPartitionMap();
7294
+ if (clientVersion < currentMap.version) {
7295
+ client.writer.write({
7296
+ type: "PARTITION_MAP",
7297
+ payload: currentMap
7298
+ });
7299
+ logger.debug({
7300
+ clientId: client.id,
7301
+ clientVersion,
7302
+ serverVersion: currentMap.version
7303
+ }, "Sent partition map to client");
7304
+ }
7305
+ break;
7306
+ }
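This handler is version-gated: the server replies only when the client's partition map is stale. A sketch of the exchange; the ws socket and the serialize helper are assumptions here (only the message shapes come from this diff):

// Client -> server: request a newer partition map than version 3.
ws.send(serialize({ type: "PARTITION_MAP_REQUEST", payload: { currentVersion: 3 } }));
// Server -> client, only if 3 < the server's map version:
//   { type: "PARTITION_MAP", payload: <current partition map, including its version> }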
5439
7307
  // ============ ORMap Sync Message Handlers ============
5440
7308
  case "ORMAP_SYNC_INIT": {
5441
7309
  if (!this.securityManager.checkPermission(client.principal, message.mapName, "READ")) {
@@ -5639,6 +7507,28 @@ var ServerCoordinator = class {
5639
7507
  client.lastActiveHlc = this.hlc.now();
5640
7508
  }
5641
7509
  }
7510
+ // ============ Phase 4: Partition Map Broadcast ============
7511
+ /**
7512
+ * Broadcast partition map to all connected and authenticated clients.
7513
+ * Called when partition topology changes (node join/leave/failover).
7514
+ */
7515
+ broadcastPartitionMap(partitionMap) {
7516
+ const message = {
7517
+ type: "PARTITION_MAP",
7518
+ payload: partitionMap
7519
+ };
7520
+ let broadcastCount = 0;
7521
+ for (const client of this.clients.values()) {
7522
+ if (client.isAuthenticated && client.socket.readyState === WebSocket3.OPEN) {
7523
+ client.writer.write(message);
7524
+ broadcastCount++;
7525
+ }
7526
+ }
7527
+ logger.info({
7528
+ version: partitionMap.version,
7529
+ clientCount: broadcastCount
7530
+ }, "Broadcast partition map to clients");
7531
+ }
5642
7532
  broadcast(message, excludeClientId) {
5643
7533
  const isServerEvent = message.type === "SERVER_EVENT";
5644
7534
  if (isServerEvent) {
@@ -5669,7 +7559,7 @@ var ServerCoordinator = class {
5669
7559
  client.writer.write({ ...message, payload: newPayload });
5670
7560
  }
5671
7561
  } else {
5672
- const msgData = serialize3(message);
7562
+ const msgData = serialize4(message);
5673
7563
  for (const [id, client] of this.clients) {
5674
7564
  if (id !== excludeClientId && client.socket.readyState === 1) {
5675
7565
  client.writer.writeRaw(msgData);
@@ -5747,7 +7637,7 @@ var ServerCoordinator = class {
5747
7637
  payload: { events: filteredEvents },
5748
7638
  timestamp: this.hlc.now()
5749
7639
  };
5750
- const serializedBatch = serialize3(batchMessage);
7640
+ const serializedBatch = serialize4(batchMessage);
5751
7641
  for (const client of clients) {
5752
7642
  try {
5753
7643
  client.writer.writeRaw(serializedBatch);
@@ -5832,7 +7722,7 @@ var ServerCoordinator = class {
5832
7722
  payload: { events: filteredEvents },
5833
7723
  timestamp: this.hlc.now()
5834
7724
  };
5835
- const serializedBatch = serialize3(batchMessage);
7725
+ const serializedBatch = serialize4(batchMessage);
5836
7726
  for (const client of clients) {
5837
7727
  sendPromises.push(new Promise((resolve, reject) => {
5838
7728
  try {
@@ -6117,6 +8007,26 @@ var ServerCoordinator = class {
6117
8007
  }
6118
8008
  }
6119
8009
  }
8010
+ /**
8011
+ * Apply replicated operation from another node (callback for ReplicationPipeline)
8012
+ * This is called when we receive a replicated operation as a backup node
8013
+ */
8014
+ async applyReplicatedOperation(operation, opId, sourceNode) {
8015
+ try {
8016
+ const op = operation;
8017
+ logger.debug({ sourceNode, opId, mapName: op.mapName, key: op.key }, "Applying replicated operation");
8018
+ const { eventPayload } = this.applyOpToMap(op);
8019
+ this.broadcast({
8020
+ type: "SERVER_EVENT",
8021
+ payload: eventPayload,
8022
+ timestamp: this.hlc.now()
8023
+ });
8024
+ return true;
8025
+ } catch (error) {
8026
+ logger.error({ sourceNode, opId, error }, "Failed to apply replicated operation");
8027
+ return false;
8028
+ }
8029
+ }
6120
8030
  /**
6121
8031
  * Build OpContext for interceptors.
6122
8032
  */
@@ -6205,6 +8115,12 @@ var ServerCoordinator = class {
6205
8115
  throw err;
6206
8116
  }
6207
8117
  const { eventPayload } = this.applyOpToMap(op);
8118
+ if (this.replicationPipeline && !fromCluster) {
8119
+ const opId = op.id || `${op.mapName}:${op.key}:${Date.now()}`;
8120
+ this.replicationPipeline.replicate(op, opId, op.key).catch((err) => {
8121
+ logger.warn({ opId, key: op.key, err }, "Replication failed (non-fatal)");
8122
+ });
8123
+ }
6208
8124
  this.broadcast({
6209
8125
  type: "SERVER_EVENT",
6210
8126
  payload: eventPayload,
@@ -6327,6 +8243,12 @@ var ServerCoordinator = class {
6327
8243
  throw err;
6328
8244
  }
6329
8245
  const { eventPayload } = this.applyOpToMap(op);
8246
+ if (this.replicationPipeline) {
8247
+ const opId = op.id || `${op.mapName}:${op.key}:${Date.now()}`;
8248
+ this.replicationPipeline.replicate(op, opId, op.key).catch((err) => {
8249
+ logger.warn({ opId, key: op.key, err }, "Batch replication failed (non-fatal)");
8250
+ });
8251
+ }
6330
8252
  batchedEvents.push(eventPayload);
6331
8253
  this.broadcastToCluster(eventPayload);
6332
8254
  this.runAfterInterceptors(op, context);
@@ -7274,18 +9196,403 @@ function logNativeStatus() {
7274
9196
  ` - SharedArrayBuffer: ${status.sharedArrayBuffer ? "available" : "unavailable"}`
7275
9197
  );
7276
9198
  }
9199
+
9200
+ // src/cluster/ClusterCoordinator.ts
9201
+ import { EventEmitter as EventEmitter9 } from "events";
9202
+ import {
9203
+ DEFAULT_MIGRATION_CONFIG as DEFAULT_MIGRATION_CONFIG3,
9204
+ DEFAULT_REPLICATION_CONFIG as DEFAULT_REPLICATION_CONFIG3
9205
+ } from "@topgunbuild/core";
9206
+ var DEFAULT_CLUSTER_COORDINATOR_CONFIG = {
9207
+ gradualRebalancing: true,
9208
+ migration: DEFAULT_MIGRATION_CONFIG3,
9209
+ replication: DEFAULT_REPLICATION_CONFIG3,
9210
+ replicationEnabled: true
9211
+ };
9212
+ var ClusterCoordinator = class extends EventEmitter9 {
9213
+ constructor(config) {
9214
+ super();
9215
+ this.replicationPipeline = null;
9216
+ // State
9217
+ this.started = false;
9218
+ this.actualPort = 0;
9219
+ this.config = {
9220
+ ...DEFAULT_CLUSTER_COORDINATOR_CONFIG,
9221
+ ...config
9222
+ };
9223
+ this.clusterManager = new ClusterManager(this.config.cluster);
9224
+ this.lagTracker = new LagTracker();
9225
+ const partitionServiceConfig = {
9226
+ gradualRebalancing: this.config.gradualRebalancing,
9227
+ migration: this.config.migration
9228
+ };
9229
+ this.partitionService = new PartitionService(this.clusterManager, partitionServiceConfig);
9230
+ if (this.config.replicationEnabled) {
9231
+ this.replicationPipeline = new ReplicationPipeline(
9232
+ this.clusterManager,
9233
+ this.partitionService,
9234
+ this.config.replication
9235
+ );
9236
+ }
9237
+ this.setupEventHandlers();
9238
+ }
9239
+ // ============================================
9240
+ // Lifecycle Methods
9241
+ // ============================================
9242
+ /**
9243
+ * Start the cluster coordinator
9244
+ */
9245
+ async start() {
9246
+ if (this.started) {
9247
+ return this.actualPort;
9248
+ }
9249
+ logger.info({ nodeId: this.config.cluster.nodeId }, "Starting ClusterCoordinator");
9250
+ this.actualPort = await this.clusterManager.start();
9251
+ const migrationManager = this.partitionService.getMigrationManager();
9252
+ if (migrationManager && this.config.dataCollector) {
9253
+ migrationManager.setDataCollector(this.config.dataCollector);
9254
+ }
9255
+ if (migrationManager && this.config.dataStorer) {
9256
+ migrationManager.setDataStorer(this.config.dataStorer);
9257
+ }
9258
+ this.started = true;
9259
+ this.emit("started");
9260
+ logger.info({ nodeId: this.config.cluster.nodeId, port: this.actualPort }, "ClusterCoordinator started");
9261
+ return this.actualPort;
9262
+ }
9263
+ /**
9264
+ * Stop the cluster coordinator
9265
+ */
9266
+ async stop() {
9267
+ if (!this.started) return;
9268
+ logger.info({ nodeId: this.config.cluster.nodeId }, "Stopping ClusterCoordinator");
9269
+ await this.partitionService.cancelMigrations();
9270
+ this.replicationPipeline?.close();
9271
+ this.clusterManager.stop();
9272
+ this.started = false;
9273
+ this.emit("stopped");
9274
+ logger.info({ nodeId: this.config.cluster.nodeId }, "ClusterCoordinator stopped");
9275
+ }
9276
+ // ============================================
9277
+ // Cluster Information
9278
+ // ============================================
9279
+ /**
9280
+ * Get local node ID
9281
+ */
9282
+ getNodeId() {
9283
+ return this.config.cluster.nodeId;
9284
+ }
9285
+ /**
9286
+ * Get cluster port
9287
+ */
9288
+ getPort() {
9289
+ return this.actualPort;
9290
+ }
9291
+ /**
9292
+ * Get all cluster members
9293
+ */
9294
+ getMembers() {
9295
+ return this.clusterManager.getMembers();
9296
+ }
9297
+ /**
9298
+ * Check if this is the local node
9299
+ */
9300
+ isLocal(nodeId) {
9301
+ return this.clusterManager.isLocal(nodeId);
9302
+ }
9303
+ /**
9304
+ * Check if coordinator is started
9305
+ */
9306
+ isStarted() {
9307
+ return this.started;
9308
+ }
9309
+ // ============================================
9310
+ // Partition Operations
9311
+ // ============================================
9312
+ /**
9313
+ * Get current partition map
9314
+ */
9315
+ getPartitionMap() {
9316
+ return this.partitionService.getPartitionMap();
9317
+ }
9318
+ /**
9319
+ * Get partition map version
9320
+ */
9321
+ getPartitionMapVersion() {
9322
+ return this.partitionService.getMapVersion();
9323
+ }
9324
+ /**
9325
+ * Get partition ID for a key
9326
+ */
9327
+ getPartitionId(key) {
9328
+ return this.partitionService.getPartitionId(key);
9329
+ }
9330
+ /**
9331
+ * Get owner node for a key
9332
+ */
9333
+ getOwner(key) {
9334
+ return this.partitionService.getOwner(key);
9335
+ }
9336
+ /**
9337
+ * Check if this node owns the key
9338
+ */
9339
+ isLocalOwner(key) {
9340
+ return this.partitionService.isLocalOwner(key);
9341
+ }
9342
+ /**
9343
+ * Check if this node is a backup for the key
9344
+ */
9345
+ isLocalBackup(key) {
9346
+ return this.partitionService.isLocalBackup(key);
9347
+ }
9348
+ /**
9349
+ * Get backup nodes for a partition
9350
+ */
9351
+ getBackups(partitionId) {
9352
+ return this.partitionService.getBackups(partitionId);
9353
+ }
9354
+ /**
9355
+ * Check if partition is currently migrating
9356
+ */
9357
+ isMigrating(partitionId) {
9358
+ return this.partitionService.isMigrating(partitionId);
9359
+ }
9360
+ /**
9361
+ * Check if any rebalancing is in progress
9362
+ */
9363
+ isRebalancing() {
9364
+ return this.partitionService.isRebalancing();
9365
+ }
9366
+ // ============================================
9367
+ // Migration Operations
9368
+ // ============================================
9369
+ /**
9370
+ * Get migration status
9371
+ */
9372
+ getMigrationStatus() {
9373
+ return this.partitionService.getMigrationStatus();
9374
+ }
9375
+ /**
9376
+ * Get migration metrics
9377
+ */
9378
+ getMigrationMetrics() {
9379
+ return this.partitionService.getMigrationManager()?.getMetrics() ?? null;
9380
+ }
9381
+ /**
9382
+ * Cancel all active migrations
9383
+ */
9384
+ async cancelMigrations() {
9385
+ await this.partitionService.cancelMigrations();
9386
+ }
9387
+ /**
9388
+ * Set data collector for migrations
9389
+ */
9390
+ setDataCollector(collector) {
9391
+ const migrationManager = this.partitionService.getMigrationManager();
9392
+ if (migrationManager) {
9393
+ migrationManager.setDataCollector(collector);
9394
+ }
9395
+ }
9396
+ /**
9397
+ * Set data storer for incoming migrations
9398
+ */
9399
+ setDataStorer(storer) {
9400
+ const migrationManager = this.partitionService.getMigrationManager();
9401
+ if (migrationManager) {
9402
+ migrationManager.setDataStorer(storer);
9403
+ }
9404
+ }
9405
+ // ============================================
9406
+ // Replication Operations
9407
+ // ============================================
9408
+ /**
9409
+ * Replicate an operation to backup nodes
9410
+ */
9411
+ async replicate(operation, opId, key, options = {}) {
9412
+ if (!this.replicationPipeline) {
9413
+ return { success: true, ackedBy: [] };
9414
+ }
9415
+ return this.replicationPipeline.replicate(operation, opId, key, options);
9416
+ }
9417
+ /**
9418
+ * Get replication health status
9419
+ */
9420
+ getReplicationHealth() {
9421
+ return this.lagTracker.getHealth();
9422
+ }
9423
+ /**
9424
+ * Get replication lag for a specific node
9425
+ */
9426
+ getReplicationLag(nodeId) {
9427
+ return this.lagTracker.getLag(nodeId);
9428
+ }
9429
+ /**
9430
+ * Check if a node is healthy for replication
9431
+ */
9432
+ isNodeHealthy(nodeId) {
9433
+ return this.lagTracker.isNodeHealthy(nodeId);
9434
+ }
9435
+ /**
9436
+ * Check if a node is laggy
9437
+ */
9438
+ isNodeLaggy(nodeId) {
9439
+ return this.lagTracker.isNodeLaggy(nodeId);
9440
+ }
9441
+ // ============================================
9442
+ // Cluster Communication
9443
+ // ============================================
9444
+ /**
9445
+ * Send message to a specific node
9446
+ */
9447
+ send(nodeId, message) {
9448
+ this.clusterManager.sendToNode(nodeId, message);
9449
+ }
9450
+ /**
9451
+ * Broadcast message to all nodes
9452
+ */
9453
+ broadcast(message) {
9454
+ for (const nodeId of this.clusterManager.getMembers()) {
9455
+ if (!this.clusterManager.isLocal(nodeId)) {
9456
+ this.clusterManager.sendToNode(nodeId, message);
9457
+ }
9458
+ }
9459
+ }
9460
+ // ============================================
9461
+ // Component Access
9462
+ // ============================================
9463
+ /**
9464
+ * Get underlying ClusterManager
9465
+ */
9466
+ getClusterManager() {
9467
+ return this.clusterManager;
9468
+ }
9469
+ /**
9470
+ * Get underlying PartitionService
9471
+ */
9472
+ getPartitionService() {
9473
+ return this.partitionService;
9474
+ }
9475
+ /**
9476
+ * Get underlying ReplicationPipeline
9477
+ */
9478
+ getReplicationPipeline() {
9479
+ return this.replicationPipeline;
9480
+ }
9481
+ /**
9482
+ * Get underlying LagTracker
9483
+ */
9484
+ getLagTracker() {
9485
+ return this.lagTracker;
9486
+ }
9487
+ // ============================================
9488
+ // Metrics Export
9489
+ // ============================================
9490
+ /**
9491
+ * Get all metrics in Prometheus format
9492
+ */
9493
+ getPrometheusMetrics() {
9494
+ const lines = [];
9495
+ lines.push("# HELP topgun_cluster_members Number of cluster members");
9496
+ lines.push("# TYPE topgun_cluster_members gauge");
9497
+ lines.push(`topgun_cluster_members ${this.clusterManager.getMembers().length}`);
9498
+ lines.push("");
9499
+ lines.push("# HELP topgun_cluster_started Cluster started status (1=started, 0=stopped)");
9500
+ lines.push("# TYPE topgun_cluster_started gauge");
9501
+ lines.push(`topgun_cluster_started ${this.started ? 1 : 0}`);
9502
+ lines.push("");
9503
+ lines.push("# HELP topgun_partition_map_version Current partition map version");
9504
+ lines.push("# TYPE topgun_partition_map_version gauge");
9505
+ lines.push(`topgun_partition_map_version ${this.partitionService.getMapVersion()}`);
9506
+ const migrationMetrics = this.getMigrationMetrics();
9507
+ if (migrationMetrics) {
9508
+ lines.push("");
9509
+ lines.push("# HELP topgun_migrations_started Total migrations started");
9510
+ lines.push("# TYPE topgun_migrations_started counter");
9511
+ lines.push(`topgun_migrations_started ${migrationMetrics.migrationsStarted}`);
9512
+ lines.push("");
9513
+ lines.push("# HELP topgun_migrations_completed Total migrations completed");
9514
+ lines.push("# TYPE topgun_migrations_completed counter");
9515
+ lines.push(`topgun_migrations_completed ${migrationMetrics.migrationsCompleted}`);
9516
+ lines.push("");
9517
+ lines.push("# HELP topgun_migrations_failed Total migrations failed");
9518
+ lines.push("# TYPE topgun_migrations_failed counter");
9519
+ lines.push(`topgun_migrations_failed ${migrationMetrics.migrationsFailed}`);
9520
+ lines.push("");
9521
+ lines.push("# HELP topgun_migrations_active Currently active migrations");
9522
+ lines.push("# TYPE topgun_migrations_active gauge");
9523
+ lines.push(`topgun_migrations_active ${migrationMetrics.activeMigrations}`);
9524
+ lines.push("");
9525
+ lines.push("# HELP topgun_migrations_queued Queued migrations");
9526
+ lines.push("# TYPE topgun_migrations_queued gauge");
9527
+ lines.push(`topgun_migrations_queued ${migrationMetrics.queuedMigrations}`);
9528
+ }
9529
+ lines.push("");
9530
+ lines.push(this.lagTracker.toPrometheusMetrics());
9531
+ return lines.join("\n");
9532
+ }
9533
+ // ============================================
9534
+ // Private Methods
9535
+ // ============================================
9536
+ setupEventHandlers() {
9537
+ this.clusterManager.on("memberJoined", (nodeId) => {
9538
+ logger.info({ nodeId }, "Cluster member joined");
9539
+ this.emit("member:joined", nodeId);
9540
+ });
9541
+ this.clusterManager.on("memberLeft", (nodeId) => {
9542
+ logger.info({ nodeId }, "Cluster member left");
9543
+ this.lagTracker.removeNode(nodeId);
9544
+ this.emit("member:left", nodeId);
9545
+ });
9546
+ this.partitionService.on("rebalanced", (map, changes) => {
9547
+ logger.info({ version: map.version, changesCount: changes.length }, "Partition map rebalanced");
9548
+ this.emit("partition:rebalanced", map, changes);
9549
+ });
9550
+ this.partitionService.on("partitionMoved", (info) => {
9551
+ this.emit("partition:moved", info);
9552
+ });
9553
+ const migrationManager = this.partitionService.getMigrationManager();
9554
+ if (migrationManager) {
9555
+ migrationManager.on("migrationStarted", (partitionId, targetNode) => {
9556
+ this.emit("migration:started", partitionId, targetNode);
9557
+ });
9558
+ migrationManager.on("migrationComplete", (partitionId) => {
9559
+ this.emit("migration:completed", partitionId);
9560
+ });
9561
+ migrationManager.on("migrationFailed", (partitionId, error) => {
9562
+ this.emit("migration:failed", partitionId, error);
9563
+ });
9564
+ }
9565
+ if (this.replicationPipeline) {
9566
+ this.replicationPipeline.on("ackReceived", (nodeId) => {
9567
+ this.lagTracker.recordAck(nodeId);
9568
+ });
9569
+ this.replicationPipeline.on("replicationSent", (nodeId) => {
9570
+ this.lagTracker.incrementPending(nodeId);
9571
+ });
9572
+ }
9573
+ }
9574
+ };
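ClusterCoordinator wraps ClusterManager, PartitionService, ReplicationPipeline, and LagTracker behind one lifecycle. A sketch of standalone usage built only from the methods above; the cluster config beyond nodeId and the op value are placeholders:

const coordinator = new ClusterCoordinator({
  cluster: { nodeId: "node-a" }, // remaining ClusterManager options omitted here
  replicationEnabled: true
});
const port = await coordinator.start();
if (coordinator.isLocalOwner("user:42")) {
  await coordinator.replicate(op, "op-42", "user:42"); // op: application-defined operation
}
coordinator.getReplicationHealth(); // { healthy, unhealthyNodes, laggyNodes, avgLagMs }
console.log(coordinator.getPrometheusMetrics());
await coordinator.stop();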
7277
9575
  export {
7278
9576
  BufferPool,
9577
+ ClusterCoordinator,
9578
+ ClusterManager,
7279
9579
  ConnectionRateLimiter,
9580
+ DEFAULT_CLUSTER_COORDINATOR_CONFIG,
9581
+ DEFAULT_LAG_TRACKER_CONFIG,
7280
9582
  FilterTasklet,
7281
9583
  ForEachTasklet,
7282
9584
  IteratorTasklet,
9585
+ LagTracker,
9586
+ LockManager,
7283
9587
  MapTasklet,
7284
9588
  MemoryServerAdapter,
9589
+ MigrationManager,
7285
9590
  ObjectPool,
9591
+ PartitionService,
7286
9592
  PostgresAdapter,
7287
9593
  RateLimitInterceptor,
7288
9594
  ReduceTasklet,
9595
+ ReplicationPipeline,
7289
9596
  SecurityManager,
7290
9597
  ServerCoordinator,
7291
9598
  TaskletScheduler,