@celerispay/hazelcast-client 3.12.5 → 3.12.7-3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +111 -87
- package/CHANGES_UNCOMMITTED.md +53 -0
- package/FAILOVER_FIXES.md +148 -230
- package/FAULT_TOLERANCE_IMPROVEMENTS.md +208 -0
- package/HAZELCAST_CLIENT_EVOLUTION.md +402 -0
- package/QUICK_START.md +184 -95
- package/RELEASE_SUMMARY.md +227 -147
- package/lib/HeartbeatService.js +11 -2
- package/lib/PartitionService.d.ts +14 -0
- package/lib/PartitionService.js +32 -9
- package/lib/invocation/ClientConnection.d.ts +14 -0
- package/lib/invocation/ClientConnection.js +95 -1
- package/lib/invocation/ClientConnectionManager.d.ts +99 -0
- package/lib/invocation/ClientConnectionManager.js +394 -10
- package/lib/invocation/ClusterService.d.ts +98 -5
- package/lib/invocation/ClusterService.js +541 -17
- package/lib/invocation/ConnectionAuthenticator.d.ts +11 -0
- package/lib/invocation/ConnectionAuthenticator.js +85 -12
- package/lib/invocation/CredentialPreservationService.d.ts +141 -0
- package/lib/invocation/CredentialPreservationService.js +377 -0
- package/lib/invocation/HazelcastFailoverManager.d.ts +102 -0
- package/lib/invocation/HazelcastFailoverManager.js +285 -0
- package/lib/invocation/InvocationService.js +8 -0
- package/lib/nearcache/StaleReadDetectorImpl.js +31 -4
- package/lib/proxy/ProxyManager.js +25 -4
- package/package.json +20 -28
|
@@ -24,6 +24,8 @@ var Util_1 = require("../Util");
|
|
|
24
24
|
var MemberAttributeEvent_1 = require("../core/MemberAttributeEvent");
|
|
25
25
|
var MembershipEvent_1 = require("../core/MembershipEvent");
|
|
26
26
|
var UuidUtil_1 = require("../util/UuidUtil");
|
|
27
|
+
var Address = require("../Address");
|
|
28
|
+
var HazelcastFailoverManager_1 = require("./HazelcastFailoverManager");
|
|
27
29
|
var MemberEvent;
|
|
28
30
|
(function (MemberEvent) {
|
|
29
31
|
MemberEvent[MemberEvent["ADDED"] = 1] = "ADDED";
|
|
@@ -49,10 +51,15 @@ var ClusterService = /** @class */ (function () {
|
|
|
49
51
|
this.lastFailoverAttempt = 0;
|
|
50
52
|
this.failoverCooldown = 5000; // 5 seconds cooldown between failover attempts
|
|
51
53
|
this.downAddresses = new Map(); // address -> timestamp when marked down
|
|
52
|
-
this.addressBlockDuration = 30000;
|
|
54
|
+
this.addressBlockDuration = 15000; // Reduced from 30000ms to 15000ms
|
|
55
|
+
this.reconnectionTask = null;
|
|
56
|
+
this.reconnectionInterval = 10000; // 10 seconds between reconnection attempts
|
|
53
57
|
this.client = client;
|
|
54
58
|
this.logger = this.client.getLoggingService().getLogger();
|
|
55
59
|
this.members = [];
|
|
60
|
+
this.startReconnectionTask();
|
|
61
|
+
this.startStateLoggingTask();
|
|
62
|
+
this.failoverManager = new HazelcastFailoverManager_1.HazelcastFailoverManager(client, this.logger);
|
|
56
63
|
}
|
|
57
64
|
/**
|
|
58
65
|
* Starts cluster service.
|
|
@@ -98,6 +105,24 @@ var ClusterService = /** @class */ (function () {
|
|
|
98
105
|
return Array.from(new Set(Array.from(addresses).concat(Array.from(providerAddresses))));
|
|
99
106
|
});
|
|
100
107
|
};
|
|
108
|
+
/**
|
|
109
|
+
* Returns the owner connection if available
|
|
110
|
+
*/
|
|
111
|
+
ClusterService.prototype.getOwnerConnection = function () {
|
|
112
|
+
return this.ownerConnection;
|
|
113
|
+
};
|
|
114
|
+
/**
|
|
115
|
+
* Returns whether failover is currently in progress
|
|
116
|
+
*/
|
|
117
|
+
ClusterService.prototype.isFailoverInProgress = function () {
|
|
118
|
+
return this.failoverInProgress;
|
|
119
|
+
};
|
|
120
|
+
/**
|
|
121
|
+
* Returns the list of known addresses in the cluster
|
|
122
|
+
*/
|
|
123
|
+
ClusterService.prototype.getKnownAddresses = function () {
|
|
124
|
+
return this.knownAddresses.slice(); // Return a copy to prevent external modification
|
|
125
|
+
};
|
|
101
126
|
/**
|
|
102
127
|
* Returns the list of members in the cluster.
|
|
103
128
|
* @returns
|
|
@@ -139,16 +164,15 @@ var ClusterService = /** @class */ (function () {
|
|
|
139
164
|
ClusterService.prototype.getClientInfo = function () {
|
|
140
165
|
var info = new ClientInfo_1.ClientInfo();
|
|
141
166
|
info.uuid = this.uuid;
|
|
142
|
-
|
|
167
|
+
var ownerConnection = this.getOwnerConnection();
|
|
168
|
+
if (ownerConnection) {
|
|
169
|
+
info.localAddress = ownerConnection.getLocalAddress();
|
|
170
|
+
}
|
|
171
|
+
else {
|
|
172
|
+
info.localAddress = null;
|
|
173
|
+
}
|
|
143
174
|
return info;
|
|
144
175
|
};
|
|
145
|
-
/**
|
|
146
|
-
* Returns the connection associated with owner node of this client.
|
|
147
|
-
* @returns {ClientConnection}
|
|
148
|
-
*/
|
|
149
|
-
ClusterService.prototype.getOwnerConnection = function () {
|
|
150
|
-
return this.ownerConnection;
|
|
151
|
-
};
|
|
152
176
|
/**
|
|
153
177
|
* Adds MembershipListener to listen for membership updates. There is no check for duplicate registrations,
|
|
154
178
|
* so if you register the listener twice, it will get events twice.
|
|
@@ -180,7 +204,11 @@ var ClusterService = /** @class */ (function () {
|
|
|
180
204
|
var handleAttributeChange = _this.handleMemberAttributeChange.bind(_this);
|
|
181
205
|
ClientAddMembershipListenerCodec_1.ClientAddMembershipListenerCodec.handle(m, handleMember, handleMemberList, handleAttributeChange, null);
|
|
182
206
|
};
|
|
183
|
-
|
|
207
|
+
var ownerConnection = this.getOwnerConnection();
|
|
208
|
+
if (!ownerConnection) {
|
|
209
|
+
return Promise.reject(new Error('Cannot initialize membership listener: no owner connection available'));
|
|
210
|
+
}
|
|
211
|
+
return this.client.getInvocationService().invokeOnConnection(ownerConnection, request, handler)
|
|
184
212
|
.then(function (resp) {
|
|
185
213
|
_this.logger.trace('ClusterService', 'Registered listener with id '
|
|
186
214
|
+ ClientAddMembershipListenerCodec_1.ClientAddMembershipListenerCodec.decodeResponse(resp).response);
|
|
@@ -196,6 +224,8 @@ var ClusterService = /** @class */ (function () {
|
|
|
196
224
|
};
|
|
197
225
|
ClusterService.prototype.onConnectionClosed = function (connection) {
|
|
198
226
|
this.logger.warn('ClusterService', 'Connection closed to ' + connection.toString());
|
|
227
|
+
// Mark the address as down when connection is closed
|
|
228
|
+
this.markAddressAsDown(connection.getAddress());
|
|
199
229
|
if (connection.isAuthenticatedAsOwner()) {
|
|
200
230
|
this.ownerConnection = null;
|
|
201
231
|
this.triggerFailover();
|
|
@@ -203,6 +233,8 @@ var ClusterService = /** @class */ (function () {
|
|
|
203
233
|
};
|
|
204
234
|
ClusterService.prototype.onHeartbeatStopped = function (connection) {
|
|
205
235
|
this.logger.warn('ClusterService', connection.toString() + ' stopped heartbeating.');
|
|
236
|
+
// Mark the address as down when heartbeat stops
|
|
237
|
+
this.markAddressAsDown(connection.getAddress());
|
|
206
238
|
if (connection.isAuthenticatedAsOwner()) {
|
|
207
239
|
this.client.getConnectionManager().destroyConnection(connection.getAddress());
|
|
208
240
|
this.ownerConnection = null;
|
|
@@ -210,7 +242,6 @@ var ClusterService = /** @class */ (function () {
|
|
|
210
242
|
}
|
|
211
243
|
};
|
|
212
244
|
ClusterService.prototype.triggerFailover = function () {
|
|
213
|
-
var _this = this;
|
|
214
245
|
var now = Date.now();
|
|
215
246
|
if (this.failoverInProgress || (now - this.lastFailoverAttempt) < this.failoverCooldown) {
|
|
216
247
|
this.logger.debug('ClusterService', 'Failover already in progress or too soon since last attempt');
|
|
@@ -218,23 +249,116 @@ var ClusterService = /** @class */ (function () {
|
|
|
218
249
|
}
|
|
219
250
|
this.failoverInProgress = true;
|
|
220
251
|
this.lastFailoverAttempt = now;
|
|
221
|
-
this
|
|
252
|
+
// Check if this is a single-node scenario
|
|
253
|
+
var isSingleNode = this.knownAddresses.length === 1;
|
|
254
|
+
if (isSingleNode) {
|
|
255
|
+
this.logger.info('ClusterService', '🔄 SINGLE-NODE CLUSTER RESET: Node restart detected, starting fresh...');
|
|
256
|
+
this.handleSingleNodeClusterReset();
|
|
257
|
+
}
|
|
258
|
+
else {
|
|
259
|
+
this.logger.info('ClusterService', '🚀 Starting failover process - SERVER-FIRST APPROACH...');
|
|
260
|
+
// SERVER-FIRST: No credential preservation needed
|
|
261
|
+
// We trust the server will provide correct member information
|
|
262
|
+
this.logger.info('ClusterService', '🎯 SERVER-FIRST: No credential management - trusting server data');
|
|
263
|
+
this.handleMultiNodeFailover();
|
|
264
|
+
}
|
|
265
|
+
};
|
|
266
|
+
/**
|
|
267
|
+
* Handles single-node cluster reset - treats node restart as fresh cluster
|
|
268
|
+
*/
|
|
269
|
+
ClusterService.prototype.handleSingleNodeClusterReset = function () {
|
|
270
|
+
var _this = this;
|
|
271
|
+
this.logger.info('ClusterService', '🧹 SINGLE-NODE RESET: Clearing all credentials and state...');
|
|
272
|
+
// Clear all stored credentials - node restart means fresh cluster
|
|
273
|
+
this.client.getConnectionManager().clearAllCredentials();
|
|
274
|
+
// Reset client UUIDs - will be assigned fresh by server
|
|
275
|
+
this.uuid = null;
|
|
276
|
+
this.ownerUuid = null;
|
|
277
|
+
// Log state before reset
|
|
278
|
+
this.logCurrentState();
|
|
279
|
+
// Force cleanup of all dead connections
|
|
280
|
+
this.client.getConnectionManager().forceCleanupDeadConnections();
|
|
281
|
+
// Clear partition information
|
|
282
|
+
this.client.getPartitionService().clearPartitionTable();
|
|
283
|
+
// Direct reconnection without waiting for member events
|
|
284
|
+
this.logger.info('ClusterService', '🔄 SINGLE-NODE RESET: Attempting direct reconnection...');
|
|
285
|
+
this.connectToCluster()
|
|
286
|
+
.then(function () {
|
|
287
|
+
_this.logger.info('ClusterService', '✅ Single-node cluster reset completed successfully');
|
|
288
|
+
_this.logCurrentState();
|
|
289
|
+
})
|
|
290
|
+
.catch(function (error) {
|
|
291
|
+
_this.logger.error('ClusterService', 'Single-node cluster reset failed', error);
|
|
292
|
+
_this.logCurrentState();
|
|
293
|
+
})
|
|
294
|
+
.finally(function () {
|
|
295
|
+
_this.failoverInProgress = false;
|
|
296
|
+
});
|
|
297
|
+
};
|
|
298
|
+
/**
|
|
299
|
+
* Handles multi-node failover - preserves existing logic
|
|
300
|
+
*/
|
|
301
|
+
ClusterService.prototype.handleMultiNodeFailover = function () {
|
|
302
|
+
var _this = this;
|
|
303
|
+
// Log state before failover
|
|
304
|
+
this.logCurrentState();
|
|
305
|
+
// Force cleanup of all dead connections to prevent leakage
|
|
306
|
+
this.client.getConnectionManager().forceCleanupDeadConnections();
|
|
222
307
|
// Clear any stale partition information
|
|
223
308
|
this.client.getPartitionService().clearPartitionTable();
|
|
224
309
|
// Attempt to reconnect to cluster
|
|
225
310
|
this.connectToCluster()
|
|
226
311
|
.then(function () {
|
|
227
|
-
_this.logger.info('ClusterService', 'Failover completed successfully');
|
|
312
|
+
_this.logger.info('ClusterService', '✅ Failover completed successfully - SERVER-FIRST approach');
|
|
313
|
+
// No credential management needed - server handles everything
|
|
314
|
+
_this.logCurrentState(); // Log state after successful failover
|
|
228
315
|
})
|
|
229
316
|
.catch(function (error) {
|
|
230
317
|
_this.logger.error('ClusterService', 'Failover failed', error);
|
|
231
|
-
// If failover fails,
|
|
232
|
-
_this.
|
|
318
|
+
// If failover fails, try to unblock at least one address to allow recovery
|
|
319
|
+
_this.attemptEmergencyRecovery();
|
|
320
|
+
_this.logCurrentState(); // Log state after failed failover
|
|
321
|
+
// Don't shutdown immediately, give recovery a chance
|
|
233
322
|
})
|
|
234
323
|
.finally(function () {
|
|
235
324
|
_this.failoverInProgress = false;
|
|
236
325
|
});
|
|
237
326
|
};
|
|
327
|
+
/**
|
|
328
|
+
* Attempts emergency recovery when failover fails
|
|
329
|
+
*/
|
|
330
|
+
ClusterService.prototype.attemptEmergencyRecovery = function () {
|
|
331
|
+
var _this = this;
|
|
332
|
+
this.logger.warn('ClusterService', 'Attempting emergency recovery...');
|
|
333
|
+
// Unblock at least one address to allow recovery
|
|
334
|
+
if (this.downAddresses.size > 0) {
|
|
335
|
+
var firstBlockedAddress_1 = Array.from(this.downAddresses.keys())[0];
|
|
336
|
+
this.logger.info('ClusterService', "Emergency unblocking address " + firstBlockedAddress_1);
|
|
337
|
+
this.downAddresses.delete(firstBlockedAddress_1);
|
|
338
|
+
// Try to connect to the unblocked address
|
|
339
|
+
try {
|
|
340
|
+
var _a = firstBlockedAddress_1.split(':'), host = _a[0], portStr = _a[1];
|
|
341
|
+
var port = parseInt(portStr, 10);
|
|
342
|
+
if (host && !isNaN(port)) {
|
|
343
|
+
var address_1 = new Address(host, port);
|
|
344
|
+
this.logger.info('ClusterService', "Attempting emergency connection to " + firstBlockedAddress_1);
|
|
345
|
+
// Try to connect without blocking
|
|
346
|
+
this.client.getConnectionManager().getOrConnect(address_1, false)
|
|
347
|
+
.then(function (connection) {
|
|
348
|
+
_this.logger.info('ClusterService', "Emergency connection successful to " + firstBlockedAddress_1);
|
|
349
|
+
_this.evaluateOwnershipChange(address_1, connection);
|
|
350
|
+
_this.client.getPartitionService().refresh();
|
|
351
|
+
})
|
|
352
|
+
.catch(function (error) {
|
|
353
|
+
_this.logger.warn('ClusterService', "Emergency connection failed to " + firstBlockedAddress_1 + ":", error);
|
|
354
|
+
});
|
|
355
|
+
}
|
|
356
|
+
}
|
|
357
|
+
catch (error) {
|
|
358
|
+
this.logger.error('ClusterService', 'Error during emergency recovery:', error);
|
|
359
|
+
}
|
|
360
|
+
}
|
|
361
|
+
};
|
|
238
362
|
ClusterService.prototype.isAddressKnownDown = function (address) {
|
|
239
363
|
var addressStr = address.toString();
|
|
240
364
|
var downTime = this.downAddresses.get(addressStr);
|
|
@@ -285,7 +409,7 @@ var ClusterService = /** @class */ (function () {
|
|
|
285
409
|
+ ', attempt period: ' + attemptPeriod + ', down addresses: ' + this.getDownAddressesInfo());
|
|
286
410
|
if (this.knownAddresses.length <= index) {
|
|
287
411
|
remainingAttemptLimit = remainingAttemptLimit - 1;
|
|
288
|
-
if (remainingAttemptLimit
|
|
412
|
+
if (remainingAttemptLimit <= 0) {
|
|
289
413
|
var errorMessage = 'Unable to connect to any of the following addresses: ' +
|
|
290
414
|
this.knownAddresses.map(function (element) {
|
|
291
415
|
return element.toString();
|
|
@@ -341,12 +465,48 @@ var ClusterService = /** @class */ (function () {
|
|
|
341
465
|
this.members = members;
|
|
342
466
|
this.client.getPartitionService().refresh();
|
|
343
467
|
this.logger.info('ClusterService', 'Members received.', this.members);
|
|
468
|
+
// Log current state after member list update
|
|
469
|
+
this.logCurrentState();
|
|
470
|
+
// In smart routing mode, proactively open connections to all non-owner members
|
|
471
|
+
// so that partition-aware routing can work across all nodes.
|
|
472
|
+
if (this.client.getConfig().networkConfig.smartRouting) {
|
|
473
|
+
this.connectToNonOwnerMembers(members);
|
|
474
|
+
}
|
|
344
475
|
var events = this.detectMembershipEvents(prevMembers);
|
|
345
476
|
for (var _i = 0, events_1 = events; _i < events_1.length; _i++) {
|
|
346
477
|
var event = events_1[_i];
|
|
347
478
|
this.fireMembershipEvent(event);
|
|
348
479
|
}
|
|
349
480
|
};
|
|
481
|
+
/**
|
|
482
|
+
* Proactively opens connections to all non-owner cluster members.
|
|
483
|
+
* Required for smart routing: the client needs a live connection to every
|
|
484
|
+
* node so it can route operations to the correct partition owner.
|
|
485
|
+
* Failures are silently ignored — the periodic reconnection task will retry.
|
|
486
|
+
*/
|
|
487
|
+
ClusterService.prototype.connectToNonOwnerMembers = function (members) {
|
|
488
|
+
var _this = this;
|
|
489
|
+
var ownerAddress = this.ownerConnection ? this.ownerConnection.getAddress().toString() : null;
|
|
490
|
+
members.forEach(function (member) {
|
|
491
|
+
var memberAddressStr = member.address.toString();
|
|
492
|
+
// Skip the owner — already connected
|
|
493
|
+
if (memberAddressStr === ownerAddress) {
|
|
494
|
+
return;
|
|
495
|
+
}
|
|
496
|
+
// Skip if already connected
|
|
497
|
+
if (_this.client.getConnectionManager().hasConnection(member.address)) {
|
|
498
|
+
return;
|
|
499
|
+
}
|
|
500
|
+
_this.logger.info('ClusterService', "\uD83D\uDD17 SMART-ROUTING: Opening connection to non-owner member " + memberAddressStr);
|
|
501
|
+
_this.client.getConnectionManager().getOrConnect(member.address, false)
|
|
502
|
+
.then(function () {
|
|
503
|
+
_this.logger.info('ClusterService', "\u2705 SMART-ROUTING: Connected to member " + memberAddressStr);
|
|
504
|
+
})
|
|
505
|
+
.catch(function (err) {
|
|
506
|
+
_this.logger.warn('ClusterService', "\u26A0\uFE0F SMART-ROUTING: Could not connect to member " + memberAddressStr + ": " + err.message);
|
|
507
|
+
});
|
|
508
|
+
});
|
|
509
|
+
};
|
|
350
510
|
ClusterService.prototype.detectMembershipEvents = function (prevMembers) {
|
|
351
511
|
var events = [];
|
|
352
512
|
var eventMembers = Array.from(this.members);
|
|
@@ -399,6 +559,8 @@ var ClusterService = /** @class */ (function () {
|
|
|
399
559
|
};
|
|
400
560
|
ClusterService.prototype.memberAdded = function (member) {
|
|
401
561
|
this.members.push(member);
|
|
562
|
+
// Handle member added and update preserved credentials
|
|
563
|
+
this.handleMemberAdded(member);
|
|
402
564
|
var membershipEvent = new MembershipEvent_1.MembershipEvent(member, MemberEvent.ADDED, this.members);
|
|
403
565
|
this.fireMembershipEvent(membershipEvent);
|
|
404
566
|
};
|
|
@@ -408,10 +570,372 @@ var ClusterService = /** @class */ (function () {
|
|
|
408
570
|
var removedMemberList = this.members.splice(memberIndex, 1);
|
|
409
571
|
assert(removedMemberList.length === 1);
|
|
410
572
|
}
|
|
411
|
-
this
|
|
573
|
+
// Check if we have a healthy connection to this member
|
|
574
|
+
var connectionManager = this.client.getConnectionManager();
|
|
575
|
+
var existingConnection = connectionManager.getConnection(member.address);
|
|
576
|
+
if (existingConnection && existingConnection.isHealthy()) {
|
|
577
|
+
// If the connection is healthy, don't destroy it immediately
|
|
578
|
+
// This prevents unnecessary disconnections during temporary network issues
|
|
579
|
+
this.logger.info('ClusterService', "Member removed but connection is healthy: " + member.address.toString() + ", preserving connection");
|
|
580
|
+
// Only destroy if we're not in failover mode
|
|
581
|
+
if (!this.failoverInProgress) {
|
|
582
|
+
this.logger.debug('ClusterService', "Destroying healthy connection to removed member: " + member.address.toString());
|
|
583
|
+
connectionManager.destroyConnection(member.address);
|
|
584
|
+
}
|
|
585
|
+
else {
|
|
586
|
+
this.logger.debug('ClusterService', "Preserving healthy connection during failover: " + member.address.toString());
|
|
587
|
+
}
|
|
588
|
+
}
|
|
589
|
+
else {
|
|
590
|
+
// If connection is unhealthy, destroy it
|
|
591
|
+
this.logger.info('ClusterService', "Member removed with unhealthy connection: " + member.address.toString() + ", destroying connection");
|
|
592
|
+
connectionManager.destroyConnection(member.address);
|
|
593
|
+
}
|
|
412
594
|
var membershipEvent = new MembershipEvent_1.MembershipEvent(member, MemberEvent.REMOVED, this.members);
|
|
413
595
|
this.fireMembershipEvent(membershipEvent);
|
|
414
596
|
};
|
|
597
|
+
/**
|
|
598
|
+
* Performs comprehensive credential cleanup when cluster membership changes
|
|
599
|
+
* This ensures ALL credentials are consistent with the current cluster owner UUID
|
|
600
|
+
* @param connectionManager The connection manager instance
|
|
601
|
+
* @param currentClusterOwnerUuid The current cluster owner UUID
|
|
602
|
+
*/
|
|
603
|
+
/**
|
|
604
|
+
* Handles member added event - SERVER-FIRST APPROACH
|
|
605
|
+
* We trust what the server tells us and store it as credentials
|
|
606
|
+
* @param member The member that was added
|
|
607
|
+
*/
|
|
608
|
+
ClusterService.prototype.handleMemberAdded = function (member) {
|
|
609
|
+
var _this = this;
|
|
610
|
+
this.logger.info('ClusterService', "\u2705 SERVER CONFIRMED: Member[ uuid: " + member.uuid + ", address: " + member.address.toString() + "] added to cluster");
|
|
611
|
+
// SERVER-FIRST: Store server data as credentials
|
|
612
|
+
// The server is the authority - we store what it tells us
|
|
613
|
+
this.logger.info('ClusterService', "\uD83C\uDFAF SERVER-FIRST: Storing server member data as credentials - server is authority");
|
|
614
|
+
var connectionManager = this.client.getConnectionManager();
|
|
615
|
+
// Store the server-provided UUID as the authoritative credential
|
|
616
|
+
if (connectionManager && typeof connectionManager.updatePreservedCredentials === 'function') {
|
|
617
|
+
connectionManager.updatePreservedCredentials(member.address, member.uuid);
|
|
618
|
+
}
|
|
619
|
+
// Record that we received a member added event for this address
|
|
620
|
+
if (connectionManager && typeof connectionManager.recordMemberAddedEvent === 'function') {
|
|
621
|
+
connectionManager.recordMemberAddedEvent(member.address);
|
|
622
|
+
}
|
|
623
|
+
// Find the current owner from the cluster state
|
|
624
|
+
var currentOwner = this.findCurrentOwner();
|
|
625
|
+
if (currentOwner) {
|
|
626
|
+
this.logger.info('ClusterService', "\uD83D\uDD04 SERVER-FIRST: Updating ALL credentials with current owner UUID: " + currentOwner.uuid);
|
|
627
|
+
// Update all credentials with the current owner UUID from server
|
|
628
|
+
if (connectionManager && typeof connectionManager.updateAllCredentialsWithNewOwnerUuid === 'function') {
|
|
629
|
+
connectionManager.updateAllCredentialsWithNewOwnerUuid(currentOwner.uuid);
|
|
630
|
+
}
|
|
631
|
+
// CRITICAL FIX: Update client's own UUIDs to match server expectations
|
|
632
|
+
this.logger.info('ClusterService', "\uD83D\uDD04 SERVER-FIRST: Updating client UUIDs to match server state");
|
|
633
|
+
this.logger.info('ClusterService', " - Old Client UUID: " + (this.uuid || 'NOT SET'));
|
|
634
|
+
this.logger.info('ClusterService', " - Old Owner UUID: " + (this.ownerUuid || 'NOT SET'));
|
|
635
|
+
// Update client's own UUIDs with server-provided data
|
|
636
|
+
// The client UUID should match the owner's UUID for authentication
|
|
637
|
+
this.uuid = currentOwner.uuid;
|
|
638
|
+
this.ownerUuid = currentOwner.uuid;
|
|
639
|
+
this.logger.info('ClusterService', " - New Client UUID: " + this.uuid);
|
|
640
|
+
this.logger.info('ClusterService', " - New Owner UUID: " + this.ownerUuid);
|
|
641
|
+
}
|
|
642
|
+
// Refresh partition table (KEEPING REFRESH METHOD UNTOUCHED as requested)
|
|
643
|
+
this.client.getPartitionService().refresh();
|
|
644
|
+
this.logger.info('ClusterService', "\u2705 SERVER-FIRST: Member " + member.uuid + " at " + member.address.toString() + " credentials stored from server data");
|
|
645
|
+
// In smart routing mode, proactively open a connection to this member
|
|
646
|
+
// so it is immediately available for partition-aware routing.
|
|
647
|
+
if (this.client.getConfig().networkConfig.smartRouting) {
|
|
648
|
+
var ownerAddress = this.ownerConnection ? this.ownerConnection.getAddress().toString() : null;
|
|
649
|
+
var memberAddressStr_1 = member.address.toString();
|
|
650
|
+
if (memberAddressStr_1 !== ownerAddress && !this.client.getConnectionManager().hasConnection(member.address)) {
|
|
651
|
+
this.logger.info('ClusterService', "\uD83D\uDD17 SMART-ROUTING: Opening connection to newly joined member " + memberAddressStr_1);
|
|
652
|
+
this.client.getConnectionManager().getOrConnect(member.address, false)
|
|
653
|
+
.then(function () {
|
|
654
|
+
_this.logger.info('ClusterService', "\u2705 SMART-ROUTING: Connected to newly joined member " + memberAddressStr_1);
|
|
655
|
+
})
|
|
656
|
+
.catch(function (err) {
|
|
657
|
+
_this.logger.warn('ClusterService', "\u26A0\uFE0F SMART-ROUTING: Could not connect to newly joined member " + memberAddressStr_1 + ": " + err.message);
|
|
658
|
+
});
|
|
659
|
+
}
|
|
660
|
+
}
|
|
661
|
+
};
|
|
662
|
+
/**
|
|
663
|
+
* Finds the current owner from the cluster state
|
|
664
|
+
* @returns The current owner member or null if not found
|
|
665
|
+
*/
|
|
666
|
+
ClusterService.prototype.findCurrentOwner = function () {
|
|
667
|
+
// Check if we have an active owner connection
|
|
668
|
+
var ownerConnection = this.ownerConnection;
|
|
669
|
+
if (ownerConnection && ownerConnection.isAlive()) {
|
|
670
|
+
var ownerAddress = ownerConnection.getAddress();
|
|
671
|
+
// Find the member with this address
|
|
672
|
+
for (var _i = 0, _a = this.members; _i < _a.length; _i++) {
|
|
673
|
+
var member = _a[_i];
|
|
674
|
+
if (member.address.toString() === ownerAddress.toString()) {
|
|
675
|
+
this.logger.debug('ClusterService', "Found current owner: " + member.uuid + " at " + member.address.toString());
|
|
676
|
+
return member;
|
|
677
|
+
}
|
|
678
|
+
}
|
|
679
|
+
}
|
|
680
|
+
// Fallback: look for any member that might be the owner
|
|
681
|
+
this.logger.debug('ClusterService', "No active owner connection found, checking member list for potential owner");
|
|
682
|
+
return null;
|
|
683
|
+
};
|
|
684
|
+
/**
|
|
685
|
+
* Logs the current state for debugging purposes
|
|
686
|
+
*/
|
|
687
|
+
ClusterService.prototype.logCurrentState = function () {
|
|
688
|
+
var _this = this;
|
|
689
|
+
var activeConnections = Object.keys(this.client.getConnectionManager().getEstablishedConnections()).length;
|
|
690
|
+
var memberCount = this.members.length;
|
|
691
|
+
var downAddressesCount = this.downAddresses.size;
|
|
692
|
+
var hasOwner = !!this.ownerConnection;
|
|
693
|
+
this.logger.info('ClusterService', "Current State - Members: " + memberCount + ", Active Connections: " + activeConnections + ", Down Addresses: " + downAddressesCount + ", Has Owner: " + hasOwner);
|
|
694
|
+
if (this.ownerConnection) {
|
|
695
|
+
this.logger.info('ClusterService', "Owner Connection: " + this.ownerConnection.getAddress().toString() + ", Alive: " + this.ownerConnection.isAlive());
|
|
696
|
+
}
|
|
697
|
+
// Log all active connections
|
|
698
|
+
var connections = this.client.getConnectionManager().getEstablishedConnections();
|
|
699
|
+
Object.keys(connections).forEach(function (addressStr) {
|
|
700
|
+
var connection = connections[addressStr];
|
|
701
|
+
_this.logger.debug('ClusterService', "Connection to " + addressStr + ": Alive=" + connection.isAlive() + ", Owner=" + connection.isAuthenticatedAsOwner());
|
|
702
|
+
});
|
|
703
|
+
// Log down addresses
|
|
704
|
+
if (downAddressesCount > 0) {
|
|
705
|
+
var downAddresses = Array.from(this.downAddresses.keys());
|
|
706
|
+
this.logger.debug('ClusterService', "Down Addresses: " + downAddresses.join(', '));
|
|
707
|
+
}
|
|
708
|
+
};
|
|
709
|
+
ClusterService.prototype.startReconnectionTask = function () {
|
|
710
|
+
var _this = this;
|
|
711
|
+
// Periodically attempt to reconnect to previously failed addresses
|
|
712
|
+
this.reconnectionTask = setInterval(function () {
|
|
713
|
+
_this.attemptReconnectionToFailedNodes();
|
|
714
|
+
}, this.reconnectionInterval);
|
|
715
|
+
};
|
|
716
|
+
/**
|
|
717
|
+
* Starts a periodic task to log the current state for debugging
|
|
718
|
+
*/
|
|
719
|
+
ClusterService.prototype.startStateLoggingTask = function () {
|
|
720
|
+
var _this = this;
|
|
721
|
+
// Log state every 30 seconds for debugging
|
|
722
|
+
setInterval(function () {
|
|
723
|
+
if (_this.client.getLifecycleService().isRunning()) {
|
|
724
|
+
_this.logCurrentState();
|
|
725
|
+
}
|
|
726
|
+
}, 30000);
|
|
727
|
+
};
|
|
728
|
+
ClusterService.prototype.attemptReconnectionToFailedNodes = function () {
|
|
729
|
+
var _this = this;
|
|
730
|
+
// Allow reconnection even during failover, but be more careful
|
|
731
|
+
if (this.failoverInProgress) {
|
|
732
|
+
this.logger.debug('ClusterService', 'Skipping reconnection attempt during failover');
|
|
733
|
+
return;
|
|
734
|
+
}
|
|
735
|
+
var now = Date.now();
|
|
736
|
+
var addressesToReconnect = [];
|
|
737
|
+
var totalDownAddresses = this.downAddresses.size;
|
|
738
|
+
// If we have no down addresses, we can skip
|
|
739
|
+
if (totalDownAddresses === 0) {
|
|
740
|
+
return;
|
|
741
|
+
}
|
|
742
|
+
// Find addresses that are no longer blocked
|
|
743
|
+
this.downAddresses.forEach(function (downTime, addressStr) {
|
|
744
|
+
var timeSinceDown = now - downTime;
|
|
745
|
+
if (timeSinceDown > _this.addressBlockDuration) {
|
|
746
|
+
// Parse the address string back to Address object
|
|
747
|
+
try {
|
|
748
|
+
var _a = addressStr.split(':'), host = _a[0], portStr = _a[1];
|
|
749
|
+
var port = parseInt(portStr, 10);
|
|
750
|
+
if (host && !isNaN(port)) {
|
|
751
|
+
var address = new Address(host, port);
|
|
752
|
+
// Check if we already have a connection to this address
|
|
753
|
+
if (_this.client.getConnectionManager().hasConnection(address)) {
|
|
754
|
+
_this.logger.debug('ClusterService', "Already have active connection to " + addressStr + ", removing from down addresses");
|
|
755
|
+
_this.downAddresses.delete(addressStr);
|
|
756
|
+
return;
|
|
757
|
+
}
|
|
758
|
+
addressesToReconnect.push(address);
|
|
759
|
+
}
|
|
760
|
+
}
|
|
761
|
+
catch (error) {
|
|
762
|
+
_this.logger.warn('ClusterService', "Failed to parse address " + addressStr + " for reconnection:", error);
|
|
763
|
+
}
|
|
764
|
+
}
|
|
765
|
+
});
|
|
766
|
+
if (addressesToReconnect.length > 0) {
|
|
767
|
+
this.logger.info('ClusterService', "Attempting to reconnect to " + addressesToReconnect.length + " previously failed nodes: " + addressesToReconnect.map(function (addr) { return addr.toString(); }).join(', '));
|
|
768
|
+
// Attempt to establish connections to each unblocked address
|
|
769
|
+
addressesToReconnect.forEach(function (address) {
|
|
770
|
+
_this.attemptReconnectionToAddress(address);
|
|
771
|
+
});
|
|
772
|
+
}
|
|
773
|
+
else if (totalDownAddresses > 0) {
|
|
774
|
+
// Log remaining blocked addresses for debugging
|
|
775
|
+
var remainingBlocked = Array.from(this.downAddresses.keys()).map(function (addr) {
|
|
776
|
+
var downTime = _this.downAddresses.get(addr);
|
|
777
|
+
var timeSinceDown = now - downTime;
|
|
778
|
+
var remainingTime = Math.max(0, _this.addressBlockDuration - timeSinceDown);
|
|
779
|
+
return addr + " (" + Math.ceil(remainingTime / 1000) + "s remaining)";
|
|
780
|
+
});
|
|
781
|
+
this.logger.debug('ClusterService', "Still waiting for " + totalDownAddresses + " addresses to unblock: " + remainingBlocked.join(', '));
|
|
782
|
+
}
|
|
783
|
+
// Log current state after reconnection attempts
|
|
784
|
+
this.logCurrentState();
|
|
785
|
+
};
|
|
786
|
+
/**
 * Attempts to establish a regular (non-owner) member connection to an address
 * that was previously marked as down.
 *
 * The attempt is skipped when the connection manager already reports a connection
 * to the address (the address is then simply removed from the down list), or when
 * the connection manager reports any pending connection. Otherwise the address is
 * removed from the down list and a connection is requested. If the attempt fails,
 * the address is blocked again for a shortened period.
 *
 * @param address The address to reconnect to
 */
ClusterService.prototype.attemptReconnectionToAddress = function (address) {
    var _this = this;
    var addressStr = address.toString();
    var connectionManager = this.client.getConnectionManager();
    // Check if we already have a connection to this address
    if (connectionManager.hasConnection(address)) {
        this.logger.debug('ClusterService', "Already have active connection to " + addressStr + ", skipping reconnection");
        this.downAddresses.delete(addressStr);
        return;
    }
    // Skip while ANY connection attempt is pending, not only one to this address.
    // NOTE(review): this throttles a sweep to one in-flight attempt at a time; confirm
    // whether a per-address check was intended instead.
    var pendingConnections = Object.keys(connectionManager.getPendingConnections()).length;
    if (pendingConnections > 0) {
        this.logger.debug('ClusterService', "Already have pending connections, skipping reconnection to " + addressStr);
        return;
    }
    // Remove from down addresses to allow connection attempt
    this.downAddresses.delete(addressStr);
    this.logger.debug('ClusterService', "Attempting reconnection to " + addressStr);
    // Attempt to establish connection (not as owner, just as regular member connection)
    connectionManager.getOrConnect(address, false)
        .then(function (connection) {
        _this.logger.info('ClusterService', "Successfully reconnected to " + addressStr);
        // Only evaluate ownership change if we don't have an owner or current owner is unhealthy
        if (!_this.ownerConnection || !_this.ownerConnection.isHealthy()) {
            _this.logger.info('ClusterService', "Evaluating ownership change for " + addressStr);
            _this.evaluateOwnershipChange(address, connection);
        }
        else {
            _this.logger.debug('ClusterService', "Keeping " + addressStr + " as member connection, current owner is healthy");
        }
        // Trigger partition service refresh to update routing information
        _this.client.getPartitionService().refresh();
    }).catch(function (error) {
        // Also reached when the success handler above throws.
        _this.logger.warn('ClusterService', "Reconnection attempt to " + addressStr + " failed:", error);
        // Mark the address as down again, but with a shorter block duration for reconnection attempts
        var shorterBlockDuration = Math.min(_this.addressBlockDuration / 2, 15000); // Max 15 seconds
        _this.markAddressAsDownWithDuration(address, shorterBlockDuration);
    });
};
|
|
831
|
+
/**
 * Decides whether a freshly reconnected node should take over as owner.
 *
 * The node is promoted when there is no owner connection at all, or when the
 * current owner connection reports that it is no longer alive. In every other
 * case the current owner is kept and the node stays a plain member connection.
 *
 * NOTE(review): attemptReconnectionToAddress gates this call on isHealthy(), while
 * the check here uses isAlive() - confirm the two are meant to differ.
 *
 * @param address The address of the reconnected node
 * @param connection The connection to the reconnected node
 */
ClusterService.prototype.evaluateOwnershipChange = function (address, connection) {
    var currentOwner = this.ownerConnection;
    if (!currentOwner) {
        // No owner at all: the reconnected node takes the role
        this.logger.info('ClusterService', "Promoting reconnected node " + address.toString() + " to owner status");
        this.promoteToOwner(connection, address);
    }
    else if (!currentOwner.isAlive()) {
        // Owner connection is gone: hand ownership to the reconnected node
        this.logger.info('ClusterService', "Current owner is unhealthy, switching to reconnected node " + address.toString());
        this.promoteToOwner(connection, address);
    }
    else {
        // Owner is still alive: leave ownership untouched
        this.logger.debug('ClusterService', "Current owner is healthy, keeping " + address.toString() + " as member connection");
    }
};
|
|
852
|
+
/**
 * Promotes a connection to owner status.
 *
 * If a different owner connection is currently set and it points at another
 * address, that connection is destroyed through the connection manager first.
 * The given connection is then flagged as authenticated-as-owner, stored as the
 * owner connection, and a partition refresh is triggered. Any error raised along
 * the way is logged and the address is marked as down again.
 *
 * @param connection The connection to promote
 * @param address The address of the promoted connection
 */
ClusterService.prototype.promoteToOwner = function (connection, address) {
    try {
        var previousOwner = this.ownerConnection;
        // Close the old owner connection if it exists
        if (previousOwner && previousOwner !== connection) {
            var previousAddress = previousOwner.getAddress();
            if (previousAddress.toString() === address.toString()) {
                // destroyConnection() is given only an address, so it cannot tell the stale
                // owner object apart from the replacement connection to the same node.
                // Destroying here could tear down the connection that is being promoted.
                this.logger.debug('ClusterService', "Previous owner connection shares address " + address.toString() + " with the new owner, not destroying by address");
            }
            else {
                this.logger.info('ClusterService', "Closing previous owner connection to " + previousAddress.toString());
                this.client.getConnectionManager().destroyConnection(previousAddress);
            }
        }
        // Set the new owner connection
        connection.setAuthenticatedAsOwner(true);
        this.ownerConnection = connection;
        this.logger.info('ClusterService', "Successfully promoted " + address.toString() + " to owner status");
        // Refresh partition information with the new owner
        this.client.getPartitionService().refresh();
    }
    catch (error) {
        this.logger.error('ClusterService', "Failed to promote " + address.toString() + " to owner:", error);
        // If promotion fails, mark the address as down again
        this.markAddressAsDown(address);
    }
};
|
|
877
|
+
/**
 * Marks an address as down with a custom block duration.
 *
 * Nothing is blocked when the connection manager still reports a connection to
 * the address, or when the number of already-blocked addresses is at least
 * (member count - 1). Otherwise the current timestamp is stored for the address
 * and a timer is scheduled to lift the block after blockDuration.
 *
 * @param address The address to mark as down
 * @param blockDuration The duration to block the address (in milliseconds)
 */
ClusterService.prototype.markAddressAsDownWithDuration = function (address, blockDuration) {
    var _this = this;
    var addressStr = address.toString();
    var now = Date.now();
    // Don't block if we already have a healthy connection to this address
    if (this.client.getConnectionManager().hasConnection(address)) {
        this.logger.debug('ClusterService', "Not blocking " + addressStr + " as we have a healthy connection");
        return;
    }
    // Don't block if this would leave us with no available nodes
    var totalDownAddresses = this.downAddresses.size;
    var totalMembers = this.members.length;
    if (totalDownAddresses >= totalMembers - 1) {
        this.logger.warn('ClusterService', "Not blocking " + addressStr + " as it would leave us with no available nodes");
        return;
    }
    this.downAddresses.set(addressStr, now);
    this.logger.warn('ClusterService', "Marked address " + addressStr + " as down, will be blocked for " + blockDuration + "ms");
    // Schedule cleanup of this address after block duration
    setTimeout(function () {
        // Only lift the block created by THIS call. If the address was re-marked in the
        // meantime its stored timestamp differs, and the newer block must run its own course.
        if (_this.downAddresses.get(addressStr) === now) {
            _this.logger.info('ClusterService', "Unblocking address " + addressStr + " after block duration");
            _this.downAddresses.delete(addressStr);
        }
    }, blockDuration);
};
|
|
908
|
+
/**
 * Applies an ownership switch after a failover.
 *
 * Stores the given connection as the owner connection, then clears the partition
 * table and requests a partition refresh. The owner UUID is not touched here.
 *
 * @param newOwnerAddress The address of the new owner
 * @param newOwnerConnection The connection to the new owner
 */
ClusterService.prototype.handleOwnershipChange = function (newOwnerAddress, newOwnerConnection) {
    var log = this.logger;
    var ownerLabel = newOwnerAddress.toString();
    log.info('ClusterService', "Handling ownership change to " + ownerLabel);
    // Swap in the new owner; the existing owner UUID is left as-is in this method
    this.ownerConnection = newOwnerConnection;
    // Drop stale partition data, then ask for a fresh table
    var partitions = this.client.getPartitionService();
    partitions.clearPartitionTable();
    partitions.refresh();
    log.info('ClusterService', "Ownership change completed, new owner: " + ownerLabel);
};
|
|
925
|
+
/**
 * Exposes the failover manager held by this service to outside callers.
 *
 * @returns The value of this.failoverManager
 */
ClusterService.prototype.getFailoverManager = function () {
    var manager = this.failoverManager;
    return manager;
};
|
|
932
|
+
/**
 * Stops the periodic reconnection task, if one is set, and forgets every
 * address currently recorded as down.
 */
ClusterService.prototype.shutdown = function () {
    var task = this.reconnectionTask;
    if (task) {
        // Cancel the interval and drop the handle
        clearInterval(task);
        this.reconnectionTask = null;
    }
    this.downAddresses.clear();
};
|
|
415
939
|
return ClusterService;
|
|
416
940
|
}());
|
|
417
941
|
exports.ClusterService = ClusterService;
|