@topgunbuild/server 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +97 -0
- package/dist/index.d.mts +573 -3
- package/dist/index.d.ts +573 -3
- package/dist/index.js +1786 -262
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +1697 -183
- package/dist/index.mjs.map +1 -1
- package/package.json +12 -12
package/dist/index.d.mts
CHANGED
@@ -1,5 +1,5 @@
 import * as _topgunbuild_core from '@topgunbuild/core';
-import { Timestamp, LWWRecord, ORMapRecord, Principal, EventJournalImpl, EventJournalConfig, JournalEvent, PermissionPolicy, ConsistencyLevel, ReplicationConfig, LWWMap, ORMap, PermissionType, MigrationConfig, MigrationStatus, MigrationMetrics, PartitionMap, PartitionInfo, PartitionChange, ReplicationLag, ReplicationHealth, ReplicationResult, EntryProcessorDef, EntryProcessorResult, HLC, MergeRejection, ConflictResolverDef, MergeContext, MergeResult, IndexedLWWMap, IndexedORMap } from '@topgunbuild/core';
+import { Timestamp, LWWRecord, ORMapRecord, Principal, EventJournalImpl, EventJournalConfig, JournalEvent, PermissionPolicy, ConsistencyLevel, ReplicationConfig, LWWMap, ORMap, PermissionType, MigrationConfig, MigrationStatus, MigrationMetrics, PartitionMap, PartitionInfo, PartitionChange, ReplicationLag, ReplicationHealth, ReplicationResult, ClusterReadOptions, MerkleTree, EntryProcessorDef, EntryProcessorResult, HLC, MergeRejection, ConflictResolverDef, MergeContext, MergeResult, IndexedLWWMap, IndexedORMap } from '@topgunbuild/core';
 import { WebSocket } from 'ws';
 import { Pool, PoolConfig } from 'pg';
 import pino from 'pino';
@@ -1902,6 +1902,10 @@ declare class ServerCoordinator {
     private conflictResolverHandler;
     private eventJournalService?;
     private journalSubscriptions;
+    private partitionReassigner?;
+    private readReplicaHandler?;
+    private merkleTreeManager?;
+    private repairScheduler?;
     private readonly _nodeId;
     private _actualPort;
     private _actualClusterPort;
@@ -1943,6 +1947,23 @@ declare class ServerCoordinator {
     getTaskletSchedulerStats(): TaskletSchedulerStats;
     /** Get tasklet scheduler for scheduling long-running operations */
     getTaskletScheduler(): TaskletScheduler;
+    /**
+     * Phase 10.02: Graceful cluster departure
+     *
+     * Notifies the cluster that this node is leaving and allows time for:
+     * 1. Pending replication to complete
+     * 2. Other nodes to detect departure
+     * 3. Partition reassignment to begin
+     */
+    private gracefulClusterDeparture;
+    /** Get list of partition IDs owned by this node */
+    private getOwnedPartitions;
+    /** Wait for replication pipeline to flush pending operations */
+    private waitForReplicationFlush;
     shutdown(): Promise<void>;
     private handleConnection;
     private handleMessage;
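The three departure helpers above are private, so the only public entry point is `shutdown()`, which now performs the graceful departure before closing. A minimal operational sketch, assuming `shutdown()` drives the documented sequence internally (the signal wiring here is illustrative, not part of the package):

```ts
import { ServerCoordinator } from '@topgunbuild/server';

declare const server: ServerCoordinator;

// On SIGTERM, shutdown() is expected to: announce the departure, wait for
// pending replication to flush, and give peers time to start partition
// reassignment before the process exits.
process.on('SIGTERM', async () => {
  await server.shutdown();
  process.exit(0);
});
```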
@@ -2052,6 +2073,16 @@ declare class ServerCoordinator {
     * Use this for queries to avoid returning empty results during initial load.
     */
    getMapAsync(name: string, typeHint?: 'LWW' | 'OR'): Promise<LWWMap<string, any> | ORMap<string, any>>;
+    /**
+     * Phase 10.04: Get local record for anti-entropy repair
+     * Returns the LWWRecord for a key, used by RepairScheduler
+     */
+    private getLocalRecord;
+    /**
+     * Phase 10.04: Apply repaired record from anti-entropy repair
+     * Used by RepairScheduler to apply resolved conflicts
+     */
+    private applyRepairRecord;
    private loadMapFromStorage;
    private startGarbageCollection;
    /**
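These two private hooks are the coordinator-side counterparts of the `RepairScheduler.setDataAccessors` callbacks declared later in this file. A minimal sketch of such an accessor pair, backed by a plain `Map` instead of the coordinator's real storage (the backing map is an assumption for illustration only):

```ts
import type { LWWRecord } from '@topgunbuild/core';

// Stand-in store; the real hooks read and write the coordinator's LWW maps.
const store = new Map<string, LWWRecord<any>>();

const getRecord = (key: string): LWWRecord<any> | undefined => store.get(key);
const setRecord = (key: string, record: LWWRecord<any>): void => {
  store.set(key, record);
};

// Later handed to the scheduler: repairScheduler.setDataAccessors(getRecord, setRecord);
```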
@@ -2347,6 +2378,31 @@ interface FailureDetectorConfig {
     /** Initial heartbeat interval estimate (ms). Default: 1000 */
     initialHeartbeatIntervalMs: number;
 }
+declare const DEFAULT_FAILURE_DETECTOR_CONFIG: FailureDetectorConfig;
+interface NodeState {
+    /** Last heartbeat timestamp */
+    lastHeartbeat: number;
+    /** Heartbeat interval history for phi calculation */
+    intervalHistory: number[];
+    /** Whether node is currently suspected */
+    isSuspected: boolean;
+    /** Timestamp when suspicion started */
+    suspicionStartTime?: number;
+    /** Whether failure has been confirmed */
+    isConfirmedFailed: boolean;
+}
+interface FailureDetectorEvents {
+    nodeSuspected: {
+        nodeId: string;
+        phi: number;
+    };
+    nodeRecovered: {
+        nodeId: string;
+    };
+    nodeConfirmedFailed: {
+        nodeId: string;
+    };
+}
 declare class FailureDetector extends EventEmitter {
     private config;
     private nodeStates;
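The new `NodeState` and `FailureDetectorEvents` shapes make the detector's phi-accrual style explicit: suspicion carries a `phi` score derived from the heartbeat interval history rather than a single hard timeout. A small listener sketch using the event names from `FailureDetectorEvents` (`FailureDetector` extends `EventEmitter`; how you obtain the instance is up to your own wiring):

```ts
import type { FailureDetector } from '@topgunbuild/server';

declare const detector: FailureDetector;

detector.on('nodeSuspected', ({ nodeId, phi }: { nodeId: string; phi: number }) => {
  console.warn(`node ${nodeId} suspected (phi=${phi.toFixed(2)})`);
});
detector.on('nodeConfirmedFailed', ({ nodeId }: { nodeId: string }) => {
  console.error(`node ${nodeId} confirmed failed; partition failover should follow`);
});
detector.on('nodeRecovered', ({ nodeId }: { nodeId: string }) => {
  console.info(`node ${nodeId} recovered`);
});
```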
@@ -2460,7 +2516,7 @@ interface ClusterMember {
     isSelf: boolean;
 }
 interface ClusterMessage {
-    type: 'HELLO' | 'OP_FORWARD' | 'PARTITION_UPDATE' | 'HEARTBEAT' | 'CLUSTER_EVENT' | 'CLUSTER_QUERY_EXEC' | 'CLUSTER_QUERY_RESP' | 'CLUSTER_GC_REPORT' | 'CLUSTER_GC_COMMIT' | 'CLUSTER_LOCK_REQ' | 'CLUSTER_LOCK_RELEASE' | 'CLUSTER_LOCK_GRANTED' | 'CLUSTER_LOCK_RELEASED' | 'CLUSTER_CLIENT_DISCONNECTED' | 'CLUSTER_TOPIC_PUB';
+    type: 'HELLO' | 'MEMBER_LIST' | 'OP_FORWARD' | 'PARTITION_UPDATE' | 'HEARTBEAT' | 'CLUSTER_EVENT' | 'CLUSTER_QUERY_EXEC' | 'CLUSTER_QUERY_RESP' | 'CLUSTER_GC_REPORT' | 'CLUSTER_GC_COMMIT' | 'CLUSTER_LOCK_REQ' | 'CLUSTER_LOCK_RELEASE' | 'CLUSTER_LOCK_GRANTED' | 'CLUSTER_LOCK_RELEASED' | 'CLUSTER_CLIENT_DISCONNECTED' | 'CLUSTER_TOPIC_PUB' | 'CLUSTER_MERKLE_ROOT_REQ' | 'CLUSTER_MERKLE_ROOT_RESP' | 'CLUSTER_MERKLE_BUCKETS_REQ' | 'CLUSTER_MERKLE_BUCKETS_RESP' | 'CLUSTER_MERKLE_KEYS_REQ' | 'CLUSTER_MERKLE_KEYS_RESP' | 'CLUSTER_REPAIR_DATA_REQ' | 'CLUSTER_REPAIR_DATA_RESP';
     senderId: string;
     payload: any;
 }
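The widened `type` union adds one gossip message (`MEMBER_LIST`) and four request/response pairs used for Merkle-based anti-entropy. A sketch of narrowing on the new variants (the routing comments are assumptions about where each message ends up; `payload` remains `any`):

```ts
import type { ClusterMessage } from '@topgunbuild/server';

function routeClusterMessage(msg: ClusterMessage): void {
  switch (msg.type) {
    case 'MEMBER_LIST':
      // gossip: merge the advertised membership, dial unknown peers
      break;
    case 'CLUSTER_MERKLE_ROOT_REQ':
    case 'CLUSTER_MERKLE_BUCKETS_REQ':
    case 'CLUSTER_MERKLE_KEYS_REQ':
    case 'CLUSTER_REPAIR_DATA_REQ':
      // anti-entropy requests, answered from local Merkle trees / storage
      break;
    case 'CLUSTER_MERKLE_ROOT_RESP':
    case 'CLUSTER_MERKLE_BUCKETS_RESP':
    case 'CLUSTER_MERKLE_KEYS_RESP':
    case 'CLUSTER_REPAIR_DATA_RESP':
      // responses, correlated back to the repair scheduler's pending requests
      break;
    default:
      // pre-0.7.0 message types keep their existing handling
      break;
  }
}
```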
@@ -2501,6 +2557,21 @@ declare class ClusterManager extends EventEmitter {
     * Handle incoming heartbeat from a peer.
     */
    private handleHeartbeat;
+    /**
+     * Send current member list to a specific node (gossip protocol).
+     * Called when a new node joins to propagate cluster topology.
+     */
+    private sendMemberList;
+    /**
+     * Broadcast member list to all connected nodes.
+     * Called when cluster membership changes.
+     */
+    private broadcastMemberList;
+    /**
+     * Handle incoming member list from a peer (gossip protocol).
+     * Attempts to connect to unknown members.
+     */
+    private handleMemberList;
    /**
     * Handle confirmed node failure.
     */
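With these three private methods a joining node only needs one reachable seed: on `HELLO` the seed replies with a `MEMBER_LIST`, and the joiner dials every member it did not already know. The payload shape below is an assumption (the declaration types it as `any`); it only illustrates the joiner side of the exchange:

```ts
import type { ClusterMember, ClusterMessage } from '@topgunbuild/server';

// Assumed MEMBER_LIST payload: the sender's current membership view.
interface MemberListPayload {
  members: ClusterMember[];
}

// Hypothetical joiner-side handler: isConnected/connectTo stand in for the
// ClusterManager's own private connection bookkeeping.
function onMemberList(
  msg: ClusterMessage,
  isConnected: (member: ClusterMember) => boolean,
  connectTo: (member: ClusterMember) => void,
): void {
  const { members } = msg.payload as MemberListPayload;
  for (const member of members) {
    if (!isConnected(member)) connectTo(member);
  }
}
```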
@@ -3187,6 +3258,505 @@ declare class ClusterCoordinator extends EventEmitter {
|
|
|
3187
3258
|
private setupEventHandlers;
|
|
3188
3259
|
}
|
|
3189
3260
|
|
|
3261
|
+
/**
|
|
3262
|
+
* PartitionReassigner - Automatic Partition Failover
|
|
3263
|
+
*
|
|
3264
|
+
* Handles automatic reassignment of partitions when nodes fail:
|
|
3265
|
+
* - Promotes backup nodes to owners
|
|
3266
|
+
* - Assigns new backups from remaining cluster
|
|
3267
|
+
* - Coordinates with MigrationManager for data transfer
|
|
3268
|
+
* - Broadcasts partition table updates
|
|
3269
|
+
*
|
|
3270
|
+
* This is Phase 10.02 of the TopGun cluster enhancements.
|
|
3271
|
+
*/
|
|
3272
|
+
|
|
3273
|
+
interface PartitionReassignerConfig {
|
|
3274
|
+
/** Delay before reassigning partitions after failure detection (ms). Default: 1000 */
|
|
3275
|
+
reassignmentDelayMs: number;
|
|
3276
|
+
/** Maximum concurrent partition transfers. Default: 10 */
|
|
3277
|
+
maxConcurrentTransfers: number;
|
|
3278
|
+
/** Enable automatic backup promotion. Default: true */
|
|
3279
|
+
autoPromoteBackups: boolean;
|
|
3280
|
+
/** Enable automatic new backup assignment. Default: true */
|
|
3281
|
+
autoAssignNewBackups: boolean;
|
|
3282
|
+
}
|
|
3283
|
+
declare const DEFAULT_REASSIGNER_CONFIG: PartitionReassignerConfig;
|
|
3284
|
+
interface ReassignmentEvent {
|
|
3285
|
+
type: 'backup-promoted' | 'new-backup-assigned' | 'reassignment-complete';
|
|
3286
|
+
partitionId: number;
|
|
3287
|
+
previousOwner?: string;
|
|
3288
|
+
newOwner?: string;
|
|
3289
|
+
backups?: string[];
|
|
3290
|
+
failedNodeId?: string;
|
|
3291
|
+
}
|
|
3292
|
+
interface FailoverStatus {
|
|
3293
|
+
inProgress: boolean;
|
|
3294
|
+
failedNodeId?: string;
|
|
3295
|
+
partitionsReassigned: number;
|
|
3296
|
+
partitionsPending: number;
|
|
3297
|
+
startedAt?: number;
|
|
3298
|
+
completedAt?: number;
|
|
3299
|
+
}
|
|
3300
|
+
declare class PartitionReassigner extends EventEmitter {
|
|
3301
|
+
private config;
|
|
3302
|
+
private clusterManager;
|
|
3303
|
+
private partitionService;
|
|
3304
|
+
private failoverInProgress;
|
|
3305
|
+
private currentFailedNode?;
|
|
3306
|
+
private reassignmentStartTime?;
|
|
3307
|
+
private partitionsReassigned;
|
|
3308
|
+
private pendingReassignments;
|
|
3309
|
+
private reassignmentTimer?;
|
|
3310
|
+
constructor(clusterManager: ClusterManager, partitionService: PartitionService, config?: Partial<PartitionReassignerConfig>);
|
|
3311
|
+
private setupEventHandlers;
|
|
3312
|
+
/**
|
|
3313
|
+
* Handle a node failure - initiates failover process
|
|
3314
|
+
*/
|
|
3315
|
+
private handleNodeFailure;
|
|
3316
|
+
/**
|
|
3317
|
+
* Handle a graceful node departure
|
|
3318
|
+
*/
|
|
3319
|
+
private handleNodeDeparture;
|
|
3320
|
+
/**
|
|
3321
|
+
* Execute the failover process for a failed node
|
|
3322
|
+
*/
|
|
3323
|
+
private executeFailover;
|
|
3324
|
+
/**
|
|
3325
|
+
* Find all partitions that need reassignment
|
|
3326
|
+
*/
|
|
3327
|
+
private findOrphanedPartitions;
|
|
3328
|
+
/**
|
|
3329
|
+
* Reassign a single partition
|
|
3330
|
+
*/
|
|
3331
|
+
private reassignPartition;
|
|
3332
|
+
/**
|
|
3333
|
+
* Select backup nodes for a partition
|
|
3334
|
+
*/
|
|
3335
|
+
private selectBackups;
|
|
3336
|
+
/**
|
|
3337
|
+
* Complete the failover process
|
|
3338
|
+
*/
|
|
3339
|
+
private completeFailover;
|
|
3340
|
+
/**
|
|
3341
|
+
* Get current failover status
|
|
3342
|
+
*/
|
|
3343
|
+
getStatus(): FailoverStatus;
|
|
3344
|
+
/**
|
|
3345
|
+
* Check if failover is in progress
|
|
3346
|
+
*/
|
|
3347
|
+
isFailoverInProgress(): boolean;
|
|
3348
|
+
/**
|
|
3349
|
+
* Force immediate reassignment (for testing/manual intervention)
|
|
3350
|
+
*/
|
|
3351
|
+
forceReassignment(failedNodeId: string): void;
|
|
3352
|
+
/**
|
|
3353
|
+
* Stop any pending reassignment
|
|
3354
|
+
*/
|
|
3355
|
+
stop(): void;
|
|
3356
|
+
}
|
|
3357
|
+
|
|
3358
|
+
/**
|
|
3359
|
+
* ReadReplicaHandler - Read Scaling via Replicas
|
|
3360
|
+
*
|
|
3361
|
+
* Phase 10.03: Enables reading from backup nodes to:
|
|
3362
|
+
* - Scale read throughput linearly with replicas
|
|
3363
|
+
* - Reduce latency by reading from nearest replica
|
|
3364
|
+
* - Provide availability during owner unavailability
|
|
3365
|
+
*
|
|
3366
|
+
* Supports three consistency levels for reads:
|
|
3367
|
+
* - STRONG: Read from partition owner (current behavior)
|
|
3368
|
+
* - EVENTUAL: Read from any replica (owner or backup)
|
|
3369
|
+
* - LOCAL: Read from local node if it's a replica
|
|
3370
|
+
*/
|
|
3371
|
+
|
|
3372
|
+
interface ReadReplicaConfig {
|
|
3373
|
+
/** Default consistency for reads. Default: STRONG */
|
|
3374
|
+
defaultConsistency: ConsistencyLevel;
|
|
3375
|
+
/** Maximum staleness for eventual reads in ms. Default: 5000 */
|
|
3376
|
+
maxStalenessMs: number;
|
|
3377
|
+
/** Prefer local replica over remote. Default: true */
|
|
3378
|
+
preferLocalReplica: boolean;
|
|
3379
|
+
/** Load balancing strategy for replica selection. Default: 'latency-based' */
|
|
3380
|
+
loadBalancing: 'round-robin' | 'least-connections' | 'latency-based';
|
|
3381
|
+
}
|
|
3382
|
+
declare const DEFAULT_READ_REPLICA_CONFIG: ReadReplicaConfig;
|
|
3383
|
+
interface ReadResult<T> {
|
|
3384
|
+
value: T | null;
|
|
3385
|
+
version?: Timestamp;
|
|
3386
|
+
source: string;
|
|
3387
|
+
isOwner: boolean;
|
|
3388
|
+
staleness?: number;
|
|
3389
|
+
}
|
|
3390
|
+
interface ReadRequest {
|
|
3391
|
+
mapName: string;
|
|
3392
|
+
key: string;
|
|
3393
|
+
options?: ClusterReadOptions;
|
|
3394
|
+
}
|
|
3395
|
+
declare class ReadReplicaHandler extends EventEmitter {
|
|
3396
|
+
private config;
|
|
3397
|
+
private partitionService;
|
|
3398
|
+
private clusterManager;
|
|
3399
|
+
private lagTracker?;
|
|
3400
|
+
private nodeId;
|
|
3401
|
+
private roundRobinCounters;
|
|
3402
|
+
constructor(partitionService: PartitionService, clusterManager: ClusterManager, nodeId: string, lagTracker?: LagTracker, config?: Partial<ReadReplicaConfig>);
|
|
3403
|
+
/**
|
|
3404
|
+
* Determine if a read request can be served locally
|
|
3405
|
+
*/
|
|
3406
|
+
canServeLocally(request: ReadRequest): boolean;
|
|
3407
|
+
/**
|
|
3408
|
+
* Determine which node should handle the read
|
|
3409
|
+
*/
|
|
3410
|
+
selectReadNode(request: ReadRequest): string | null;
|
|
3411
|
+
/**
|
|
3412
|
+
* Select replica using configured load balancing strategy
|
|
3413
|
+
*/
|
|
3414
|
+
private selectByStrategy;
|
|
3415
|
+
/**
|
|
3416
|
+
* Round-robin selection
|
|
3417
|
+
*/
|
|
3418
|
+
private selectRoundRobin;
|
|
3419
|
+
/**
|
|
3420
|
+
* Latency-based selection using lag tracker
|
|
3421
|
+
*/
|
|
3422
|
+
private selectByLatency;
|
|
3423
|
+
/**
|
|
3424
|
+
* Get estimated staleness for a node in ms
|
|
3425
|
+
*/
|
|
3426
|
+
private getNodeStaleness;
|
|
3427
|
+
/**
|
|
3428
|
+
* Check if a node is alive in the cluster
|
|
3429
|
+
*/
|
|
3430
|
+
private isNodeAlive;
|
|
3431
|
+
/**
|
|
3432
|
+
* Select first alive backup from list
|
|
3433
|
+
*/
|
|
3434
|
+
private selectAliveBackup;
|
|
3435
|
+
/**
|
|
3436
|
+
* Create read response metadata
|
|
3437
|
+
*/
|
|
3438
|
+
createReadMetadata(key: string, options?: ClusterReadOptions): {
|
|
3439
|
+
source: string;
|
|
3440
|
+
isOwner: boolean;
|
|
3441
|
+
consistency: ConsistencyLevel;
|
|
3442
|
+
};
|
|
3443
|
+
/**
|
|
3444
|
+
* Check if local node should forward read to owner
|
|
3445
|
+
*/
|
|
3446
|
+
shouldForwardRead(request: ReadRequest): boolean;
|
|
3447
|
+
/**
|
|
3448
|
+
* Get metrics for monitoring
|
|
3449
|
+
*/
|
|
3450
|
+
getMetrics(): {
|
|
3451
|
+
defaultConsistency: ConsistencyLevel;
|
|
3452
|
+
preferLocalReplica: boolean;
|
|
3453
|
+
loadBalancing: string;
|
|
3454
|
+
roundRobinPartitions: number;
|
|
3455
|
+
};
|
|
3456
|
+
}
|
|
3457
|
+
|
|
3458
|
+
/**
|
|
3459
|
+
* MerkleTreeManager - Per-Partition Merkle Tree Management
|
|
3460
|
+
*
|
|
3461
|
+
* Phase 10.04: Manages Merkle trees for each partition to enable:
|
|
3462
|
+
* - Efficient delta sync between nodes
|
|
3463
|
+
* - Anti-entropy repair detection
|
|
3464
|
+
* - Incremental updates on writes
|
|
3465
|
+
*
|
|
3466
|
+
* Each partition maintains its own Merkle tree for independent
|
|
3467
|
+
* consistency checking and repair.
|
|
3468
|
+
*/
|
|
3469
|
+
|
|
3470
|
+
interface MerkleTreeManagerConfig {
|
|
3471
|
+
/** Tree depth for Merkle trees. Default: 3 */
|
|
3472
|
+
treeDepth: number;
|
|
3473
|
+
/** Enable automatic tree updates on write. Default: true */
|
|
3474
|
+
autoUpdate: boolean;
|
|
3475
|
+
/** Lazy initialization of trees. Default: true */
|
|
3476
|
+
lazyInit: boolean;
|
|
3477
|
+
}
|
|
3478
|
+
declare const DEFAULT_MERKLE_TREE_CONFIG: MerkleTreeManagerConfig;
|
|
3479
|
+
interface MerkleComparisonResult {
|
|
3480
|
+
partitionId: number;
|
|
3481
|
+
localRoot: number;
|
|
3482
|
+
remoteRoot: number;
|
|
3483
|
+
needsSync: boolean;
|
|
3484
|
+
differingBuckets: string[];
|
|
3485
|
+
}
|
|
3486
|
+
interface PartitionMerkleInfo {
|
|
3487
|
+
partitionId: number;
|
|
3488
|
+
rootHash: number;
|
|
3489
|
+
keyCount: number;
|
|
3490
|
+
lastUpdated: number;
|
|
3491
|
+
}
|
|
3492
|
+
declare class MerkleTreeManager extends EventEmitter {
|
|
3493
|
+
private config;
|
|
3494
|
+
private trees;
|
|
3495
|
+
private keyCounts;
|
|
3496
|
+
private lastUpdated;
|
|
3497
|
+
private nodeId;
|
|
3498
|
+
constructor(nodeId: string, config?: Partial<MerkleTreeManagerConfig>);
|
|
3499
|
+
/**
|
|
3500
|
+
* Get or create a Merkle tree for a partition
|
|
3501
|
+
*/
|
|
3502
|
+
getTree(partitionId: number): MerkleTree;
|
|
3503
|
+
/**
|
|
3504
|
+
* Build tree for a partition from existing data
|
|
3505
|
+
*/
|
|
3506
|
+
buildTree(partitionId: number, records: Map<string, LWWRecord<any>>): void;
|
|
3507
|
+
/**
|
|
3508
|
+
* Incrementally update tree when a record changes
|
|
3509
|
+
*/
|
|
3510
|
+
updateRecord(partitionId: number, key: string, record: LWWRecord<any>): void;
|
|
3511
|
+
/**
|
|
3512
|
+
* Remove a key from the tree (e.g., after GC)
|
|
3513
|
+
*/
|
|
3514
|
+
removeRecord(partitionId: number, key: string): void;
|
|
3515
|
+
/**
|
|
3516
|
+
* Get the path prefix for a key in the Merkle tree
|
|
3517
|
+
*/
|
|
3518
|
+
private getKeyPath;
|
|
3519
|
+
/**
|
|
3520
|
+
* Get root hash for a partition
|
|
3521
|
+
*/
|
|
3522
|
+
getRootHash(partitionId: number): number;
|
|
3523
|
+
/**
|
|
3524
|
+
* Compare local tree with remote root hash
|
|
3525
|
+
*/
|
|
3526
|
+
compareWithRemote(partitionId: number, remoteRoot: number): MerkleComparisonResult;
|
|
3527
|
+
/**
|
|
3528
|
+
* Find buckets that differ between local and remote tree
|
|
3529
|
+
* Note: This is a simplified version - full implementation would
|
|
3530
|
+
* need to exchange bucket hashes with the remote node
|
|
3531
|
+
*/
|
|
3532
|
+
private findDifferingBuckets;
|
|
3533
|
+
/**
|
|
3534
|
+
* Recursively collect all leaf bucket paths
|
|
3535
|
+
*/
|
|
3536
|
+
private collectLeafBuckets;
|
|
3537
|
+
/**
|
|
3538
|
+
* Get bucket hashes for a partition at a given path
|
|
3539
|
+
*/
|
|
3540
|
+
getBuckets(partitionId: number, path: string): Record<string, number>;
|
|
3541
|
+
/**
|
|
3542
|
+
* Get keys in a specific bucket
|
|
3543
|
+
*/
|
|
3544
|
+
getKeysInBucket(partitionId: number, path: string): string[];
|
|
3545
|
+
/**
|
|
3546
|
+
* Get all keys across all buckets for a partition
|
|
3547
|
+
*/
|
|
3548
|
+
getAllKeys(partitionId: number): string[];
|
|
3549
|
+
/**
|
|
3550
|
+
* Recursively collect all keys from the tree
|
|
3551
|
+
*/
|
|
3552
|
+
private collectAllKeys;
|
|
3553
|
+
/**
|
|
3554
|
+
* Get info about all managed partitions
|
|
3555
|
+
*/
|
|
3556
|
+
getPartitionInfos(): PartitionMerkleInfo[];
|
|
3557
|
+
/**
|
|
3558
|
+
* Get info for a specific partition
|
|
3559
|
+
*/
|
|
3560
|
+
getPartitionInfo(partitionId: number): PartitionMerkleInfo | null;
|
|
3561
|
+
/**
|
|
3562
|
+
* Clear tree for a partition (e.g., after migration)
|
|
3563
|
+
*/
|
|
3564
|
+
clearPartition(partitionId: number): void;
|
|
3565
|
+
/**
|
|
3566
|
+
* Clear all trees
|
|
3567
|
+
*/
|
|
3568
|
+
clearAll(): void;
|
|
3569
|
+
/**
|
|
3570
|
+
* Get metrics for monitoring
|
|
3571
|
+
*/
|
|
3572
|
+
getMetrics(): {
|
|
3573
|
+
totalPartitions: number;
|
|
3574
|
+
totalKeys: number;
|
|
3575
|
+
averageKeysPerPartition: number;
|
|
3576
|
+
};
|
|
3577
|
+
/**
|
|
3578
|
+
* Serialize tree state for network transfer
|
|
3579
|
+
*/
|
|
3580
|
+
serializeTree(partitionId: number): {
|
|
3581
|
+
rootHash: number;
|
|
3582
|
+
buckets: Record<string, Record<string, number>>;
|
|
3583
|
+
} | null;
|
|
3584
|
+
private collectBucketsAtDepth;
|
|
3585
|
+
}
|
|
3586
|
+
|
|
3587
|
+
/**
|
|
3588
|
+
* RepairScheduler - Anti-Entropy Repair System
|
|
3589
|
+
*
|
|
3590
|
+
* Phase 10.04: Proactively detects and repairs data inconsistencies:
|
|
3591
|
+
* - Periodic scanning of partitions
|
|
3592
|
+
* - Merkle tree-based difference detection
|
|
3593
|
+
* - LWW conflict resolution
|
|
3594
|
+
* - Throttled repair execution
|
|
3595
|
+
*
|
|
3596
|
+
* Based on Cassandra/Dynamo anti-entropy patterns.
|
|
3597
|
+
*/
|
|
3598
|
+
|
|
3599
|
+
interface RepairConfig {
|
|
3600
|
+
/** Enable anti-entropy repair. Default: true */
|
|
3601
|
+
enabled: boolean;
|
|
3602
|
+
/** Interval between full scans in ms. Default: 3600000 (1 hour) */
|
|
3603
|
+
scanIntervalMs: number;
|
|
3604
|
+
/** Keys per repair batch. Default: 1000 */
|
|
3605
|
+
repairBatchSize: number;
|
|
3606
|
+
/** Maximum concurrent partition repairs. Default: 2 */
|
|
3607
|
+
maxConcurrentRepairs: number;
|
|
3608
|
+
/** Delay between batches in ms. Default: 100 */
|
|
3609
|
+
throttleMs: number;
|
|
3610
|
+
/** Prioritize recently modified partitions. Default: true */
|
|
3611
|
+
prioritizeRecent: boolean;
|
|
3612
|
+
/** Timeout for network requests in ms. Default: 5000 */
|
|
3613
|
+
requestTimeoutMs: number;
|
|
3614
|
+
}
|
|
3615
|
+
declare const DEFAULT_REPAIR_CONFIG: RepairConfig;
|
|
3616
|
+
interface RepairTask {
|
|
3617
|
+
partitionId: number;
|
|
3618
|
+
replicaNodeId: string;
|
|
3619
|
+
priority: 'high' | 'normal' | 'low';
|
|
3620
|
+
scheduledAt: number;
|
|
3621
|
+
}
|
|
3622
|
+
interface RepairResult {
|
|
3623
|
+
partitionId: number;
|
|
3624
|
+
replicaNodeId: string;
|
|
3625
|
+
keysScanned: number;
|
|
3626
|
+
keysRepaired: number;
|
|
3627
|
+
durationMs: number;
|
|
3628
|
+
success: boolean;
|
|
3629
|
+
error?: string;
|
|
3630
|
+
}
|
|
3631
|
+
interface RepairMetrics {
|
|
3632
|
+
scansCompleted: number;
|
|
3633
|
+
repairsExecuted: number;
|
|
3634
|
+
keysRepaired: number;
|
|
3635
|
+
errorsEncountered: number;
|
|
3636
|
+
lastScanTime?: number;
|
|
3637
|
+
averageRepairDurationMs: number;
|
|
3638
|
+
}
|
|
3639
|
+
type RecordGetter = (key: string) => LWWRecord<any> | undefined;
|
|
3640
|
+
type RecordSetter = (key: string, record: LWWRecord<any>) => void;
|
|
3641
|
+
declare class RepairScheduler extends EventEmitter {
|
|
3642
|
+
private config;
|
|
3643
|
+
private merkleManager;
|
|
3644
|
+
private clusterManager;
|
|
3645
|
+
private partitionService;
|
|
3646
|
+
private nodeId;
|
|
3647
|
+
private repairQueue;
|
|
3648
|
+
private activeRepairs;
|
|
3649
|
+
private scanTimer?;
|
|
3650
|
+
private processTimer?;
|
|
3651
|
+
private started;
|
|
3652
|
+
private pendingRequests;
|
|
3653
|
+
private metrics;
|
|
3654
|
+
private getRecord?;
|
|
3655
|
+
private setRecord?;
|
|
3656
|
+
constructor(merkleManager: MerkleTreeManager, clusterManager: ClusterManager, partitionService: PartitionService, nodeId: string, config?: Partial<RepairConfig>);
|
|
3657
|
+
/**
|
|
3658
|
+
* Set data access callbacks
|
|
3659
|
+
*/
|
|
3660
|
+
setDataAccessors(getRecord: RecordGetter, setRecord: RecordSetter): void;
|
|
3661
|
+
/**
|
|
3662
|
+
* Setup network message handlers
|
|
3663
|
+
*/
|
|
3664
|
+
private setupNetworkHandlers;
|
|
3665
|
+
/**
|
|
3666
|
+
* Handle incoming cluster messages
|
|
3667
|
+
*/
|
|
3668
|
+
private handleClusterMessage;
|
|
3669
|
+
private handleMerkleRootReq;
|
|
3670
|
+
private handleMerkleBucketsReq;
|
|
3671
|
+
private handleMerkleKeysReq;
|
|
3672
|
+
private handleRepairDataReq;
|
|
3673
|
+
private handleResponse;
|
|
3674
|
+
/**
|
|
3675
|
+
* Start the repair scheduler
|
|
3676
|
+
*/
|
|
3677
|
+
start(): void;
|
|
3678
|
+
/**
|
|
3679
|
+
* Stop the repair scheduler
|
|
3680
|
+
*/
|
|
3681
|
+
stop(): void;
|
|
3682
|
+
/**
|
|
3683
|
+
* Schedule a full scan of all owned partitions
|
|
3684
|
+
*/
|
|
3685
|
+
scheduleFullScan(): void;
|
|
3686
|
+
/**
|
|
3687
|
+
* Schedule repair for a specific partition
|
|
3688
|
+
*/
|
|
3689
|
+
schedulePartitionRepair(partitionId: number, priority?: 'high' | 'normal' | 'low'): void;
|
|
3690
|
+
/**
|
|
3691
|
+
* Sort repair queue by priority
|
|
3692
|
+
*/
|
|
3693
|
+
private sortRepairQueue;
|
|
3694
|
+
/**
|
|
3695
|
+
* Process the repair queue
|
|
3696
|
+
*/
|
|
3697
|
+
private processRepairQueue;
|
|
3698
|
+
/**
|
|
3699
|
+
* Execute repair for a partition-replica pair
|
|
3700
|
+
*/
|
|
3701
|
+
private executeRepair;
|
|
3702
|
+
/**
|
|
3703
|
+
* Send a request and wait for response
|
|
3704
|
+
*/
|
|
3705
|
+
private sendRequest;
|
|
3706
|
+
/**
|
|
3707
|
+
* Request Merkle root from remote node
|
|
3708
|
+
*/
|
|
3709
|
+
private requestRemoteMerkleRoot;
|
|
3710
|
+
/**
|
|
3711
|
+
* Find keys that differ between local and remote using bucket exchange
|
|
3712
|
+
*/
|
|
3713
|
+
private findDifferences;
|
|
3714
|
+
/**
|
|
3715
|
+
* Repair a single key
|
|
3716
|
+
*/
|
|
3717
|
+
private repairKey;
|
|
3718
|
+
/**
|
|
3719
|
+
* Resolve conflict between two records using LWW
|
|
3720
|
+
*/
|
|
3721
|
+
resolveConflict<T>(a: LWWRecord<T> | undefined, b: LWWRecord<T> | undefined): LWWRecord<T> | null;
|
|
3722
|
+
/**
|
|
3723
|
+
* Compare two timestamps
|
|
3724
|
+
*/
|
|
3725
|
+
private compareTimestamps;
|
|
3726
|
+
/**
|
|
3727
|
+
* Get partitions owned by this node
|
|
3728
|
+
*/
|
|
3729
|
+
private getOwnedPartitions;
|
|
3730
|
+
/**
|
|
3731
|
+
* Get partitions where this node is a backup
|
|
3732
|
+
*/
|
|
3733
|
+
private getReplicaPartitions;
|
|
3734
|
+
/**
|
|
3735
|
+
* Update average repair duration
|
|
3736
|
+
*/
|
|
3737
|
+
private updateAverageRepairDuration;
|
|
3738
|
+
/**
|
|
3739
|
+
* Get repair metrics
|
|
3740
|
+
*/
|
|
3741
|
+
getMetrics(): RepairMetrics;
|
|
3742
|
+
/**
|
|
3743
|
+
* Get repair queue status
|
|
3744
|
+
*/
|
|
3745
|
+
getQueueStatus(): {
|
|
3746
|
+
queueLength: number;
|
|
3747
|
+
activeRepairs: number;
|
|
3748
|
+
maxConcurrent: number;
|
|
3749
|
+
};
|
|
3750
|
+
/**
|
|
3751
|
+
* Force immediate repair for a partition
|
|
3752
|
+
*/
|
|
3753
|
+
forceRepair(partitionId: number): void;
|
|
3754
|
+
/**
|
|
3755
|
+
* Sleep utility
|
|
3756
|
+
*/
|
|
3757
|
+
private sleep;
|
|
3758
|
+
}
|
|
3759
|
+
|
|
3190
3760
|
/**
|
|
3191
3761
|
* Configuration for the processor sandbox.
|
|
3192
3762
|
*/
|
|
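The four new private fields added to `ServerCoordinator` earlier in this diff suggest the coordinator constructs and wires these components itself. A standalone wiring sketch using only the constructors and methods declared above (the `clusterManager`, `partitionService`, and `nodeId` values, plus the backing store, are assumed to come from your own setup):

```ts
import {
  ClusterManager,
  MerkleTreeManager,
  PartitionReassigner,
  PartitionService,
  ReadReplicaHandler,
  RepairScheduler,
} from '@topgunbuild/server';
import type { LWWRecord } from '@topgunbuild/core';

declare const clusterManager: ClusterManager;
declare const partitionService: PartitionService;
declare const nodeId: string;

// Failover: promote backups and reassign partitions when a node fails.
const reassigner = new PartitionReassigner(clusterManager, partitionService, {
  reassignmentDelayMs: 1000,
});

// Read scaling: route EVENTUAL/LOCAL reads to backup replicas.
const readReplicas = new ReadReplicaHandler(partitionService, clusterManager, nodeId, undefined, {
  loadBalancing: 'latency-based',
});

// Anti-entropy: per-partition Merkle trees plus a scheduler that repairs drift.
const merkle = new MerkleTreeManager(nodeId, { treeDepth: 3 });
const repairs = new RepairScheduler(merkle, clusterManager, partitionService, nodeId, {
  scanIntervalMs: 3_600_000,
});

// The scheduler reads and writes records through callbacks, mirroring the
// coordinator's private getLocalRecord/applyRepairRecord hooks.
const store = new Map<string, LWWRecord<any>>();
repairs.setDataAccessors(
  (key) => store.get(key),
  (key, record) => {
    store.set(key, record);
  },
);
repairs.start();

console.log(reassigner.getStatus().inProgress, readReplicas.getMetrics().loadBalancing);
```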
@@ -3872,4 +4442,4 @@ declare class MapFactory {
     getConfig(): ServerIndexConfig;
 }

-export { BufferPool, type BufferPoolConfig, type BufferPoolStats, type ClusterConfig, ClusterCoordinator, type ClusterCoordinatorConfig, type ClusterCoordinatorEvents, ClusterManager, type ClusterMember, type ClusterMessage, type CoalescingPreset, type CoalescingWriterMetrics, type CoalescingWriterOptions, ConflictResolverHandler, type ConflictResolverHandlerConfig, ConflictResolverService, type ConflictResolverServiceConfig, type ConnectionContext, ConnectionRateLimiter, DEFAULT_CLUSTER_COORDINATOR_CONFIG, DEFAULT_CONFLICT_RESOLVER_CONFIG, DEFAULT_INDEX_CONFIG, DEFAULT_JOURNAL_SERVICE_CONFIG, DEFAULT_LAG_TRACKER_CONFIG, DEFAULT_SANDBOX_CONFIG, EntryProcessorHandler, type EntryProcessorHandlerConfig, EventJournalService, type EventJournalServiceConfig, type ExportOptions, FilterTasklet, ForEachTasklet, type IInterceptor, type IServerStorage, type IndexDefinition, IteratorTasklet, type IteratorTaskletConfig, type LagInfo, LagTracker, type LagTrackerConfig, LockManager, type Logger, MapFactory, type MapIndexConfig, MapTasklet, MapWithResolver, type MapWithResolverConfig, MemoryServerAdapter, type MergeWithResolverResult, MigrationManager, type NativeModuleStatus, type NativeStats, type ORMapTombstones, type ORMapValue, ObjectPool, type ObjectPoolConfig, type ObjectPoolStats, type OpContext, type PartitionDistribution, PartitionService, type PartitionServiceConfig, type PartitionServiceEvents, type PooledEventPayload, type PooledMessage, type PooledRecord, type PooledTimestamp, PostgresAdapter, type PostgresAdapterOptions, ProcessorSandbox, type ProcessorSandboxConfig, type ProgressState, RateLimitInterceptor, type RateLimiterConfig, type RateLimiterStats, ReduceTasklet, ReplicationPipeline, SecurityManager, ServerCoordinator, type ServerCoordinatorConfig, type ServerIndexConfig, type ServerOp, type SetWithResolverResult, type StorageValue, type Tasklet, TaskletScheduler, type TaskletSchedulerConfig, type TaskletSchedulerStats, TimestampInterceptor, coalescingPresets, createEventPayloadPool, createMessagePool, createRecordPool, createTimestampPool, getCoalescingPreset, getGlobalBufferPool, getGlobalEventPayloadPool, getGlobalMessagePool, getGlobalRecordPool, getGlobalTimestampPool, getNativeModuleStatus, getNativeStats, logNativeStatus, logger, mergeWithDefaults, setGlobalBufferPool, setGlobalEventPayloadPool, setGlobalMessagePool, setGlobalRecordPool, setGlobalTimestampPool, validateIndexConfig };
+export { BufferPool, type BufferPoolConfig, type BufferPoolStats, type ClusterConfig, ClusterCoordinator, type ClusterCoordinatorConfig, type ClusterCoordinatorEvents, ClusterManager, type ClusterMember, type ClusterMessage, type CoalescingPreset, type CoalescingWriterMetrics, type CoalescingWriterOptions, ConflictResolverHandler, type ConflictResolverHandlerConfig, ConflictResolverService, type ConflictResolverServiceConfig, type ConnectionContext, ConnectionRateLimiter, DEFAULT_CLUSTER_COORDINATOR_CONFIG, DEFAULT_CONFLICT_RESOLVER_CONFIG, DEFAULT_FAILURE_DETECTOR_CONFIG, DEFAULT_INDEX_CONFIG, DEFAULT_JOURNAL_SERVICE_CONFIG, DEFAULT_LAG_TRACKER_CONFIG, DEFAULT_MERKLE_TREE_CONFIG, DEFAULT_READ_REPLICA_CONFIG, DEFAULT_REASSIGNER_CONFIG, DEFAULT_REPAIR_CONFIG, DEFAULT_SANDBOX_CONFIG, EntryProcessorHandler, type EntryProcessorHandlerConfig, EventJournalService, type EventJournalServiceConfig, type ExportOptions, type FailoverStatus, FailureDetector, type FailureDetectorConfig, type FailureDetectorEvents, FilterTasklet, ForEachTasklet, type IInterceptor, type IServerStorage, type IndexDefinition, IteratorTasklet, type IteratorTaskletConfig, type LagInfo, LagTracker, type LagTrackerConfig, LockManager, type Logger, MapFactory, type MapIndexConfig, MapTasklet, MapWithResolver, type MapWithResolverConfig, MemoryServerAdapter, type MergeWithResolverResult, type MerkleComparisonResult, MerkleTreeManager, type MerkleTreeManagerConfig, MigrationManager, type NativeModuleStatus, type NativeStats, type NodeState, type ORMapTombstones, type ORMapValue, ObjectPool, type ObjectPoolConfig, type ObjectPoolStats, type OpContext, type PartitionDistribution, type PartitionMerkleInfo, PartitionReassigner, type PartitionReassignerConfig, PartitionService, type PartitionServiceConfig, type PartitionServiceEvents, type PooledEventPayload, type PooledMessage, type PooledRecord, type PooledTimestamp, PostgresAdapter, type PostgresAdapterOptions, ProcessorSandbox, type ProcessorSandboxConfig, type ProgressState, RateLimitInterceptor, type RateLimiterConfig, type RateLimiterStats, type ReadReplicaConfig, ReadReplicaHandler, type ReadRequest, type ReadResult, type ReassignmentEvent, ReduceTasklet, type RepairConfig, type RepairMetrics, type RepairResult, RepairScheduler, type RepairTask, ReplicationPipeline, SecurityManager, ServerCoordinator, type ServerCoordinatorConfig, type ServerIndexConfig, type ServerOp, type SetWithResolverResult, type StorageValue, type Tasklet, TaskletScheduler, type TaskletSchedulerConfig, type TaskletSchedulerStats, TimestampInterceptor, coalescingPresets, createEventPayloadPool, createMessagePool, createRecordPool, createTimestampPool, getCoalescingPreset, getGlobalBufferPool, getGlobalEventPayloadPool, getGlobalMessagePool, getGlobalRecordPool, getGlobalTimestampPool, getNativeModuleStatus, getNativeStats, logNativeStatus, logger, mergeWithDefaults, setGlobalBufferPool, setGlobalEventPayloadPool, setGlobalMessagePool, setGlobalRecordPool, setGlobalTimestampPool, validateIndexConfig };
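The expanded export list makes the failover, read-replica, and anti-entropy pieces, together with their default configs, part of the public surface of 0.7.0. For example, the shipped defaults can now be imported directly:

```ts
import {
  DEFAULT_FAILURE_DETECTOR_CONFIG,
  DEFAULT_MERKLE_TREE_CONFIG,
  DEFAULT_READ_REPLICA_CONFIG,
  DEFAULT_REASSIGNER_CONFIG,
  DEFAULT_REPAIR_CONFIG,
} from '@topgunbuild/server';

// Useful as a starting point before overriding individual fields.
console.log({
  repairScanIntervalMs: DEFAULT_REPAIR_CONFIG.scanIntervalMs,
  reassignmentDelayMs: DEFAULT_REASSIGNER_CONFIG.reassignmentDelayMs,
  readMaxStalenessMs: DEFAULT_READ_REPLICA_CONFIG.maxStalenessMs,
  merkleTreeDepth: DEFAULT_MERKLE_TREE_CONFIG.treeDepth,
  initialHeartbeatIntervalMs: DEFAULT_FAILURE_DETECTOR_CONFIG.initialHeartbeatIntervalMs,
});
```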