@topgunbuild/server 0.2.1 → 0.3.0

This diff compares publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registry.
package/dist/index.d.ts CHANGED
@@ -1,7 +1,8 @@
1
- import { Timestamp, LWWRecord, ORMapRecord, Principal, PermissionPolicy, LWWMap, ORMap, PermissionType } from '@topgunbuild/core';
1
+ import { Timestamp, LWWRecord, ORMapRecord, Principal, PermissionPolicy, ConsistencyLevel, ReplicationConfig, LWWMap, ORMap, PermissionType, MigrationConfig, MigrationStatus, MigrationMetrics, PartitionMap, PartitionInfo, PartitionChange, ReplicationLag, ReplicationHealth, ReplicationResult } from '@topgunbuild/core';
2
2
  import { WebSocket } from 'ws';
3
3
  import { PoolConfig, Pool } from 'pg';
4
4
  import pino from 'pino';
5
+ import { EventEmitter } from 'events';
5
6
 
6
7
  /**
7
8
  * TaskletScheduler — Cooperative multitasking for long-running operations.
@@ -1601,16 +1602,30 @@ interface CoalescingWriterMetrics {
1601
1602
  * - Larger batch size = higher throughput, higher latency
1602
1603
  * - Longer delay = more messages per batch, higher latency
1603
1604
  * - Larger maxBatchBytes = handles larger payloads, more memory
1605
+ *
1606
+ * NOTE: A/B testing (Dec 2024) showed maxDelayMs is the primary bottleneck:
1607
+ * - 10ms delay: ~10K ops/sec, p50=11ms
1608
+ * - 1ms delay: ~18K ops/sec, p50=8ms (+80% throughput)
1609
+ * - 0ms (disabled): ~18K ops/sec, p50=2ms (best latency)
1604
1610
  */
1605
1611
  declare const coalescingPresets: {
1606
1612
  /**
1607
- * Conservative defaults - good for low-latency workloads.
1608
- * Minimizes batching delay at the cost of more network calls.
1609
- * Use for: gaming, real-time chat, interactive applications.
1613
+ * Low latency - optimized for minimal response time.
1614
+ * Best for: gaming, real-time chat, interactive applications.
1615
+ * Benchmark: p50=2ms, ~18K ops/sec
1616
+ */
1617
+ readonly lowLatency: {
1618
+ readonly maxBatchSize: 100;
1619
+ readonly maxDelayMs: 1;
1620
+ readonly maxBatchBytes: 65536;
1621
+ };
1622
+ /**
1623
+ * Conservative - good balance of latency and batching.
1624
+ * Use for: general-purpose workloads with latency sensitivity.
1610
1625
  */
1611
1626
  readonly conservative: {
1612
1627
  readonly maxBatchSize: 100;
1613
- readonly maxDelayMs: 5;
1628
+ readonly maxDelayMs: 2;
1614
1629
  readonly maxBatchBytes: 65536;
1615
1630
  };
1616
1631
  /**
@@ -1620,17 +1635,18 @@ declare const coalescingPresets: {
1620
1635
  */
1621
1636
  readonly balanced: {
1622
1637
  readonly maxBatchSize: 300;
1623
- readonly maxDelayMs: 8;
1638
+ readonly maxDelayMs: 2;
1624
1639
  readonly maxBatchBytes: 131072;
1625
1640
  };
1626
1641
  /**
1627
1642
  * High throughput - optimized for write-heavy workloads.
1628
1643
  * Higher batching for better network utilization.
1629
1644
  * Use for: data ingestion, logging, IoT data streams.
1645
+ * Benchmark: p50=7ms, ~18K ops/sec
1630
1646
  */
1631
1647
  readonly highThroughput: {
1632
1648
  readonly maxBatchSize: 500;
1633
- readonly maxDelayMs: 10;
1649
+ readonly maxDelayMs: 2;
1634
1650
  readonly maxBatchBytes: 262144;
1635
1651
  };
1636
1652
  /**
@@ -1640,7 +1656,7 @@ declare const coalescingPresets: {
1640
1656
  */
1641
1657
  readonly aggressive: {
1642
1658
  readonly maxBatchSize: 1000;
1643
- readonly maxDelayMs: 15;
1659
+ readonly maxDelayMs: 5;
1644
1660
  readonly maxBatchBytes: 524288;
1645
1661
  };
1646
1662
  };
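
A rough usage sketch of the presets above: only the exported coalescingPresets object and its fields come from the declarations; the WORKLOAD switch is purely illustrative, and how a preset is passed into the coalescing writer is not shown in this diff.

    import { coalescingPresets } from '@topgunbuild/server';

    // Interactive traffic: the new lowLatency preset (p50 ≈ 2ms in the Dec 2024 benchmark).
    // Ingestion/logging: highThroughput keeps 500-message batches but now flushes after 2ms, not 10ms.
    const preset = process.env.WORKLOAD === 'ingest'
        ? coalescingPresets.highThroughput
        : coalescingPresets.lowLatency;

    console.log(`batch=${preset.maxBatchSize} delay=${preset.maxDelayMs}ms bytes=${preset.maxBatchBytes}`);
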
@@ -1721,6 +1737,12 @@ interface ServerCoordinatorConfig {
1721
1737
  workerPoolConfig?: Partial<WorkerPoolConfig>;
1722
1738
  /** Default timeout for Write Concern acknowledgments in ms (default: 5000) */
1723
1739
  writeAckTimeout?: number;
1740
+ /** Enable replication to backup nodes (default: true when cluster has peers) */
1741
+ replicationEnabled?: boolean;
1742
+ /** Default consistency level for replication (default: EVENTUAL) */
1743
+ defaultConsistency?: ConsistencyLevel;
1744
+ /** Replication configuration */
1745
+ replicationConfig?: Partial<ReplicationConfig>;
1724
1746
  }
1725
1747
  declare class ServerCoordinator {
1726
1748
  private httpServer;
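
A minimal sketch of switching on the new replication fields in ServerCoordinatorConfig. The three field names and writeAckTimeout come from the declarations above; the ConsistencyLevel.QUORUM member name is an assumption based on the STRONG/QUORUM/EVENTUAL levels described later in this file, and ReplicationConfig's own fields live in @topgunbuild/core and are not shown here.

    import { ConsistencyLevel } from '@topgunbuild/core';
    import type { ServerCoordinatorConfig } from '@topgunbuild/server';

    const config: Partial<ServerCoordinatorConfig> = {
        writeAckTimeout: 5000,                        // existing Write Concern ack timeout (default 5000)
        replicationEnabled: true,                     // default: true once the cluster has peers
        defaultConsistency: ConsistencyLevel.QUORUM,  // assumed member name; default level is EVENTUAL
        replicationConfig: {},                        // Partial<ReplicationConfig> from @topgunbuild/core
    };
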
@@ -1736,6 +1758,7 @@ declare class ServerCoordinator {
1736
1758
  private queryRegistry;
1737
1759
  private cluster;
1738
1760
  private partitionService;
1761
+ private replicationPipeline?;
1739
1762
  private lockManager;
1740
1763
  private topicManager;
1741
1764
  private securityManager;
@@ -1803,6 +1826,11 @@ declare class ServerCoordinator {
1803
1826
  private handleConnection;
1804
1827
  private handleMessage;
1805
1828
  private updateClientHlc;
1829
+ /**
1830
+ * Broadcast partition map to all connected and authenticated clients.
1831
+ * Called when partition topology changes (node join/leave/failover).
1832
+ */
1833
+ private broadcastPartitionMap;
1806
1834
  private broadcast;
1807
1835
  /**
1808
1836
  * === OPTIMIZATION 2 & 3: Batched Broadcast with Serialization Caching ===
@@ -1836,6 +1864,11 @@ declare class ServerCoordinator {
1836
1864
  * Broadcast event to cluster members (excluding self).
1837
1865
  */
1838
1866
  private broadcastToCluster;
1867
+ /**
1868
+ * Apply replicated operation from another node (callback for ReplicationPipeline)
1869
+ * This is called when we receive a replicated operation as a backup node
1870
+ */
1871
+ private applyReplicatedOperation;
1839
1872
  /**
1840
1873
  * Build OpContext for interceptors.
1841
1874
  */
@@ -2147,4 +2180,872 @@ declare function getNativeStats(sharedMemoryManager?: SharedMemoryManager): Nati
2147
2180
  */
2148
2181
  declare function logNativeStatus(): void;
2149
2182
 
2150
- export { BufferPool, type BufferPoolConfig, type BufferPoolStats, type CoalescingPreset, type CoalescingWriterMetrics, type CoalescingWriterOptions, type ConnectionContext, ConnectionRateLimiter, FilterTasklet, ForEachTasklet, type IInterceptor, type IServerStorage, IteratorTasklet, type IteratorTaskletConfig, type Logger, MapTasklet, MemoryServerAdapter, type NativeModuleStatus, type NativeStats, type ORMapTombstones, type ORMapValue, ObjectPool, type ObjectPoolConfig, type ObjectPoolStats, type OpContext, type PooledEventPayload, type PooledMessage, type PooledRecord, type PooledTimestamp, PostgresAdapter, type PostgresAdapterOptions, type ProgressState, RateLimitInterceptor, type RateLimiterConfig, type RateLimiterStats, ReduceTasklet, SecurityManager, ServerCoordinator, type ServerCoordinatorConfig, type ServerOp, type StorageValue, type Tasklet, TaskletScheduler, type TaskletSchedulerConfig, type TaskletSchedulerStats, TimestampInterceptor, coalescingPresets, createEventPayloadPool, createMessagePool, createRecordPool, createTimestampPool, getCoalescingPreset, getGlobalBufferPool, getGlobalEventPayloadPool, getGlobalMessagePool, getGlobalRecordPool, getGlobalTimestampPool, getNativeModuleStatus, getNativeStats, logNativeStatus, logger, setGlobalBufferPool, setGlobalEventPayloadPool, setGlobalMessagePool, setGlobalRecordPool, setGlobalTimestampPool };
2183
+ /**
2184
+ * FailureDetector - Phi Accrual Failure Detector
2185
+ *
2186
+ * Implements the Phi Accrual Failure Detection algorithm for distributed systems.
2187
+ * Based on the paper: "The φ Accrual Failure Detector" by Hayashibara et al.
2188
+ *
2189
+ * The detector provides a suspicion level (phi) rather than binary alive/dead status,
2190
+ * allowing the application to make decisions based on configurable thresholds.
2191
+ *
2192
+ * Hazelcast equivalent: com.hazelcast.internal.cluster.fd.PhiAccrualFailureDetector
2193
+ */
2194
+
2195
+ interface FailureDetectorConfig {
2196
+ /** Interval between heartbeat checks (ms). Default: 1000 */
2197
+ heartbeatIntervalMs: number;
2198
+ /** Time after which a node is suspected if no heartbeat received (ms). Default: 5000 */
2199
+ suspicionTimeoutMs: number;
2200
+ /** Time after suspicion before confirming failure (ms). Default: 10000 */
2201
+ confirmationTimeoutMs: number;
2202
+ /** Phi threshold above which a node is considered suspected. Default: 8 */
2203
+ phiThreshold: number;
2204
+ /** Minimum samples required for accurate phi calculation. Default: 10 */
2205
+ minSamples: number;
2206
+ /** Maximum samples to keep in history. Default: 100 */
2207
+ maxSamples: number;
2208
+ /** Initial heartbeat interval estimate (ms). Default: 1000 */
2209
+ initialHeartbeatIntervalMs: number;
2210
+ }
2211
+ declare class FailureDetector extends EventEmitter {
2212
+ private config;
2213
+ private nodeStates;
2214
+ private monitoringNodes;
2215
+ private checkTimer?;
2216
+ private confirmationTimers;
2217
+ private started;
2218
+ constructor(config?: Partial<FailureDetectorConfig>);
2219
+ /**
2220
+ * Start the failure detector monitoring loop.
2221
+ */
2222
+ start(): void;
2223
+ /**
2224
+ * Stop the failure detector and clean up.
2225
+ */
2226
+ stop(): void;
2227
+ /**
2228
+ * Start monitoring a node.
2229
+ */
2230
+ startMonitoring(nodeId: string): void;
2231
+ /**
2232
+ * Stop monitoring a node.
2233
+ */
2234
+ stopMonitoring(nodeId: string): void;
2235
+ /**
2236
+ * Record a heartbeat from a node.
2237
+ * This updates the node's state and clears any suspicion.
2238
+ */
2239
+ recordHeartbeat(nodeId: string): void;
2240
+ /**
2241
+ * Check all monitored nodes for failure.
2242
+ */
2243
+ private checkAllNodes;
2244
+ /**
2245
+ * Schedule failure confirmation after suspicion timeout.
2246
+ */
2247
+ private scheduleConfirmation;
2248
+ /**
2249
+ * Confirm node failure after confirmation timeout.
2250
+ */
2251
+ private confirmFailure;
2252
+ /**
2253
+ * Calculate the phi value for a node using the Phi Accrual algorithm.
2254
+ *
2255
+ * Phi = -log10(P_later(t_now - t_last))
2256
+ *
2257
+ * where P_later is the probability that a heartbeat will arrive later than expected.
2258
+ */
2259
+ calculatePhi(nodeId: string): number;
2260
+ /**
2261
+ * Calculate mean of an array of numbers.
2262
+ */
2263
+ private calculateMean;
2264
+ /**
2265
+ * Calculate variance of an array of numbers.
2266
+ */
2267
+ private calculateVariance;
2268
+ /**
2269
+ * Get list of currently suspected nodes.
2270
+ */
2271
+ getSuspectedNodes(): string[];
2272
+ /**
2273
+ * Get list of confirmed failed nodes.
2274
+ */
2275
+ getConfirmedFailedNodes(): string[];
2276
+ /**
2277
+ * Check if a specific node is suspected.
2278
+ */
2279
+ isSuspected(nodeId: string): boolean;
2280
+ /**
2281
+ * Check if a specific node's failure is confirmed.
2282
+ */
2283
+ isConfirmedFailed(nodeId: string): boolean;
2284
+ /**
2285
+ * Get the current phi value for a node.
2286
+ */
2287
+ getPhi(nodeId: string): number;
2288
+ /**
2289
+ * Get all monitored nodes.
2290
+ */
2291
+ getMonitoredNodes(): string[];
2292
+ /**
2293
+ * Get metrics for monitoring.
2294
+ */
2295
+ getMetrics(): {
2296
+ monitoredNodes: number;
2297
+ suspectedNodes: number;
2298
+ confirmedFailedNodes: number;
2299
+ };
2300
+ }
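
A standalone sketch of the phi calculation described in the comment above, using the common exponential-interval approximation (P_later(t) = exp(-t / meanInterval)); the shipped detector also uses the sample variance and the minSamples/maxSamples window, which this sketch omits. Note that FailureDetector does not appear in this package's export list, so in practice it is reached through ClusterManager.getFailureDetector().

    // phi = -log10(P_later(t_now - t_last)); with P_later(t) = exp(-t / mean), this reduces to
    // phi = elapsed / (mean * ln 10).
    function approximatePhi(nowMs: number, lastHeartbeatMs: number, meanIntervalMs: number): number {
        const elapsed = nowMs - lastHeartbeatMs;
        return elapsed / (meanIntervalMs * Math.log(10));
    }

    // With a 1000ms mean interval, this approximation crosses the default threshold of 8
    // after roughly 18.4 seconds of silence.
    console.log(approximatePhi(Date.now(), Date.now() - 18_500, 1_000)); // ≈ 8.03

    // Driving the real detector through its declared API (obtained from the cluster manager):
    //   const fd = clusterManager.getFailureDetector();
    //   fd.startMonitoring('node-b');
    //   fd.recordHeartbeat('node-b');   // called on every incoming HEARTBEAT
    //   fd.isSuspected('node-b');       // true once phi exceeds phiThreshold (default 8)
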
2301
+
2302
+ interface ClusterConfig {
2303
+ nodeId: string;
2304
+ host: string;
2305
+ port: number;
2306
+ peers: string[];
2307
+ discovery?: 'manual' | 'kubernetes';
2308
+ serviceName?: string;
2309
+ discoveryInterval?: number;
2310
+ tls?: ClusterTLSConfig;
2311
+ /** Heartbeat interval in milliseconds. Default: 1000 */
2312
+ heartbeatIntervalMs?: number;
2313
+ /** Failure detection configuration */
2314
+ failureDetection?: Partial<FailureDetectorConfig>;
2315
+ }
2316
+ interface ClusterMember {
2317
+ nodeId: string;
2318
+ host: string;
2319
+ port: number;
2320
+ socket: WebSocket;
2321
+ isSelf: boolean;
2322
+ }
2323
+ interface ClusterMessage {
2324
+ type: 'HELLO' | 'OP_FORWARD' | 'PARTITION_UPDATE' | 'HEARTBEAT' | 'CLUSTER_EVENT' | 'CLUSTER_QUERY_EXEC' | 'CLUSTER_QUERY_RESP' | 'CLUSTER_GC_REPORT' | 'CLUSTER_GC_COMMIT' | 'CLUSTER_LOCK_REQ' | 'CLUSTER_LOCK_RELEASE' | 'CLUSTER_LOCK_GRANTED' | 'CLUSTER_LOCK_RELEASED' | 'CLUSTER_CLIENT_DISCONNECTED' | 'CLUSTER_TOPIC_PUB';
2325
+ senderId: string;
2326
+ payload: any;
2327
+ }
2328
+ declare class ClusterManager extends EventEmitter {
2329
+ readonly config: ClusterConfig;
2330
+ private server?;
2331
+ private members;
2332
+ private pendingConnections;
2333
+ private reconnectIntervals;
2334
+ private discoveryTimer?;
2335
+ private heartbeatTimer?;
2336
+ private failureDetector;
2337
+ constructor(config: ClusterConfig);
2338
+ /**
2339
+ * Get the failure detector instance.
2340
+ */
2341
+ getFailureDetector(): FailureDetector;
2342
+ private _actualPort;
2343
+ /** Get the actual port the cluster is listening on */
2344
+ get port(): number;
2345
+ start(): Promise<number>;
2346
+ /** Called when server is ready - registers self and initiates peer connections */
2347
+ private onServerReady;
2348
+ stop(): void;
2349
+ /**
2350
+ * Start sending heartbeats to all peers.
2351
+ */
2352
+ private startHeartbeat;
2353
+ /**
2354
+ * Stop sending heartbeats.
2355
+ */
2356
+ private stopHeartbeat;
2357
+ /**
2358
+ * Send heartbeat to all connected peers.
2359
+ */
2360
+ private sendHeartbeatToAll;
2361
+ /**
2362
+ * Handle incoming heartbeat from a peer.
2363
+ */
2364
+ private handleHeartbeat;
2365
+ /**
2366
+ * Handle confirmed node failure.
2367
+ */
2368
+ private handleNodeFailure;
2369
+ private connectToPeers;
2370
+ private startDiscovery;
2371
+ private scheduleReconnect;
2372
+ private connectToPeerWithBackoff;
2373
+ private connectToPeer;
2374
+ private _connectToPeerInternal;
2375
+ private handleSocket;
2376
+ send(nodeId: string, type: ClusterMessage['type'], payload: any): void;
2377
+ sendToNode(nodeId: string, message: any): void;
2378
+ getMembers(): string[];
2379
+ isLocal(nodeId: string): boolean;
2380
+ private buildClusterTLSOptions;
2381
+ }
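
A minimal sketch of standing up a ClusterManager from the declared ClusterConfig. The addresses and the 'host:port' peer format are placeholders (the peer string format is not specified in this file), and no events are subscribed because the emitter's event names are not listed here.

    import { ClusterManager, type ClusterConfig } from '@topgunbuild/server';

    const config: ClusterConfig = {
        nodeId: 'node-a',
        host: '0.0.0.0',
        port: 7000,                      // the actual bound port is available via the `port` getter
        peers: ['node-b:7001'],          // placeholder; peer string format is an assumption
        discovery: 'manual',
        heartbeatIntervalMs: 1000,
        failureDetection: { phiThreshold: 8, suspicionTimeoutMs: 5000 },
    };

    const cluster = new ClusterManager(config);
    const boundPort = await cluster.start();  // Promise<number>, presumably the listening port
    console.log('cluster up on', boundPort, 'members:', cluster.getMembers());
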
2382
+
2383
+ /**
2384
+ * MigrationManager - Manages gradual partition rebalancing
2385
+ *
2386
+ * Phase 4 Task 03: Parallel Partition Sync
2387
+ *
2388
+ * Features:
2389
+ * - Gradual rebalancing with configurable batch size
2390
+ * - State machine for migration lifecycle
2391
+ * - Backpressure via chunk acknowledgments
2392
+ * - Retry logic for failed migrations
2393
+ * - Metrics and observability
2394
+ */
2395
+
2396
+ declare class MigrationManager extends EventEmitter {
2397
+ private readonly config;
2398
+ private readonly clusterManager;
2399
+ private readonly partitionService;
2400
+ private activeMigrations;
2401
+ private migrationQueue;
2402
+ private incomingMigrations;
2403
+ private pendingChunkAcks;
2404
+ private pendingVerifications;
2405
+ private metrics;
2406
+ private batchTimer;
2407
+ private dataCollector;
2408
+ private dataStorer;
2409
+ constructor(clusterManager: ClusterManager, partitionService: PartitionService, config?: Partial<MigrationConfig>);
2410
+ /**
2411
+ * Set the data collector callback
2412
+ * Called to collect all records for a partition before migration
2413
+ */
2414
+ setDataCollector(collector: (partitionId: number) => Promise<Uint8Array[]>): void;
2415
+ /**
2416
+ * Set the data storer callback
2417
+ * Called to store received records after successful migration
2418
+ */
2419
+ setDataStorer(storer: (partitionId: number, data: Uint8Array[]) => Promise<void>): void;
2420
+ /**
2421
+ * Plan migration for topology change
2422
+ */
2423
+ planMigration(oldDistribution: Map<number, PartitionDistribution>, newDistribution: Map<number, PartitionDistribution>): void;
2424
+ /**
2425
+ * Start batch processing timer
2426
+ */
2427
+ private startBatchProcessing;
2428
+ /**
2429
+ * Stop batch processing
2430
+ */
2431
+ private stopBatchProcessing;
2432
+ /**
2433
+ * Start next batch of migrations
2434
+ */
2435
+ startNextBatch(): Promise<void>;
2436
+ /**
2437
+ * Start migration for a single partition
2438
+ */
2439
+ private startPartitionMigration;
2440
+ /**
2441
+ * Split records into chunks
2442
+ */
2443
+ private chunkify;
2444
+ /**
2445
+ * Calculate checksum for a chunk using native xxhash
2446
+ */
2447
+ private calculateChecksum;
2448
+ /**
2449
+ * Calculate checksum for all partition records using streaming xxhash
2450
+ */
2451
+ private calculatePartitionChecksum;
2452
+ /**
2453
+ * Wait for chunk acknowledgment
2454
+ */
2455
+ private waitForChunkAck;
2456
+ /**
2457
+ * Wait for migration verification
2458
+ */
2459
+ private waitForVerification;
2460
+ /**
2461
+ * Handle successful migration completion
2462
+ */
2463
+ private onMigrationComplete;
2464
+ /**
2465
+ * Handle migration failure
2466
+ */
2467
+ private onMigrationFailed;
2468
+ /**
2469
+ * Handle MIGRATION_START message
2470
+ */
2471
+ private handleMigrationStart;
2472
+ /**
2473
+ * Handle MIGRATION_CHUNK message
2474
+ */
2475
+ private handleMigrationChunk;
2476
+ /**
2477
+ * Handle MIGRATION_COMPLETE message
2478
+ */
2479
+ private handleMigrationComplete;
2480
+ /**
2481
+ * Handle MIGRATION_CHUNK_ACK message
2482
+ */
2483
+ private handleMigrationChunkAck;
2484
+ /**
2485
+ * Handle MIGRATION_VERIFY message
2486
+ */
2487
+ private handleMigrationVerify;
2488
+ /**
2489
+ * Reassemble chunks into contiguous data
2490
+ */
2491
+ private reassemble;
2492
+ /**
2493
+ * Deserialize records from chunk data
2494
+ */
2495
+ private deserializeRecords;
2496
+ /**
2497
+ * Setup cluster message handlers
2498
+ */
2499
+ private setupMessageHandlers;
2500
+ /**
2501
+ * Check if a partition is currently migrating
2502
+ */
2503
+ isActive(partitionId: number): boolean;
2504
+ /**
2505
+ * Get migration status
2506
+ */
2507
+ getStatus(): MigrationStatus;
2508
+ /**
2509
+ * Get migration metrics
2510
+ */
2511
+ getMetrics(): MigrationMetrics;
2512
+ /**
2513
+ * Cancel all active and queued migrations
2514
+ */
2515
+ cancelAll(): Promise<void>;
2516
+ /**
2517
+ * Cleanup resources (sync version for backwards compatibility)
2518
+ */
2519
+ close(): void;
2520
+ /**
2521
+ * Async cleanup - waits for cancellation to complete
2522
+ */
2523
+ closeAsync(): Promise<void>;
2524
+ }
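
A sketch of wiring the two storage callbacks that migrations depend on. Only setDataCollector/setDataStorer and their signatures come from the declarations above; `storage` and its dumpPartition/restorePartition methods are hypothetical stand-ins for whatever storage adapter is in use, and the MigrationManager instance is normally owned by PartitionService (see getMigrationManager() below).

    import type { MigrationManager } from '@topgunbuild/server';

    declare const migrationManager: MigrationManager;
    declare const storage: {
        dumpPartition(partitionId: number): Promise<Uint8Array[]>;                   // hypothetical
        restorePartition(partitionId: number, records: Uint8Array[]): Promise<void>; // hypothetical
    };

    // Source side: collect every serialized record of a partition before it is chunked and shipped.
    migrationManager.setDataCollector((partitionId) => storage.dumpPartition(partitionId));

    // Target side: persist the reassembled records once the migration is verified.
    migrationManager.setDataStorer((partitionId, data) => storage.restorePartition(partitionId, data));
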
2525
+
2526
+ interface PartitionDistribution {
2527
+ owner: string;
2528
+ backups: string[];
2529
+ }
2530
+ interface PartitionServiceEvents {
2531
+ 'rebalanced': (map: PartitionMap, changes: PartitionChange[]) => void;
2532
+ 'partitionMoved': (info: {
2533
+ partitionId: number;
2534
+ previousOwner: string;
2535
+ newOwner: string;
2536
+ version: number;
2537
+ }) => void;
2538
+ }
2539
+ interface PartitionServiceConfig {
2540
+ /** Enable gradual rebalancing (default: false for backward compatibility) */
2541
+ gradualRebalancing: boolean;
2542
+ /** Migration configuration */
2543
+ migration: Partial<MigrationConfig>;
2544
+ }
2545
+ declare class PartitionService extends EventEmitter {
2546
+ private cluster;
2547
+ private partitions;
2548
+ private readonly PARTITION_COUNT;
2549
+ private readonly BACKUP_COUNT;
2550
+ private mapVersion;
2551
+ private lastRebalanceTime;
2552
+ private config;
2553
+ private migrationManager;
2554
+ constructor(cluster: ClusterManager, config?: Partial<PartitionServiceConfig>);
2555
+ /**
2556
+ * Handle membership change
2557
+ */
2558
+ private onMembershipChange;
2559
+ getPartitionId(key: string): number;
2560
+ getDistribution(key: string): PartitionDistribution;
2561
+ getOwner(key: string): string;
2562
+ isLocalOwner(key: string): boolean;
2563
+ isLocalBackup(key: string): boolean;
2564
+ isRelated(key: string): boolean;
2565
+ /**
2566
+ * Get current partition map version
2567
+ */
2568
+ getMapVersion(): number;
2569
+ /**
2570
+ * Generate full PartitionMap for client consumption
2571
+ */
2572
+ getPartitionMap(): PartitionMap;
2573
+ /**
2574
+ * Get partition info by ID
2575
+ */
2576
+ getPartitionInfo(partitionId: number): PartitionInfo | null;
2577
+ /**
2578
+ * Get owner node for a partition ID
2579
+ */
2580
+ getPartitionOwner(partitionId: number): string | null;
2581
+ private rebalance;
2582
+ /**
2583
+ * Perform gradual rebalancing using MigrationManager
2584
+ */
2585
+ private rebalanceGradual;
2586
+ /**
2587
+ * Set partition owner (called after migration completes)
2588
+ */
2589
+ setOwner(partitionId: number, nodeId: string): void;
2590
+ /**
2591
+ * Get backups for a partition
2592
+ */
2593
+ getBackups(partitionId: number): string[];
2594
+ /**
2595
+ * Get migration status
2596
+ */
2597
+ getMigrationStatus(): MigrationStatus | null;
2598
+ /**
2599
+ * Check if partition is currently migrating
2600
+ */
2601
+ isMigrating(partitionId: number): boolean;
2602
+ /**
2603
+ * Check if any partition is currently migrating
2604
+ */
2605
+ isRebalancing(): boolean;
2606
+ /**
2607
+ * Get MigrationManager for configuration
2608
+ */
2609
+ getMigrationManager(): MigrationManager | null;
2610
+ /**
2611
+ * Cancel all migrations
2612
+ */
2613
+ cancelMigrations(): Promise<void>;
2614
+ }
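
A sketch of the routing decision a node can make with PartitionService for a given key. The PartitionService methods are from the declarations above; routeWrite, applyLocally and forwardToNode are hypothetical helpers.

    import type { PartitionService } from '@topgunbuild/server';

    function routeWrite(
        partitions: PartitionService,
        key: string,
        applyLocally: () => void,
        forwardToNode: (nodeId: string) => void,
    ): void {
        // isMigrating(partitions.getPartitionId(key)) can additionally be consulted to defer
        // writes while the key's partition is mid-rebalance.
        if (partitions.isLocalOwner(key)) {
            applyLocally();
        } else {
            forwardToNode(partitions.getOwner(key));
        }
    }
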
2615
+
2616
+ /**
2617
+ * LagTracker - Monitors replication lag across cluster nodes
2618
+ *
2619
+ * Phase 4 Task 04: Async Replication Pipeline
2620
+ *
2621
+ * Features:
2622
+ * - Tracks replication lag per node
2623
+ * - Maintains historical lag data for percentile calculations
2624
+ * - Identifies unhealthy and laggy nodes
2625
+ * - Provides health metrics for monitoring
2626
+ */
2627
+
2628
+ interface LagInfo {
2629
+ current: number;
2630
+ history: number[];
2631
+ lastUpdate: number;
2632
+ pendingOps: number;
2633
+ }
2634
+ interface LagTrackerConfig {
2635
+ /** Number of lag samples to keep in history (default: 100) */
2636
+ historySize: number;
2637
+ /** Threshold in ms for considering a node laggy (default: 5000) */
2638
+ laggyThresholdMs: number;
2639
+ /** Threshold in ms for considering a node unhealthy (default: 30000) */
2640
+ unhealthyThresholdMs: number;
2641
+ }
2642
+ declare const DEFAULT_LAG_TRACKER_CONFIG: LagTrackerConfig;
2643
+ declare class LagTracker {
2644
+ private readonly config;
2645
+ private lagByNode;
2646
+ constructor(config?: Partial<LagTrackerConfig>);
2647
+ /**
2648
+ * Update lag measurement for a node
2649
+ */
2650
+ update(nodeId: string, lagMs: number): void;
2651
+ /**
2652
+ * Record acknowledgment from a node (lag effectively becomes 0)
2653
+ */
2654
+ recordAck(nodeId: string): void;
2655
+ /**
2656
+ * Increment pending operations counter for a node
2657
+ */
2658
+ incrementPending(nodeId: string): void;
2659
+ /**
2660
+ * Get lag statistics for a specific node
2661
+ */
2662
+ getLag(nodeId: string): ReplicationLag;
2663
+ /**
2664
+ * Get pending operations count for a node
2665
+ */
2666
+ getPendingOps(nodeId: string): number;
2667
+ /**
2668
+ * Get overall replication health status
2669
+ */
2670
+ getHealth(): ReplicationHealth;
2671
+ /**
2672
+ * Get average lag across all tracked nodes
2673
+ */
2674
+ getAverageLag(): number;
2675
+ /**
2676
+ * Check if a specific node is considered healthy
2677
+ */
2678
+ isNodeHealthy(nodeId: string): boolean;
2679
+ /**
2680
+ * Check if a specific node is considered laggy
2681
+ */
2682
+ isNodeLaggy(nodeId: string): boolean;
2683
+ /**
2684
+ * Remove a node from tracking
2685
+ */
2686
+ removeNode(nodeId: string): void;
2687
+ /**
2688
+ * Get all tracked node IDs
2689
+ */
2690
+ getTrackedNodes(): string[];
2691
+ /**
2692
+ * Get raw lag info for a node (for advanced monitoring)
2693
+ */
2694
+ getRawLagInfo(nodeId: string): LagInfo | undefined;
2695
+ /**
2696
+ * Clear all tracking data
2697
+ */
2698
+ clear(): void;
2699
+ /**
2700
+ * Export metrics in Prometheus format
2701
+ */
2702
+ toPrometheusMetrics(): string;
2703
+ }
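
A sketch of feeding and reading the tracker; node IDs and lag values are illustrative, and the ReplicationLag/ReplicationHealth result shapes are defined in @topgunbuild/core rather than here.

    import { LagTracker } from '@topgunbuild/server';

    const tracker = new LagTracker({
        laggyThresholdMs: 5_000,       // documented default
        unhealthyThresholdMs: 30_000,  // documented default
    });

    // Owner-side bookkeeping: count an op as pending when it is sent, record lag or ack when it lands.
    tracker.incrementPending('node-b');
    tracker.update('node-b', 120);   // node-b observed ~120ms behind
    tracker.recordAck('node-b');     // lag effectively drops to 0

    console.log(tracker.getLag('node-b'));      // per-node ReplicationLag
    console.log(tracker.getHealth());           // cluster-wide ReplicationHealth
    console.log(tracker.toPrometheusMetrics()); // text exposition for scraping
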
2704
+
2705
+ /**
2706
+ * ReplicationPipeline - Manages async replication with configurable consistency levels
2707
+ *
2708
+ * Phase 4 Task 04: Async Replication Pipeline
2709
+ *
2710
+ * Features:
2711
+ * - Three consistency levels: STRONG, QUORUM, EVENTUAL
2712
+ * - Async replication queue for high throughput
2713
+ * - Backpressure handling with queue limits
2714
+ * - Retry logic for failed replications
2715
+ * - Integration with LagTracker for monitoring
2716
+ * - Pluggable operation applier for storage integration
2717
+ */
2718
+
2719
+ /**
2720
+ * Callback to apply replicated operation to local storage
2721
+ * @param operation - The operation to apply
2722
+ * @param opId - Unique operation ID
2723
+ * @param sourceNode - Node that originated the operation
2724
+ * @returns Promise<boolean> - true if applied successfully
2725
+ */
2726
+ type OperationApplier = (operation: unknown, opId: string, sourceNode: string) => Promise<boolean>;
2727
+ declare class ReplicationPipeline extends EventEmitter {
2728
+ private readonly config;
2729
+ private readonly clusterManager;
2730
+ private readonly partitionService;
2731
+ private readonly lagTracker;
2732
+ private readonly nodeId;
2733
+ private replicationQueue;
2734
+ private pendingAcks;
2735
+ private queueProcessorTimer;
2736
+ private operationApplier;
2737
+ constructor(clusterManager: ClusterManager, partitionService: PartitionService, config?: Partial<ReplicationConfig>);
2738
+ /**
2739
+ * Set the operation applier callback
2740
+ * This is called when replicated operations are received from other nodes
2741
+ */
2742
+ setOperationApplier(applier: OperationApplier): void;
2743
+ /**
2744
+ * Replicate operation to backup nodes
2745
+ */
2746
+ replicate(operation: unknown, opId: string, key: string, options?: {
2747
+ consistency?: ConsistencyLevel;
2748
+ timeout?: number;
2749
+ }): Promise<ReplicationResult>;
2750
+ /**
2751
+ * STRONG: Wait for all replicas to acknowledge
2752
+ */
2753
+ private replicateStrong;
2754
+ /**
2755
+ * QUORUM: Wait for majority of replicas
2756
+ */
2757
+ private replicateQuorum;
2758
+ /**
2759
+ * EVENTUAL: Fire-and-forget with queue
2760
+ */
2761
+ private replicateEventual;
2762
+ /**
2763
+ * Add task to replication queue
2764
+ */
2765
+ private enqueue;
2766
+ /**
2767
+ * Start queue processor
2768
+ */
2769
+ private startQueueProcessor;
2770
+ /**
2771
+ * Stop queue processor
2772
+ */
2773
+ private stopQueueProcessor;
2774
+ /**
2775
+ * Process replication queue for a node
2776
+ */
2777
+ private processQueue;
2778
+ /**
2779
+ * Send replication message to a node
2780
+ */
2781
+ private sendReplication;
2782
+ /**
2783
+ * Setup cluster message handlers
2784
+ */
2785
+ private setupMessageHandlers;
2786
+ /**
2787
+ * Handle incoming replication request (on backup node)
2788
+ */
2789
+ private handleReplication;
2790
+ /**
2791
+ * Handle incoming batch replication (on backup node)
2792
+ */
2793
+ private handleReplicationBatch;
2794
+ /**
2795
+ * Handle replication acknowledgment (on owner node)
2796
+ */
2797
+ private handleReplicationAck;
2798
+ /**
2799
+ * Handle batch acknowledgment (on owner node)
2800
+ */
2801
+ private handleReplicationBatchAck;
2802
+ /**
2803
+ * Get replication lag for a specific node
2804
+ */
2805
+ getLag(nodeId: string): ReplicationLag;
2806
+ /**
2807
+ * Get overall replication health
2808
+ */
2809
+ getHealth(): ReplicationHealth;
2810
+ /**
2811
+ * Get queue size for a specific node
2812
+ */
2813
+ getQueueSize(nodeId: string): number;
2814
+ /**
2815
+ * Get total pending operations across all nodes
2816
+ */
2817
+ getTotalPending(): number;
2818
+ /**
2819
+ * Check if a node is considered synced (low lag)
2820
+ */
2821
+ isSynced(nodeId: string, maxLagMs?: number): boolean;
2822
+ /**
2823
+ * Get LagTracker for advanced monitoring
2824
+ */
2825
+ getLagTracker(): LagTracker;
2826
+ /**
2827
+ * Export metrics in Prometheus format
2828
+ */
2829
+ toPrometheusMetrics(): string;
2830
+ /**
2831
+ * Cleanup resources
2832
+ */
2833
+ close(): void;
2834
+ }
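
A sketch of the two integration points on the pipeline. The methods and the OperationApplier signature come from the declarations above; the operation payload is a placeholder, and ConsistencyLevel.QUORUM is an assumed member name for the QUORUM level described in the comment.

    import { ConsistencyLevel } from '@topgunbuild/core';
    import type { ReplicationPipeline } from '@topgunbuild/server';

    declare const pipeline: ReplicationPipeline; // built internally by ServerCoordinator / ClusterCoordinator

    // Backup side: how replicated operations received from the owner get applied locally.
    // ServerCoordinator wires this to its private applyReplicatedOperation.
    pipeline.setOperationApplier(async (operation, opId, sourceNode) => {
        console.log(`apply ${opId} from ${sourceNode}`, operation);
        return true; // acknowledge success back to the owner
    });

    // Owner side: replicate a write to the key's backups and wait for a quorum of acks.
    const result = await pipeline.replicate({ op: 'PUT', key: 'user:1' }, 'op-123', 'user:1', {
        consistency: ConsistencyLevel.QUORUM, // assumed member name
        timeout: 2_000,
    });
    console.log(result); // ReplicationResult from @topgunbuild/core
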
2835
+
2836
+ declare class LockManager extends EventEmitter {
2837
+ private locks;
2838
+ private checkInterval;
2839
+ private static readonly MIN_TTL;
2840
+ private static readonly MAX_TTL;
2841
+ constructor();
2842
+ stop(): void;
2843
+ acquire(name: string, clientId: string, requestId: string, ttl: number): {
2844
+ granted: boolean;
2845
+ fencingToken?: number;
2846
+ error?: string;
2847
+ };
2848
+ release(name: string, clientId: string, fencingToken: number): boolean;
2849
+ handleClientDisconnect(clientId: string): void;
2850
+ private grantLock;
2851
+ private processNext;
2852
+ private cleanupExpiredLocks;
2853
+ }
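
A sketch of the acquire/release cycle with the fencing token. The lock name, client/request IDs and the 10s TTL are illustrative; the private MIN_TTL/MAX_TTL bounds that constrain the TTL are not shown in this file, and doProtectedWork is a hypothetical placeholder.

    import { LockManager } from '@topgunbuild/server';

    const locks = new LockManager();

    const res = locks.acquire('orders:flush', 'client-42', 'req-1', 10_000);
    if (res.granted && res.fencingToken !== undefined) {
        // Pass the fencing token along with downstream writes so stale lock holders can be rejected.
        doProtectedWork(res.fencingToken);
        locks.release('orders:flush', 'client-42', res.fencingToken);
    } else {
        console.warn('lock not granted:', res.error);
    }

    locks.stop(); // stops the internal expiry-check interval

    function doProtectedWork(fencingToken: number): void {
        console.log('writing under fencing token', fencingToken);
    }
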
2854
+
2855
+ /**
2856
+ * ClusterCoordinator - Unified cluster integration layer
2857
+ *
2858
+ * Phase 4 Task 06: System Integration
2859
+ *
2860
+ * Coordinates all cluster components:
2861
+ * - ClusterManager: P2P WebSocket mesh
2862
+ * - PartitionService: Consistent hashing & routing
2863
+ * - MigrationManager: Gradual rebalancing
2864
+ * - ReplicationPipeline: Async replication with consistency levels
2865
+ * - LagTracker: Replication health monitoring
2866
+ */
2867
+
2868
+ interface ClusterCoordinatorConfig {
2869
+ /** Cluster node configuration */
2870
+ cluster: ClusterConfig;
2871
+ /** Enable gradual partition rebalancing (default: true) */
2872
+ gradualRebalancing: boolean;
2873
+ /** Migration configuration for gradual rebalancing */
2874
+ migration: Partial<MigrationConfig>;
2875
+ /** Replication configuration */
2876
+ replication: Partial<ReplicationConfig>;
2877
+ /** Enable async replication pipeline (default: true) */
2878
+ replicationEnabled: boolean;
2879
+ /** Data collector callback for migrations */
2880
+ dataCollector?: (partitionId: number) => Promise<Uint8Array[]>;
2881
+ /** Data storer callback for incoming migrations */
2882
+ dataStorer?: (partitionId: number, data: Uint8Array[]) => Promise<void>;
2883
+ }
2884
+ declare const DEFAULT_CLUSTER_COORDINATOR_CONFIG: Omit<ClusterCoordinatorConfig, 'cluster'>;
2885
+ interface ClusterCoordinatorEvents {
2886
+ 'started': () => void;
2887
+ 'stopped': () => void;
2888
+ 'member:joined': (nodeId: string) => void;
2889
+ 'member:left': (nodeId: string) => void;
2890
+ 'partition:rebalanced': (map: PartitionMap, changes: PartitionChange[]) => void;
2891
+ 'partition:moved': (info: {
2892
+ partitionId: number;
2893
+ previousOwner: string;
2894
+ newOwner: string;
2895
+ version: number;
2896
+ }) => void;
2897
+ 'migration:started': (partitionId: number, targetNode: string) => void;
2898
+ 'migration:completed': (partitionId: number) => void;
2899
+ 'migration:failed': (partitionId: number, error: Error) => void;
2900
+ 'replication:unhealthy': (nodeId: string) => void;
2901
+ 'replication:healthy': (nodeId: string) => void;
2902
+ 'error': (error: Error) => void;
2903
+ }
2904
+ declare class ClusterCoordinator extends EventEmitter {
2905
+ private readonly config;
2906
+ private clusterManager;
2907
+ private partitionService;
2908
+ private replicationPipeline;
2909
+ private lagTracker;
2910
+ private started;
2911
+ private actualPort;
2912
+ constructor(config: ClusterCoordinatorConfig);
2913
+ /**
2914
+ * Start the cluster coordinator
2915
+ */
2916
+ start(): Promise<number>;
2917
+ /**
2918
+ * Stop the cluster coordinator
2919
+ */
2920
+ stop(): Promise<void>;
2921
+ /**
2922
+ * Get local node ID
2923
+ */
2924
+ getNodeId(): string;
2925
+ /**
2926
+ * Get cluster port
2927
+ */
2928
+ getPort(): number;
2929
+ /**
2930
+ * Get all cluster members
2931
+ */
2932
+ getMembers(): string[];
2933
+ /**
2934
+ * Check if this is the local node
2935
+ */
2936
+ isLocal(nodeId: string): boolean;
2937
+ /**
2938
+ * Check if coordinator is started
2939
+ */
2940
+ isStarted(): boolean;
2941
+ /**
2942
+ * Get current partition map
2943
+ */
2944
+ getPartitionMap(): PartitionMap;
2945
+ /**
2946
+ * Get partition map version
2947
+ */
2948
+ getPartitionMapVersion(): number;
2949
+ /**
2950
+ * Get partition ID for a key
2951
+ */
2952
+ getPartitionId(key: string): number;
2953
+ /**
2954
+ * Get owner node for a key
2955
+ */
2956
+ getOwner(key: string): string;
2957
+ /**
2958
+ * Check if this node owns the key
2959
+ */
2960
+ isLocalOwner(key: string): boolean;
2961
+ /**
2962
+ * Check if this node is a backup for the key
2963
+ */
2964
+ isLocalBackup(key: string): boolean;
2965
+ /**
2966
+ * Get backup nodes for a partition
2967
+ */
2968
+ getBackups(partitionId: number): string[];
2969
+ /**
2970
+ * Check if partition is currently migrating
2971
+ */
2972
+ isMigrating(partitionId: number): boolean;
2973
+ /**
2974
+ * Check if any rebalancing is in progress
2975
+ */
2976
+ isRebalancing(): boolean;
2977
+ /**
2978
+ * Get migration status
2979
+ */
2980
+ getMigrationStatus(): MigrationStatus | null;
2981
+ /**
2982
+ * Get migration metrics
2983
+ */
2984
+ getMigrationMetrics(): MigrationMetrics | null;
2985
+ /**
2986
+ * Cancel all active migrations
2987
+ */
2988
+ cancelMigrations(): Promise<void>;
2989
+ /**
2990
+ * Set data collector for migrations
2991
+ */
2992
+ setDataCollector(collector: (partitionId: number) => Promise<Uint8Array[]>): void;
2993
+ /**
2994
+ * Set data storer for incoming migrations
2995
+ */
2996
+ setDataStorer(storer: (partitionId: number, data: Uint8Array[]) => Promise<void>): void;
2997
+ /**
2998
+ * Replicate an operation to backup nodes
2999
+ */
3000
+ replicate(operation: unknown, opId: string, key: string, options?: {
3001
+ consistency?: ConsistencyLevel;
3002
+ timeout?: number;
3003
+ }): Promise<ReplicationResult>;
3004
+ /**
3005
+ * Get replication health status
3006
+ */
3007
+ getReplicationHealth(): ReplicationHealth;
3008
+ /**
3009
+ * Get replication lag for a specific node
3010
+ */
3011
+ getReplicationLag(nodeId: string): ReplicationLag;
3012
+ /**
3013
+ * Check if a node is healthy for replication
3014
+ */
3015
+ isNodeHealthy(nodeId: string): boolean;
3016
+ /**
3017
+ * Check if a node is laggy
3018
+ */
3019
+ isNodeLaggy(nodeId: string): boolean;
3020
+ /**
3021
+ * Send message to a specific node
3022
+ */
3023
+ send(nodeId: string, message: unknown): void;
3024
+ /**
3025
+ * Broadcast message to all nodes
3026
+ */
3027
+ broadcast(message: unknown): void;
3028
+ /**
3029
+ * Get underlying ClusterManager
3030
+ */
3031
+ getClusterManager(): ClusterManager;
3032
+ /**
3033
+ * Get underlying PartitionService
3034
+ */
3035
+ getPartitionService(): PartitionService;
3036
+ /**
3037
+ * Get underlying ReplicationPipeline
3038
+ */
3039
+ getReplicationPipeline(): ReplicationPipeline | null;
3040
+ /**
3041
+ * Get underlying LagTracker
3042
+ */
3043
+ getLagTracker(): LagTracker;
3044
+ /**
3045
+ * Get all metrics in Prometheus format
3046
+ */
3047
+ getPrometheusMetrics(): string;
3048
+ private setupEventHandlers;
3049
+ }
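
A sketch of bootstrapping the coordinator from the declared config and events. The addresses, peer format and callback bodies are placeholders; DEFAULT_CLUSTER_COORDINATOR_CONFIG supplies every field except `cluster`, and the event names come from ClusterCoordinatorEvents above.

    import { ClusterCoordinator, DEFAULT_CLUSTER_COORDINATOR_CONFIG } from '@topgunbuild/server';

    const coordinator = new ClusterCoordinator({
        ...DEFAULT_CLUSTER_COORDINATOR_CONFIG,        // gradualRebalancing / replication defaults
        cluster: {
            nodeId: 'node-a',
            host: '0.0.0.0',
            port: 7000,
            peers: ['node-b:7001'],                   // placeholder; peer string format is an assumption
        },
        dataCollector: async (_partitionId) => [],    // placeholder: return the partition's serialized records
        dataStorer: async (_partitionId, _data) => {  // placeholder: persist records received via migration
        },
    });

    coordinator.on('member:joined', (nodeId) => console.log('member joined', nodeId));
    coordinator.on('migration:completed', (partitionId) => console.log('partition migrated', partitionId));
    coordinator.on('replication:unhealthy', (nodeId) => console.warn('replica unhealthy', nodeId));

    await coordinator.start();
    console.log(coordinator.getPartitionMapVersion(), coordinator.getReplicationHealth());
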
3050
+
3051
+ export { BufferPool, type BufferPoolConfig, type BufferPoolStats, type ClusterConfig, ClusterCoordinator, type ClusterCoordinatorConfig, type ClusterCoordinatorEvents, ClusterManager, type ClusterMember, type ClusterMessage, type CoalescingPreset, type CoalescingWriterMetrics, type CoalescingWriterOptions, type ConnectionContext, ConnectionRateLimiter, DEFAULT_CLUSTER_COORDINATOR_CONFIG, DEFAULT_LAG_TRACKER_CONFIG, FilterTasklet, ForEachTasklet, type IInterceptor, type IServerStorage, IteratorTasklet, type IteratorTaskletConfig, type LagInfo, LagTracker, type LagTrackerConfig, LockManager, type Logger, MapTasklet, MemoryServerAdapter, MigrationManager, type NativeModuleStatus, type NativeStats, type ORMapTombstones, type ORMapValue, ObjectPool, type ObjectPoolConfig, type ObjectPoolStats, type OpContext, type PartitionDistribution, PartitionService, type PartitionServiceConfig, type PartitionServiceEvents, type PooledEventPayload, type PooledMessage, type PooledRecord, type PooledTimestamp, PostgresAdapter, type PostgresAdapterOptions, type ProgressState, RateLimitInterceptor, type RateLimiterConfig, type RateLimiterStats, ReduceTasklet, ReplicationPipeline, SecurityManager, ServerCoordinator, type ServerCoordinatorConfig, type ServerOp, type StorageValue, type Tasklet, TaskletScheduler, type TaskletSchedulerConfig, type TaskletSchedulerStats, TimestampInterceptor, coalescingPresets, createEventPayloadPool, createMessagePool, createRecordPool, createTimestampPool, getCoalescingPreset, getGlobalBufferPool, getGlobalEventPayloadPool, getGlobalMessagePool, getGlobalRecordPool, getGlobalTimestampPool, getNativeModuleStatus, getNativeStats, logNativeStatus, logger, setGlobalBufferPool, setGlobalEventPayloadPool, setGlobalMessagePool, setGlobalRecordPool, setGlobalTimestampPool };