@optimystic/db-p2p 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. package/{readme.md → README.md} +7 -0
  2. package/dist/index.min.js +31 -30
  3. package/dist/index.min.js.map +4 -4
  4. package/dist/src/cluster/cluster-repo.d.ts +27 -0
  5. package/dist/src/cluster/cluster-repo.d.ts.map +1 -1
  6. package/dist/src/cluster/cluster-repo.js +139 -18
  7. package/dist/src/cluster/cluster-repo.js.map +1 -1
  8. package/dist/src/cluster/service.d.ts +13 -2
  9. package/dist/src/cluster/service.d.ts.map +1 -1
  10. package/dist/src/cluster/service.js +17 -7
  11. package/dist/src/cluster/service.js.map +1 -1
  12. package/dist/src/index.d.ts +1 -1
  13. package/dist/src/index.d.ts.map +1 -1
  14. package/dist/src/index.js +1 -1
  15. package/dist/src/index.js.map +1 -1
  16. package/dist/src/libp2p-node.d.ts +13 -2
  17. package/dist/src/libp2p-node.d.ts.map +1 -1
  18. package/dist/src/libp2p-node.js +35 -16
  19. package/dist/src/libp2p-node.js.map +1 -1
  20. package/dist/src/protocol-client.d.ts.map +1 -1
  21. package/dist/src/protocol-client.js +8 -7
  22. package/dist/src/protocol-client.js.map +1 -1
  23. package/dist/src/repo/cluster-coordinator.d.ts +7 -2
  24. package/dist/src/repo/cluster-coordinator.d.ts.map +1 -1
  25. package/dist/src/repo/cluster-coordinator.js +18 -3
  26. package/dist/src/repo/cluster-coordinator.js.map +1 -1
  27. package/dist/src/repo/coordinator-repo.d.ts +26 -3
  28. package/dist/src/repo/coordinator-repo.d.ts.map +1 -1
  29. package/dist/src/repo/coordinator-repo.js +117 -22
  30. package/dist/src/repo/coordinator-repo.js.map +1 -1
  31. package/dist/src/repo/service.d.ts +13 -2
  32. package/dist/src/repo/service.d.ts.map +1 -1
  33. package/dist/src/repo/service.js +25 -12
  34. package/dist/src/repo/service.js.map +1 -1
  35. package/dist/src/storage/memory-storage.d.ts +15 -0
  36. package/dist/src/storage/memory-storage.d.ts.map +1 -1
  37. package/dist/src/storage/memory-storage.js +23 -4
  38. package/dist/src/storage/memory-storage.js.map +1 -1
  39. package/dist/src/storage/storage-repo.d.ts.map +1 -1
  40. package/dist/src/storage/storage-repo.js.map +1 -1
  41. package/dist/src/sync/service.d.ts.map +1 -1
  42. package/dist/src/sync/service.js +7 -2
  43. package/dist/src/sync/service.js.map +1 -1
  44. package/package.json +27 -21
  45. package/src/cluster/cluster-repo.ts +836 -711
  46. package/src/cluster/service.ts +44 -31
  47. package/src/index.ts +1 -1
  48. package/src/libp2p-key-network.ts +334 -334
  49. package/src/libp2p-node.ts +371 -339
  50. package/src/network/network-manager-service.ts +334 -334
  51. package/src/protocol-client.ts +53 -54
  52. package/src/repo/client.ts +112 -112
  53. package/src/repo/cluster-coordinator.ts +613 -592
  54. package/src/repo/coordinator-repo.ts +269 -137
  55. package/src/repo/service.ts +237 -219
  56. package/src/storage/block-storage.ts +182 -182
  57. package/src/storage/memory-storage.ts +24 -5
  58. package/src/storage/storage-repo.ts +321 -320
  59. package/src/sync/service.ts +7 -6
  60. package/dist/src/storage/file-storage.d.ts +0 -30
  61. package/dist/src/storage/file-storage.d.ts.map +0 -1
  62. package/dist/src/storage/file-storage.js +0 -127
  63. package/dist/src/storage/file-storage.js.map +0 -1
  64. package/src/storage/file-storage.ts +0 -163
@@ -1,711 +1,836 @@
1
- import type { IRepo, ClusterRecord, Signature, RepoMessage, ITransactionValidator } from "@optimystic/db-core";
2
- import type { ICluster } from "@optimystic/db-core";
3
- import type { IPeerNetwork } from "@optimystic/db-core";
4
- import { blockIdsForTransforms } from "@optimystic/db-core";
5
- import { ClusterClient } from "./client.js";
6
- import type { PeerId } from "@libp2p/interface";
7
- import { peerIdFromString } from "@libp2p/peer-id";
8
- import { sha256 } from "multiformats/hashes/sha2";
9
- import { toString as uint8ArrayToString } from 'uint8arrays/to-string';
10
- import { createLogger } from '../logger.js'
11
- import type { PartitionDetector } from "./partition-detector.js";
12
- import type { FretService } from "p2p-fret";
13
-
14
- const log = createLogger('cluster-member')
15
-
16
- /** State of a transaction in the cluster */
17
- enum TransactionPhase {
18
- Promising, // Collecting promises from peers
19
- OurPromiseNeeded, // We need to provide our promise
20
- OurCommitNeeded, // We need to provide our commit
21
- Consensus, // Transaction has reached consensus
22
- Rejected, // Transaction was rejected
23
- Propagating // Transaction is being propagated
24
- }
25
-
26
- interface TransactionState {
27
- record: ClusterRecord;
28
- promiseTimeout?: NodeJS.Timeout;
29
- resolutionTimeout?: NodeJS.Timeout;
30
- lastUpdate: number;
31
- }
32
-
33
- interface ClusterMemberComponents {
34
- storageRepo: IRepo;
35
- peerNetwork: IPeerNetwork;
36
- peerId: PeerId;
37
- protocolPrefix?: string;
38
- partitionDetector?: PartitionDetector;
39
- fretService?: FretService;
40
- validator?: ITransactionValidator;
41
- }
42
-
43
- export function clusterMember(components: ClusterMemberComponents): ClusterMember {
44
- return new ClusterMember(
45
- components.storageRepo,
46
- components.peerNetwork,
47
- components.peerId,
48
- components.protocolPrefix,
49
- components.partitionDetector,
50
- components.fretService,
51
- components.validator
52
- );
53
- }
54
-
55
- /**
56
- * Handles cluster-side operations, managing promises and commits for cluster updates
57
- * and coordinating with the local storage repo.
58
- */
59
- export class ClusterMember implements ICluster {
60
- // Track active transactions by their message hash
61
- private activeTransactions: Map<string, TransactionState> = new Map();
62
- // Queue of transactions to clean up
63
- private cleanupQueue: string[] = [];
64
- // Serialize concurrent updates for the same transaction
65
- private pendingUpdates: Map<string, Promise<ClusterRecord>> = new Map();
66
-
67
- constructor(
68
- private readonly storageRepo: IRepo,
69
- private readonly peerNetwork: IPeerNetwork,
70
- private readonly peerId: PeerId,
71
- private readonly protocolPrefix?: string,
72
- private readonly partitionDetector?: PartitionDetector,
73
- private readonly fretService?: FretService,
74
- private readonly validator?: ITransactionValidator
75
- ) {
76
- // Periodically clean up expired transactions
77
- setInterval(() => this.queueExpiredTransactions(), 60000);
78
- // Process cleanup queue
79
- setInterval(() => this.processCleanupQueue(), 1000);
80
- }
81
-
82
- /**
83
- * Handles an incoming cluster update, managing the two-phase commit process
84
- * and coordinating with the local storage repo
85
- */
86
- async update(record: ClusterRecord): Promise<ClusterRecord> {
87
- // Serialize concurrent updates for the same transaction
88
- const existingUpdate = this.pendingUpdates.get(record.messageHash);
89
- if (existingUpdate) {
90
- log('cluster-member:concurrent-update-wait', { messageHash: record.messageHash });
91
- await existingUpdate;
92
- // After waiting, continue processing with the new incoming record
93
- // to ensure proper merging of promises/commits from coordinator
94
- }
95
-
96
- // Create a promise for this update operation
97
- const updatePromise = this.processUpdate(record);
98
- this.pendingUpdates.set(record.messageHash, updatePromise);
99
-
100
- try {
101
- const result = await updatePromise;
102
- return result;
103
- } finally {
104
- // Remove from pending updates after a short delay to allow concurrent calls to see it
105
- setTimeout(() => {
106
- this.pendingUpdates.delete(record.messageHash);
107
- }, 100);
108
- }
109
- }
110
-
111
- private async processUpdate(record: ClusterRecord): Promise<ClusterRecord> {
112
- const ourId = this.peerId.toString();
113
- const inboundPhase = record.commits[ourId] ? 'commit' : record.promises[ourId] ? 'promise' : 'initial';
114
- log('cluster-member:incoming', {
115
- messageHash: record.messageHash,
116
- phase: inboundPhase,
117
- peerCount: Object.keys(record.peers).length,
118
- promiseCount: Object.keys(record.promises).length,
119
- commitCount: Object.keys(record.commits).length,
120
- existingTransaction: this.activeTransactions.has(record.messageHash)
121
- });
122
-
123
- // Report network size hint to FRET if provided
124
- if (this.fretService && record.networkSizeHint && record.networkSizeConfidence) {
125
- try {
126
- this.fretService.reportNetworkSize(
127
- record.networkSizeHint,
128
- record.networkSizeConfidence,
129
- 'cluster'
130
- );
131
- } catch (err) {
132
- // Ignore errors reporting to FRET
133
- }
134
- }
135
-
136
- // Validate the incoming record
137
- await this.validateRecord(record);
138
-
139
- const existingState = this.activeTransactions.get(record.messageHash);
140
- let currentRecord = existingState?.record || record;
141
- if (existingState) {
142
- log('cluster-member:merge-start', {
143
- messageHash: record.messageHash,
144
- existingPromises: Object.keys(existingState.record.promises ?? {}),
145
- existingCommits: Object.keys(existingState.record.commits ?? {}),
146
- incomingPromises: Object.keys(record.promises ?? {}),
147
- incomingCommits: Object.keys(record.commits ?? {})
148
- });
149
- }
150
-
151
- // If we have an existing record, merge the signatures
152
- if (existingState) {
153
- currentRecord = await this.mergeRecords(existingState.record, record);
154
- log('cluster-member:merge-complete', {
155
- messageHash: record.messageHash,
156
- mergedPromises: Object.keys(currentRecord.promises ?? {}),
157
- mergedCommits: Object.keys(currentRecord.commits ?? {})
158
- });
159
- }
160
-
161
- // Get the current transaction state
162
- const phase = await this.getTransactionPhase(currentRecord);
163
- log('cluster-member:phase', {
164
- messageHash: record.messageHash,
165
- phase,
166
- promises: Object.keys(currentRecord.promises ?? {}),
167
- commits: Object.keys(currentRecord.commits ?? {})
168
- });
169
- let shouldPersist = true;
170
-
171
- // Handle the transaction based on its state
172
- switch (phase) {
173
- case TransactionPhase.OurPromiseNeeded:
174
- log('cluster-member:action-promise', {
175
- messageHash: record.messageHash
176
- });
177
- currentRecord = await this.handlePromiseNeeded(currentRecord);
178
- log('cluster-member:action-promise-complete', {
179
- messageHash: record.messageHash,
180
- promises: Object.keys(currentRecord.promises ?? {})
181
- });
182
- break;
183
- case TransactionPhase.OurCommitNeeded:
184
- log('cluster-member:action-commit', {
185
- messageHash: record.messageHash
186
- });
187
- currentRecord = await this.handleCommitNeeded(currentRecord);
188
- log('cluster-member:action-commit-complete', {
189
- messageHash: record.messageHash,
190
- commits: Object.keys(currentRecord.commits ?? {})
191
- });
192
- // After adding our commit, clear the transaction - the coordinator will handle consensus
193
- shouldPersist = false;
194
- break;
195
- case TransactionPhase.Consensus:
196
- log('cluster-member:action-consensus', {
197
- messageHash: record.messageHash
198
- });
199
- await this.handleConsensus(currentRecord);
200
- // Don't call clearTransaction here - it happens in handleConsensus
201
- shouldPersist = false;
202
- break;
203
- case TransactionPhase.Rejected:
204
- log('cluster-member:action-rejected', {
205
- messageHash: record.messageHash
206
- });
207
- // Don't call clearTransaction here - it happens in handleRejection
208
- await this.handleRejection(currentRecord);
209
- shouldPersist = false;
210
- break;
211
- case TransactionPhase.Propagating:
212
- // Transaction is complete and propagating - clean it up
213
- log('cluster-member:phase-propagating', {
214
- messageHash: record.messageHash
215
- });
216
- shouldPersist = false;
217
- break;
218
- case TransactionPhase.Promising:
219
- // Still collecting promises from peers - if we haven't added ours and there's no conflict, add it
220
- // This state shouldn't normally be reached since OurPromiseNeeded is checked first
221
- log('cluster-member:phase-promising-blocked', {
222
- messageHash: record.messageHash
223
- });
224
- break;
225
- }
226
-
227
- if (shouldPersist) {
228
- // Update transaction state
229
- const timeouts = this.setupTimeouts(currentRecord);
230
- this.activeTransactions.set(record.messageHash, {
231
- record: currentRecord,
232
- lastUpdate: Date.now(),
233
- promiseTimeout: timeouts.promiseTimeout,
234
- resolutionTimeout: timeouts.resolutionTimeout
235
- });
236
- log('cluster-member:state-persist', {
237
- messageHash: record.messageHash,
238
- storedPromises: Object.keys(currentRecord.promises ?? {}),
239
- storedCommits: Object.keys(currentRecord.commits ?? {})
240
- });
241
- } else {
242
- log('cluster-member:state-clear', {
243
- messageHash: record.messageHash
244
- });
245
- this.clearTransaction(record.messageHash);
246
- }
247
-
248
- // Skip propagation - the coordinator manages distribution
249
- // await this.propagateIfNeeded(currentRecord);
250
-
251
- log('cluster-member:update-complete', {
252
- messageHash: record.messageHash,
253
- promiseCount: Object.keys(currentRecord.promises).length,
254
- commitCount: Object.keys(currentRecord.commits).length
255
- });
256
- return currentRecord;
257
- }
258
-
259
- /**
260
- * Merges two records, validating that non-signature fields match
261
- */
262
- private async mergeRecords(existing: ClusterRecord, incoming: ClusterRecord): Promise<ClusterRecord> {
263
- log('cluster-member:merge-records', {
264
- messageHash: existing.messageHash,
265
- existingPromises: Object.keys(existing.promises ?? {}),
266
- existingCommits: Object.keys(existing.commits ?? {}),
267
- incomingPromises: Object.keys(incoming.promises ?? {}),
268
- incomingCommits: Object.keys(incoming.commits ?? {})
269
- });
270
- // Verify that immutable fields match
271
- if (existing.messageHash !== incoming.messageHash) {
272
- throw new Error('Message hash mismatch');
273
- }
274
- if (JSON.stringify(existing.message) !== JSON.stringify(incoming.message)) {
275
- throw new Error('Message content mismatch');
276
- }
277
- if (JSON.stringify(existing.peers) !== JSON.stringify(incoming.peers)) {
278
- throw new Error('Peers mismatch');
279
- }
280
-
281
- // Merge signatures, keeping the most recent valid ones
282
- return {
283
- ...existing,
284
- promises: { ...existing.promises, ...incoming.promises },
285
- commits: { ...existing.commits, ...incoming.commits }
286
- };
287
- }
288
-
289
- private async validateRecord(record: ClusterRecord): Promise<void> {
290
- // TODO: Fix hash validation logic to match coordinator's hash generation
291
- // The coordinator creates the hash from the message, but this tries to re-hash the hash itself
292
-
293
- // Validate signatures
294
- await this.validateSignatures(record);
295
-
296
- // Validate expiration
297
- if (record.message.expiration && record.message.expiration < Date.now()) {
298
- throw new Error('Transaction expired');
299
- }
300
- }
301
-
302
- private async computeMessageHash(record: ClusterRecord): Promise<string> {
303
- const msgBytes = new TextEncoder().encode(record.messageHash + JSON.stringify(record.message));
304
- const hashBytes = await sha256.digest(msgBytes);
305
- return uint8ArrayToString(hashBytes.digest, 'base64url');
306
- }
307
-
308
- private async validateSignatures(record: ClusterRecord): Promise<void> {
309
- // Validate promise signatures
310
- const promiseHash = await this.computePromiseHash(record);
311
- for (const [peerId, signature] of Object.entries(record.promises)) {
312
- if (!await this.verifySignature(peerId, promiseHash, signature)) {
313
- throw new Error(`Invalid promise signature from ${peerId}`);
314
- }
315
- }
316
-
317
- // Validate commit signatures
318
- const commitHash = await this.computeCommitHash(record);
319
- for (const [peerId, signature] of Object.entries(record.commits)) {
320
- if (!await this.verifySignature(peerId, commitHash, signature)) {
321
- throw new Error(`Invalid commit signature from ${peerId}`);
322
- }
323
- }
324
- }
325
-
326
- private async computePromiseHash(record: ClusterRecord): Promise<string> {
327
- const msgBytes = new TextEncoder().encode(record.messageHash + JSON.stringify(record.message));
328
- const hashBytes = await sha256.digest(msgBytes);
329
- return uint8ArrayToString(hashBytes.digest, 'base64url');
330
- }
331
-
332
- private async computeCommitHash(record: ClusterRecord): Promise<string> {
333
- const msgBytes = new TextEncoder().encode(record.messageHash + JSON.stringify(record.message) + JSON.stringify(record.promises));
334
- const hashBytes = await sha256.digest(msgBytes);
335
- return uint8ArrayToString(hashBytes.digest, 'base64url');
336
- }
337
-
338
- private async verifySignature(peerId: string, hash: string, signature: Signature): Promise<boolean> {
339
- // TODO: Implement actual signature verification
340
- return true;
341
- }
342
-
343
- private async getTransactionPhase(record: ClusterRecord): Promise<TransactionPhase> {
344
- const peerCount = Object.keys(record.peers).length;
345
- const promiseCount = Object.keys(record.promises).length;
346
- const commitCount = Object.keys(record.commits).length;
347
- const ourId = this.peerId.toString();
348
-
349
- // Check for rejections
350
- const rejectedPromises = Object.values(record.promises).filter(s => s.type === 'reject');
351
- const rejectedCommits = Object.values(record.commits).filter(s => s.type === 'reject');
352
- if (rejectedPromises.length > 0 || this.hasMajority(rejectedCommits.length, peerCount)) {
353
- return TransactionPhase.Rejected;
354
- }
355
-
356
- // Check if we need to promise
357
- if (!record.promises[ourId] && !this.hasConflict(record)) {
358
- return TransactionPhase.OurPromiseNeeded;
359
- }
360
-
361
- // Check if still collecting promises
362
- if (promiseCount < peerCount) {
363
- return TransactionPhase.Promising;
364
- }
365
-
366
- // Check if we need to commit
367
- if (promiseCount === peerCount && !record.commits[ourId]) {
368
- return TransactionPhase.OurCommitNeeded;
369
- }
370
-
371
- // Check for consensus
372
- const approvedCommits = Object.values(record.commits).filter(s => s.type === 'approve');
373
- if (this.hasMajority(approvedCommits.length, peerCount)) {
374
- return TransactionPhase.Consensus;
375
- }
376
-
377
- return TransactionPhase.Propagating;
378
- }
379
-
380
- private hasMajority(count: number, total: number): boolean {
381
- return count > total / 2;
382
- }
383
-
384
- private async handlePromiseNeeded(record: ClusterRecord): Promise<ClusterRecord> {
385
- // Validate pend operations if we have a validator
386
- const validationResult = await this.validatePendOperations(record);
387
-
388
- const signature: Signature = validationResult.valid
389
- ? { type: 'approve', signature: 'approved' }
390
- : { type: 'reject', signature: 'rejected', rejectReason: validationResult.reason };
391
-
392
- if (!validationResult.valid) {
393
- log('cluster-member:validation-rejected', {
394
- messageHash: record.messageHash,
395
- reason: validationResult.reason
396
- });
397
- }
398
-
399
- return {
400
- ...record,
401
- promises: {
402
- ...record.promises,
403
- [this.peerId.toString()]: signature
404
- }
405
- };
406
- }
407
-
408
- /**
409
- * Validates pend operations in a cluster record using the transaction validator.
410
- * Returns success if no validator is configured (backwards compatibility).
411
- */
412
- private async validatePendOperations(record: ClusterRecord): Promise<{ valid: boolean; reason?: string }> {
413
- if (!this.validator) {
414
- return { valid: true };
415
- }
416
-
417
- // Find pend operations in the message
418
- for (const operation of record.message.operations) {
419
- if ('pend' in operation) {
420
- const pendRequest = operation.pend;
421
- // Only validate if we have a transaction and operationsHash
422
- if (pendRequest.transaction && pendRequest.operationsHash) {
423
- const result = await this.validator.validate(pendRequest.transaction, pendRequest.operationsHash);
424
- if (!result.valid) {
425
- return { valid: false, reason: result.reason };
426
- }
427
- }
428
- }
429
- }
430
-
431
- return { valid: true };
432
- }
433
-
434
- private async handleCommitNeeded(record: ClusterRecord): Promise<ClusterRecord> {
435
- if (this.hasLocalCommit(record)) {
436
- return record;
437
- }
438
- const signature: Signature = {
439
- type: 'approve',
440
- signature: 'committed' // TODO: Actually sign the commit hash
441
- };
442
-
443
- return {
444
- ...record,
445
- commits: {
446
- ...record.commits,
447
- [this.peerId.toString()]: signature
448
- }
449
- };
450
- }
451
-
452
- private async handleConsensus(record: ClusterRecord): Promise<void> {
453
- // Execute the operations only if we haven't already
454
- const state = this.activeTransactions.get(record.messageHash);
455
- if (!this.hasLocalCommit(state?.record ?? record)) {
456
- for (const operation of record.message.operations) {
457
- if ('get' in operation) {
458
- await this.storageRepo.get(operation.get);
459
- } else if ('pend' in operation) {
460
- await this.storageRepo.pend(operation.pend);
461
- } else if ('commit' in operation) {
462
- await this.storageRepo.commit(operation.commit);
463
- } else if ('cancel' in operation) {
464
- await this.storageRepo.cancel(operation.cancel.actionRef);
465
- }
466
- }
467
- }
468
- // Don't clear here - will be cleared by shouldPersist = false in the main flow
469
- }
470
-
471
- private async handleRejection(record: ClusterRecord): Promise<void> {
472
- // Clean up any resources - will be cleared by shouldPersist = false in the main flow
473
- }
474
-
475
- private setupTimeouts(record: ClusterRecord): { promiseTimeout?: NodeJS.Timeout; resolutionTimeout?: NodeJS.Timeout } {
476
- if (!record.message.expiration) {
477
- return {};
478
- }
479
-
480
- return {
481
- promiseTimeout: setTimeout(
482
- () => this.handleExpiration(record.messageHash),
483
- record.message.expiration - Date.now()
484
- ),
485
- resolutionTimeout: setTimeout(
486
- () => this.resolveWithPeers(record.messageHash),
487
- record.message.expiration + 5000 - Date.now()
488
- )
489
- };
490
- }
491
-
492
- private hasConflict(record: ClusterRecord): boolean {
493
- const now = Date.now();
494
- const staleThresholdMs = 2000; // 2 seconds - allow more time for distributed consensus
495
-
496
- for (const [existingHash, state] of Array.from(this.activeTransactions.entries())) {
497
- if (existingHash === record.messageHash) {
498
- continue;
499
- }
500
-
501
- // Clean up stale transactions that have been around too long
502
- if (now - state.lastUpdate > staleThresholdMs) {
503
- log('cluster-member:stale-cleanup', {
504
- messageHash: existingHash,
505
- age: now - state.lastUpdate
506
- });
507
- this.clearTransaction(existingHash);
508
- continue;
509
- }
510
-
511
- if (this.operationsConflict(state.record.message.operations, record.message.operations)) {
512
- // Use race resolution to determine winner
513
- const resolution = this.resolveRace(state.record, record);
514
-
515
- if (resolution === 'keep-existing') {
516
- log('cluster-member:race-keep-existing', {
517
- existing: existingHash,
518
- incoming: record.messageHash
519
- });
520
- return true; // Reject incoming
521
- } else {
522
- // Accept incoming, abort existing
523
- log('cluster-member:race-accept-incoming', {
524
- existing: existingHash,
525
- incoming: record.messageHash
526
- });
527
- this.clearTransaction(existingHash);
528
- continue; // Check other conflicts
529
- }
530
- }
531
- }
532
-
533
- return false; // No blocking conflicts
534
- }
535
-
536
- /**
537
- * Resolve race between two conflicting transactions.
538
- * Transaction with more promises wins. If tied, higher hash wins.
539
- */
540
- private resolveRace(existing: ClusterRecord, incoming: ClusterRecord): 'keep-existing' | 'accept-incoming' {
541
- const existingCount = Object.keys(existing.promises).length;
542
- const incomingCount = Object.keys(incoming.promises).length;
543
-
544
- // Transaction with more promises wins
545
- if (existingCount > incomingCount) {
546
- return 'keep-existing';
547
- }
548
- if (incomingCount > existingCount) {
549
- return 'accept-incoming';
550
- }
551
-
552
- // Tie-breaker: higher message hash wins (deterministic)
553
- return existing.messageHash > incoming.messageHash ? 'keep-existing' : 'accept-incoming';
554
- }
555
-
556
- private operationsConflict(ops1: RepoMessage['operations'], ops2: RepoMessage['operations']): boolean {
557
- // Check if one is a commit for the same action as a pend - these don't conflict
558
- const actionId1 = this.getActionId(ops1);
559
- const actionId2 = this.getActionId(ops2);
560
- if (actionId1 && actionId2 && actionId1 === actionId2) {
561
- // Same action - commit is resolving the pend, not conflicting
562
- return false;
563
- }
564
-
565
- const blocks1 = new Set(this.getAffectedBlockIds(ops1));
566
- const blocks2 = new Set(this.getAffectedBlockIds(ops2));
567
-
568
- for (const block of Array.from(blocks1)) {
569
- if (blocks2.has(block)) {
570
- log('cluster-member:conflict-detected', {
571
- blocks1: Array.from(blocks1),
572
- blocks2: Array.from(blocks2),
573
- conflictingBlock: block
574
- });
575
- return true;
576
- }
577
- }
578
-
579
- return false;
580
- }
581
-
582
- private getActionId(operations: RepoMessage['operations']): string | undefined {
583
- for (const operation of operations) {
584
- if ('pend' in operation) {
585
- return operation.pend.actionId;
586
- } else if ('commit' in operation) {
587
- return operation.commit.actionId;
588
- } else if ('cancel' in operation) {
589
- return operation.cancel.actionRef.actionId;
590
- }
591
- }
592
- return undefined;
593
- }
594
-
595
- private getAffectedBlockIds(operations: RepoMessage['operations']): string[] {
596
- const blockIds = new Set<string>();
597
-
598
- for (const operation of operations) {
599
- if ('get' in operation) {
600
- operation.get.blockIds.forEach(id => blockIds.add(id));
601
- } else if ('pend' in operation) {
602
- // Use blockIdsForTransforms to correctly extract block IDs from Transforms structure
603
- blockIdsForTransforms(operation.pend.transforms).forEach(id => blockIds.add(id));
604
- } else if ('commit' in operation) {
605
- operation.commit.blockIds.forEach(id => blockIds.add(id));
606
- } else if ('cancel' in operation) {
607
- operation.cancel.actionRef.blockIds.forEach(id => blockIds.add(id));
608
- }
609
- }
610
-
611
- return Array.from(blockIds);
612
- }
613
-
614
- private async propagateIfNeeded(record: ClusterRecord): Promise<void> {
615
- const promises = [];
616
- for (const [peerId, peer] of Object.entries(record.peers)) {
617
- if (peerId === this.peerId.toString()) continue;
618
-
619
- try {
620
- const client = ClusterClient.create(peerIdFromString(peerId), this.peerNetwork, this.protocolPrefix);
621
- promises.push(client.update(record));
622
- } catch (error) {
623
- console.error(`Failed to propagate to peer ${peerId}:`, error);
624
- }
625
- }
626
- await Promise.allSettled(promises);
627
- }
628
-
629
- private async handleExpiration(messageHash: string): Promise<void> {
630
- const state = this.activeTransactions.get(messageHash);
631
- if (!state) return;
632
-
633
- if (!state.record.promises[this.peerId.toString()]) {
634
- const signature: Signature = {
635
- type: 'reject',
636
- signature: 'rejected',
637
- rejectReason: 'Transaction expired'
638
- };
639
-
640
- const updatedRecord = {
641
- ...state.record,
642
- promises: {
643
- ...state.record.promises,
644
- [this.peerId.toString()]: signature
645
- }
646
- };
647
-
648
- this.activeTransactions.set(messageHash, {
649
- ...state,
650
- record: updatedRecord
651
- });
652
-
653
- await this.propagateIfNeeded(updatedRecord);
654
- }
655
- }
656
-
657
- private async resolveWithPeers(messageHash: string): Promise<void> {
658
- // This method is disabled - the coordinator handles all retry logic
659
- // Keeping the skeleton in case we need peer-initiated recovery in the future
660
- log('cluster-member:resolve-skipped', { messageHash, reason: 'coordinator-handles-retry' });
661
- }
662
-
663
- private queueExpiredTransactions(): void {
664
- const now = Date.now();
665
- for (const [messageHash, state] of Array.from(this.activeTransactions.entries())) {
666
- if (state.record.message.expiration && state.record.message.expiration < now) {
667
- this.cleanupQueue.push(messageHash);
668
- }
669
- }
670
- }
671
-
672
- private async processCleanupQueue(): Promise<void> {
673
- while (this.cleanupQueue.length > 0) {
674
- const messageHash = this.cleanupQueue.shift();
675
- if (!messageHash) continue;
676
-
677
- const state = this.activeTransactions.get(messageHash);
678
- if (!state) continue;
679
-
680
- const phase = await this.getTransactionPhase(state.record);
681
- if (phase !== TransactionPhase.Consensus && phase !== TransactionPhase.Rejected) {
682
- this.activeTransactions.delete(messageHash);
683
- }
684
- }
685
- }
686
-
687
- private hasLocalCommit(record: ClusterRecord): boolean {
688
- const ourId = this.peerId.toString();
689
- return Boolean(record.commits[ourId]);
690
- }
691
-
692
- private clearTransaction(messageHash: string): void {
693
- const state = this.activeTransactions.get(messageHash);
694
- if (!state) {
695
- log('cluster-member:clear-miss', { messageHash });
696
- return;
697
- }
698
- if (state.promiseTimeout) {
699
- clearTimeout(state.promiseTimeout);
700
- }
701
- if (state.resolutionTimeout) {
702
- clearTimeout(state.resolutionTimeout);
703
- }
704
- this.activeTransactions.delete(messageHash);
705
- log('cluster-member:clear-done', {
706
- messageHash,
707
- remaining: Array.from(this.activeTransactions.keys())
708
- });
709
- }
710
- }
711
-
1
+ import type { IRepo, ClusterRecord, Signature, RepoMessage, ITransactionValidator } from "@optimystic/db-core";
2
+ import type { ICluster } from "@optimystic/db-core";
3
+ import type { IPeerNetwork } from "@optimystic/db-core";
4
+ import { blockIdsForTransforms } from "@optimystic/db-core";
5
+ import { ClusterClient } from "./client.js";
6
+ import type { PeerId } from "@libp2p/interface";
7
+ import { peerIdFromString } from "@libp2p/peer-id";
8
+ import { sha256 } from "multiformats/hashes/sha2";
9
+ import { base58btc } from "multiformats/bases/base58";
10
+ import { toString as uint8ArrayToString } from 'uint8arrays/to-string';
11
+ import { createLogger } from '../logger.js'
12
+ import type { PartitionDetector } from "./partition-detector.js";
13
+ import type { FretService } from "p2p-fret";
14
+
15
+ const log = createLogger('cluster-member')
16
+
17
+ /** State of a transaction in the cluster */
18
+ enum TransactionPhase {
19
+ Promising, // Collecting promises from peers
20
+ OurPromiseNeeded, // We need to provide our promise
21
+ OurCommitNeeded, // We need to provide our commit
22
+ Consensus, // Transaction has reached consensus
23
+ Rejected, // Transaction was rejected
24
+ Propagating // Transaction is being propagated
25
+ }
26
+
27
+ interface TransactionState {
28
+ record: ClusterRecord;
29
+ promiseTimeout?: NodeJS.Timeout;
30
+ resolutionTimeout?: NodeJS.Timeout;
31
+ lastUpdate: number;
32
+ }
33
+
34
+ interface ClusterMemberComponents {
35
+ storageRepo: IRepo;
36
+ peerNetwork: IPeerNetwork;
37
+ peerId: PeerId;
38
+ protocolPrefix?: string;
39
+ partitionDetector?: PartitionDetector;
40
+ fretService?: FretService;
41
+ validator?: ITransactionValidator;
42
+ }
43
+
44
/**
 * Factory for `ClusterMember`: unpacks the components bag and passes each
 * dependency to the constructor in its positional order.
 *
 * @param components - Dependencies for the new member
 * @returns A newly constructed `ClusterMember`
 */
export function clusterMember(components: ClusterMemberComponents): ClusterMember {
	const {
		storageRepo,
		peerNetwork,
		peerId,
		protocolPrefix,
		partitionDetector,
		fretService,
		validator
	} = components;

	return new ClusterMember(storageRepo, peerNetwork, peerId, protocolPrefix, partitionDetector, fretService, validator);
}
55
+
56
/** Retention period for executed-transaction records, in milliseconds (10 minutes). */
const ExecutedTransactionTtlMs = 600_000;
58
+
59
/**
 * Handles cluster-side operations, managing promises and commits for cluster updates
 * and coordinating with the local storage repo.
 *
 * Each incoming `ClusterRecord` is validated, merged with any state already held for
 * the same message hash, classified into a `TransactionPhase`, and then acted upon
 * (promise, commit, execute, or discard).
 *
 * The constructor starts two maintenance intervals; call `dispose()` to stop them and
 * cancel all per-transaction timers when the member is no longer needed.
 */
export class ClusterMember implements ICluster {
	/**
	 * Largest delay (ms) that fits the signed 32-bit range used by `setTimeout`.
	 * Longer delays are clamped to this value so a far-future expiration does not
	 * overflow and fire almost immediately.
	 */
	private static readonly MaxTimerDelayMs = 2147483647;

	// Track active transactions by their message hash
	private activeTransactions: Map<string, TransactionState> = new Map();
	// Track executed consensus transactions to prevent duplicate execution (messageHash -> executedAt timestamp)
	private executedTransactions: Map<string, number> = new Map();
	// Queue of transactions to clean up
	private cleanupQueue: string[] = [];
	// Tail of the per-transaction update chain; used to serialize updates for the same message hash
	private pendingUpdates: Map<string, Promise<ClusterRecord>> = new Map();
	// Handles of the maintenance intervals started by the constructor, kept so dispose() can stop them
	private readonly maintenanceTimers: ReturnType<typeof setInterval>[] = [];

	constructor(
		private readonly storageRepo: IRepo,
		private readonly peerNetwork: IPeerNetwork,
		private readonly peerId: PeerId,
		private readonly protocolPrefix?: string,
		private readonly partitionDetector?: PartitionDetector,
		private readonly fretService?: FretService,
		private readonly validator?: ITransactionValidator
	) {
		this.maintenanceTimers.push(
			// Periodically queue expired transactions and prune old executed records
			setInterval(() => this.queueExpiredTransactions(), 60000),
			// Process cleanup queue; the handler is async, so rejections are caught and logged here
			setInterval(() => {
				void this.processCleanupQueue().catch(err => {
					log('cluster-member:cleanup-error', { error: err instanceof Error ? err.message : String(err) });
				});
			}, 1000)
		);
	}

	/**
	 * Stops the maintenance intervals, cancels every per-transaction timer, and drops
	 * all active transaction state. Executed-transaction records are kept so that
	 * `wasTransactionExecuted` keeps answering after disposal.
	 */
	dispose(): void {
		for (const timer of this.maintenanceTimers) {
			clearInterval(timer);
		}
		this.maintenanceTimers.length = 0;
		for (const state of Array.from(this.activeTransactions.values())) {
			this.cancelTimers(state);
		}
		this.activeTransactions.clear();
		this.cleanupQueue.length = 0;
	}

	/**
	 * Checks if a transaction's operations were already executed during consensus.
	 * Used by the coordinator to avoid duplicate execution in CoordinatorRepo.
	 */
	wasTransactionExecuted(messageHash: string): boolean {
		return this.executedTransactions.has(messageHash);
	}

	/**
	 * Handles an incoming cluster update, managing the two-phase commit process
	 * and coordinating with the local storage repo.
	 *
	 * Updates for the same message hash are strictly serialized: each call is chained
	 * onto the previous one, so at most one `processUpdate` runs per hash at a time.
	 * A failure of an earlier update does not fail later ones; each call settles with
	 * the outcome of processing its own record.
	 *
	 * @param record - Incoming cluster record
	 * @returns The record after this member has merged and acted on it
	 * @throws Whatever `processUpdate` throws for this record (e.g. validation errors)
	 */
	async update(record: ClusterRecord): Promise<ClusterRecord> {
		const { messageHash } = record;
		const previous = this.pendingUpdates.get(messageHash);

		let current: Promise<ClusterRecord>;
		if (previous) {
			log('cluster-member:concurrent-update-wait', { messageHash });
			// Run after the previous update settles, whether it succeeded or failed,
			// and process the new incoming record so its promises/commits get merged.
			const run = () => this.processUpdate(record);
			current = previous.then(run, run);
		} else {
			current = this.processUpdate(record);
		}
		this.pendingUpdates.set(messageHash, current);

		try {
			return await current;
		} finally {
			// Only the tail of the chain removes the entry; an earlier call must not
			// delete the entry of an update that was queued behind it.
			if (this.pendingUpdates.get(messageHash) === current) {
				this.pendingUpdates.delete(messageHash);
			}
		}
	}

	/**
	 * Validates, merges and acts on a single incoming record, then either stores the
	 * resulting state (re-arming its timers) or clears the transaction.
	 */
	private async processUpdate(record: ClusterRecord): Promise<ClusterRecord> {
		const ourId = this.peerId.toString();
		const inboundPhase = record.commits[ourId] ? 'commit' : record.promises[ourId] ? 'promise' : 'initial';
		log('cluster-member:incoming', {
			messageHash: record.messageHash,
			phase: inboundPhase,
			peerCount: Object.keys(record.peers).length,
			promiseCount: Object.keys(record.promises).length,
			commitCount: Object.keys(record.commits).length,
			existingTransaction: this.activeTransactions.has(record.messageHash)
		});

		// Report network size hint to FRET if provided
		if (this.fretService && record.networkSizeHint && record.networkSizeConfidence) {
			try {
				this.fretService.reportNetworkSize(
					record.networkSizeHint,
					record.networkSizeConfidence,
					'cluster'
				);
			} catch (err) {
				// Best effort: a failed report must not fail the update, but leave a trace
				log('cluster-member:fret-report-failed', {
					messageHash: record.messageHash,
					error: err instanceof Error ? err.message : String(err)
				});
			}
		}

		// Validate the incoming record
		await this.validateRecord(record);

		const existingState = this.activeTransactions.get(record.messageHash);
		let currentRecord = existingState?.record || record;

		// If we have an existing record, merge the signatures
		if (existingState) {
			log('cluster-member:merge-start', {
				messageHash: record.messageHash,
				existingPromises: Object.keys(existingState.record.promises ?? {}),
				existingCommits: Object.keys(existingState.record.commits ?? {}),
				incomingPromises: Object.keys(record.promises ?? {}),
				incomingCommits: Object.keys(record.commits ?? {})
			});
			currentRecord = await this.mergeRecords(existingState.record, record);
			log('cluster-member:merge-complete', {
				messageHash: record.messageHash,
				mergedPromises: Object.keys(currentRecord.promises ?? {}),
				mergedCommits: Object.keys(currentRecord.commits ?? {})
			});
		}

		// Get the current transaction state
		const phase = await this.getTransactionPhase(currentRecord);
		log('cluster-member:phase', {
			messageHash: record.messageHash,
			phase,
			promises: Object.keys(currentRecord.promises ?? {}),
			commits: Object.keys(currentRecord.commits ?? {})
		});
		let shouldPersist = true;

		// Handle the transaction based on its state
		switch (phase) {
			case TransactionPhase.OurPromiseNeeded:
				log('cluster-member:action-promise', {
					messageHash: record.messageHash
				});
				currentRecord = await this.handlePromiseNeeded(currentRecord);
				log('cluster-member:action-promise-complete', {
					messageHash: record.messageHash,
					promises: Object.keys(currentRecord.promises ?? {})
				});
				break;
			case TransactionPhase.OurCommitNeeded:
				log('cluster-member:action-commit', {
					messageHash: record.messageHash
				});
				currentRecord = await this.handleCommitNeeded(currentRecord);
				log('cluster-member:action-commit-complete', {
					messageHash: record.messageHash,
					commits: Object.keys(currentRecord.commits ?? {})
				});
				// After adding our commit, check if we now have consensus and execute if so
				{
					const newPhase = await this.getTransactionPhase(currentRecord);
					if (newPhase === TransactionPhase.Consensus) {
						log('cluster-member:action-consensus-after-commit', {
							messageHash: record.messageHash
						});
						await this.handleConsensus(currentRecord);
					}
				}
				shouldPersist = false;
				break;
			case TransactionPhase.Consensus:
				log('cluster-member:action-consensus', {
					messageHash: record.messageHash
				});
				// If the incoming record already had our commit, we already executed
				// (idempotency for duplicate consensus messages)
				// NOTE(review): a member that committed before a majority existed did not
				// execute at commit time and is also skipped here; handleConsensus already
				// guards against double execution, so confirm this skip is intended.
				if (inboundPhase !== 'commit') {
					await this.handleConsensus(currentRecord);
				} else {
					log('cluster-member:consensus-skip-already-committed', {
						messageHash: record.messageHash
					});
				}
				// Don't call clearTransaction here - the shouldPersist = false path below clears it
				shouldPersist = false;
				break;
			case TransactionPhase.Rejected:
				log('cluster-member:action-rejected', {
					messageHash: record.messageHash
				});
				// Don't call clearTransaction here - the shouldPersist = false path below clears it
				await this.handleRejection(currentRecord);
				shouldPersist = false;
				break;
			case TransactionPhase.Propagating:
				// Transaction is complete and propagating - clean it up
				log('cluster-member:phase-propagating', {
					messageHash: record.messageHash
				});
				shouldPersist = false;
				break;
			case TransactionPhase.Promising:
				// Still collecting promises from peers; reached when our promise is present
				// (or blocked by a conflict) and not every peer has promised yet
				log('cluster-member:phase-promising-blocked', {
					messageHash: record.messageHash
				});
				break;
		}

		if (shouldPersist) {
			// The state being replaced may still have armed timers; cancel them first so
			// they do not leak and fire redundantly alongside the new ones.
			const superseded = this.activeTransactions.get(record.messageHash);
			if (superseded) {
				this.cancelTimers(superseded);
			}
			// Update transaction state
			const timeouts = this.setupTimeouts(currentRecord);
			this.activeTransactions.set(record.messageHash, {
				record: currentRecord,
				lastUpdate: Date.now(),
				promiseTimeout: timeouts.promiseTimeout,
				resolutionTimeout: timeouts.resolutionTimeout
			});
			log('cluster-member:state-persist', {
				messageHash: record.messageHash,
				storedPromises: Object.keys(currentRecord.promises ?? {}),
				storedCommits: Object.keys(currentRecord.commits ?? {})
			});
		} else {
			log('cluster-member:state-clear', {
				messageHash: record.messageHash
			});
			this.clearTransaction(record.messageHash);
		}

		// Skip propagation - the coordinator manages distribution
		// await this.propagateIfNeeded(currentRecord);

		log('cluster-member:update-complete', {
			messageHash: record.messageHash,
			promiseCount: Object.keys(currentRecord.promises).length,
			commitCount: Object.keys(currentRecord.commits).length
		});
		return currentRecord;
	}

	/**
	 * Merges two records, validating that non-signature fields match.
	 * Incoming promise/commit entries override existing ones for the same peer.
	 *
	 * @throws Error when message hash, message content, or peers differ
	 */
	private async mergeRecords(existing: ClusterRecord, incoming: ClusterRecord): Promise<ClusterRecord> {
		log('cluster-member:merge-records', {
			messageHash: existing.messageHash,
			existingPromises: Object.keys(existing.promises ?? {}),
			existingCommits: Object.keys(existing.commits ?? {}),
			incomingPromises: Object.keys(incoming.promises ?? {}),
			incomingCommits: Object.keys(incoming.commits ?? {})
		});
		// Verify that immutable fields match (compared by JSON serialization)
		if (existing.messageHash !== incoming.messageHash) {
			throw new Error('Message hash mismatch');
		}
		if (JSON.stringify(existing.message) !== JSON.stringify(incoming.message)) {
			throw new Error('Message content mismatch');
		}
		if (JSON.stringify(existing.peers) !== JSON.stringify(incoming.peers)) {
			throw new Error('Peers mismatch');
		}

		// Merge signatures; on a key clash the incoming entry wins
		return {
			...existing,
			promises: { ...existing.promises, ...incoming.promises },
			commits: { ...existing.commits, ...incoming.commits }
		};
	}

	/**
	 * Validates hash integrity, signatures, and expiration of an incoming record.
	 *
	 * @throws Error when the hash does not match the message, a signature is invalid,
	 *         or the message expiration is in the past
	 */
	private async validateRecord(record: ClusterRecord): Promise<void> {
		// Validate message hash matches the message content
		const expectedHash = await this.computeMessageHash(record.message);
		if (expectedHash !== record.messageHash) {
			throw new Error(`Message hash mismatch: expected=${expectedHash}, received=${record.messageHash}`);
		}

		// Validate signatures
		await this.validateSignatures(record);

		// Validate expiration
		if (record.message.expiration && record.message.expiration < Date.now()) {
			throw new Error('Transaction expired');
		}
	}

	/**
	 * Compute message hash using the same algorithm as the coordinator.
	 * Must match cluster-coordinator.ts createMessageHash().
	 * SHA-256 over the JSON-serialized message, encoded as base58btc.
	 */
	private async computeMessageHash(message: RepoMessage): Promise<string> {
		const msgBytes = new TextEncoder().encode(JSON.stringify(message));
		const hashBytes = await sha256.digest(msgBytes);
		return base58btc.encode(hashBytes.digest);
	}

	/**
	 * Verifies every promise and commit signature on the record.
	 *
	 * @throws Error naming the first peer whose signature fails verification
	 */
	private async validateSignatures(record: ClusterRecord): Promise<void> {
		// Validate promise signatures
		const promiseHash = await this.computePromiseHash(record);
		for (const [peerId, signature] of Object.entries(record.promises)) {
			if (!await this.verifySignature(peerId, promiseHash, signature)) {
				throw new Error(`Invalid promise signature from ${peerId}`);
			}
		}

		// Validate commit signatures
		const commitHash = await this.computeCommitHash(record);
		for (const [peerId, signature] of Object.entries(record.commits)) {
			if (!await this.verifySignature(peerId, commitHash, signature)) {
				throw new Error(`Invalid commit signature from ${peerId}`);
			}
		}
	}

	/** SHA-256 over message hash + serialized message, as a base64url string. */
	private async computePromiseHash(record: ClusterRecord): Promise<string> {
		const msgBytes = new TextEncoder().encode(record.messageHash + JSON.stringify(record.message));
		const hashBytes = await sha256.digest(msgBytes);
		return uint8ArrayToString(hashBytes.digest, 'base64url');
	}

	/** SHA-256 over message hash + serialized message + serialized promises, as a base64url string. */
	private async computeCommitHash(record: ClusterRecord): Promise<string> {
		const msgBytes = new TextEncoder().encode(record.messageHash + JSON.stringify(record.message) + JSON.stringify(record.promises));
		const hashBytes = await sha256.digest(msgBytes);
		return uint8ArrayToString(hashBytes.digest, 'base64url');
	}

	/** Placeholder: currently accepts every signature without checking it. */
	private async verifySignature(peerId: string, hash: string, signature: Signature): Promise<boolean> {
		// TODO: Implement actual signature verification
		return true;
	}

	/**
	 * Classifies a record into the phase this member should act on.
	 * Note that the promise check calls `hasConflict`, which may clear stale or
	 * losing transactions as a side effect.
	 */
	private async getTransactionPhase(record: ClusterRecord): Promise<TransactionPhase> {
		const peerCount = Object.keys(record.peers).length;
		const promiseCount = Object.keys(record.promises).length;
		const commitCount = Object.keys(record.commits).length;
		const ourId = this.peerId.toString();

		// Check for rejections: any rejected promise, or a majority of rejected commits
		const rejectedPromises = Object.values(record.promises).filter(s => s.type === 'reject');
		const rejectedCommits = Object.values(record.commits).filter(s => s.type === 'reject');
		if (rejectedPromises.length > 0 || this.hasMajority(rejectedCommits.length, peerCount)) {
			return TransactionPhase.Rejected;
		}

		// Check if we need to promise
		if (!record.promises[ourId] && !this.hasConflict(record)) {
			return TransactionPhase.OurPromiseNeeded;
		}

		// Check if still collecting promises
		if (promiseCount < peerCount) {
			return TransactionPhase.Promising;
		}

		// Check if we need to commit
		if (promiseCount === peerCount && !record.commits[ourId]) {
			return TransactionPhase.OurCommitNeeded;
		}

		// Check for consensus
		const approvedCommits = Object.values(record.commits).filter(s => s.type === 'approve');
		if (this.hasMajority(approvedCommits.length, peerCount)) {
			return TransactionPhase.Consensus;
		}

		return TransactionPhase.Propagating;
	}

	/** True when `count` is strictly more than half of `total`. */
	private hasMajority(count: number, total: number): boolean {
		return count > total / 2;
	}

	/**
	 * Adds this member's promise to the record: an approval when pend validation
	 * passes, otherwise a rejection carrying the validation reason.
	 */
	private async handlePromiseNeeded(record: ClusterRecord): Promise<ClusterRecord> {
		// Validate pend operations if we have a validator
		const validationResult = await this.validatePendOperations(record);

		const signature: Signature = validationResult.valid
			? { type: 'approve', signature: 'approved' }
			: { type: 'reject', signature: 'rejected', rejectReason: validationResult.reason };

		if (!validationResult.valid) {
			log('cluster-member:validation-rejected', {
				messageHash: record.messageHash,
				reason: validationResult.reason
			});
		}

		return {
			...record,
			promises: {
				...record.promises,
				[this.peerId.toString()]: signature
			}
		};
	}

	/**
	 * Validates pend operations in a cluster record using the transaction validator.
	 * Also checks for stale revisions to prevent consensus on operations that would fail.
	 * Returns success if no validator is configured (backwards compatibility).
	 */
	private async validatePendOperations(record: ClusterRecord): Promise<{ valid: boolean; reason?: string }> {
		// Find pend operations in the message
		for (const operation of record.message.operations) {
			if ('pend' in operation) {
				const pendRequest = operation.pend;

				// Check for stale revisions before allowing consensus
				if (pendRequest.rev !== undefined) {
					const blockIds = blockIdsForTransforms(pendRequest.transforms);
					// Get block states to check latest revisions
					const blockResults = await this.storageRepo.get({ blockIds });
					for (const blockId of blockIds) {
						const blockResult = blockResults[blockId];
						const latestRev = blockResult?.state?.latest?.rev;
						// A block already at or beyond the requested revision makes the pend stale
						if (latestRev !== undefined && latestRev >= pendRequest.rev) {
							log('cluster-member:validation-stale-revision', {
								messageHash: record.messageHash,
								blockId,
								requestedRev: pendRequest.rev,
								latestRev
							});
							return { valid: false, reason: `stale revision: block ${blockId} at rev ${latestRev}, requested rev ${pendRequest.rev}` };
						}
					}
				}

				// Run custom validator if configured
				if (this.validator && pendRequest.transaction && pendRequest.operationsHash) {
					const result = await this.validator.validate(pendRequest.transaction, pendRequest.operationsHash);
					if (!result.valid) {
						return { valid: false, reason: result.reason };
					}
				}
			}
		}

		return { valid: true };
	}

	/** Adds this member's approving commit to the record unless it is already present. */
	private async handleCommitNeeded(record: ClusterRecord): Promise<ClusterRecord> {
		if (this.hasLocalCommit(record)) {
			return record;
		}
		const signature: Signature = {
			type: 'approve',
			signature: 'committed' // TODO: Actually sign the commit hash
		};

		return {
			...record,
			commits: {
				...record.commits,
				[this.peerId.toString()]: signature
			}
		};
	}

	/**
	 * Executes operations after consensus is reached.
	 *
	 * @warning This method executes on ALL cluster peers, not just the coordinator.
	 * Each peer independently applies the operations to its local storage.
	 *
	 * @pitfall **Check-then-act race** - Must check AND mark as executed atomically
	 * (before any `await`) to prevent duplicate execution. JavaScript's single-threaded
	 * nature makes synchronous check-and-set atomic.
	 *
	 * @pitfall **Independent node storage** - Each node has its own storage. After consensus,
	 * each node applies operations locally. Nodes must fetch missing blocks from cluster
	 * peers via `restoreCallback` if they don't have prior revisions.
	 *
	 * @throws Error when a pend or commit reports failure; the executed marker is removed
	 *         first so the transaction can be retried
	 * @see docs/internals.md "Check-Then-Act Race in Consensus" and "Independent Node Storage" pitfalls
	 */
	private async handleConsensus(record: ClusterRecord): Promise<void> {
		// Check-and-set ATOMICALLY to prevent race condition where multiple calls
		// pass the check before any completes. Since JavaScript is single-threaded,
		// this synchronous check-and-set is atomic before any await.
		if (this.executedTransactions.has(record.messageHash)) {
			log('cluster-member:consensus-already-executed', { messageHash: record.messageHash });
			return;
		}
		// Mark as executing IMMEDIATELY before any async operations
		this.executedTransactions.set(record.messageHash, Date.now());

		try {
			// Execute the operations - check return values for failures
			for (const operation of record.message.operations) {
				if ('get' in operation) {
					await this.storageRepo.get(operation.get);
				} else if ('pend' in operation) {
					const result = await this.storageRepo.pend(operation.pend);
					if (!result.success) {
						log('cluster-member:consensus-pend-failed', {
							messageHash: record.messageHash,
							actionId: operation.pend.actionId,
							reason: result.reason,
							hasMissing: !!result.missing?.length,
							hasPending: !!result.pending?.length
						});
						throw new Error(`Consensus pend failed for action ${operation.pend.actionId}: ${result.reason ?? 'stale revision'}`);
					}
				} else if ('commit' in operation) {
					const result = await this.storageRepo.commit(operation.commit);
					if (!result.success) {
						log('cluster-member:consensus-commit-failed', {
							messageHash: record.messageHash,
							actionId: operation.commit.actionId,
							reason: result.reason,
							hasMissing: !!result.missing?.length
						});
						throw new Error(`Consensus commit failed for action ${operation.commit.actionId}: ${result.reason ?? 'stale revision'}`);
					}
				} else if ('cancel' in operation) {
					await this.storageRepo.cancel(operation.cancel.actionRef);
				}
			}
		} catch (err) {
			// On failure, remove from executedTransactions so it can be retried
			this.executedTransactions.delete(record.messageHash);
			throw err;
		}
	}

	/** Intentionally empty: the caller clears the transaction via `shouldPersist = false`. */
	private async handleRejection(record: ClusterRecord): Promise<void> {
		// Clean up any resources - will be cleared by shouldPersist = false in the main flow
	}

	/**
	 * Arms the expiration timer and the follow-up resolution timer (5 s later) for a
	 * record. Returns an empty object when the message has no expiration.
	 * Delays are clamped to the range `setTimeout` can represent.
	 */
	private setupTimeouts(record: ClusterRecord): { promiseTimeout?: NodeJS.Timeout; resolutionTimeout?: NodeJS.Timeout } {
		const expiration = record.message.expiration;
		if (!expiration) {
			return {};
		}

		const { messageHash } = record;
		const now = Date.now();
		const clampDelay = (ms: number): number => Math.min(Math.max(ms, 0), ClusterMember.MaxTimerDelayMs);
		// Timer callbacks have no caller to report to, so log any rejection instead of leaving it unhandled
		const reportFailure = (event: string) => (err: unknown): void => {
			log(event, { messageHash, error: err instanceof Error ? err.message : String(err) });
		};

		return {
			promiseTimeout: setTimeout(
				() => { void this.handleExpiration(messageHash).catch(reportFailure('cluster-member:expiration-error')); },
				clampDelay(expiration - now)
			),
			resolutionTimeout: setTimeout(
				() => { void this.resolveWithPeers(messageHash).catch(reportFailure('cluster-member:resolve-error')); },
				clampDelay(expiration + 5000 - now)
			)
		};
	}

	/**
	 * Decides whether an active transaction blocks the incoming record.
	 * Side effects: clears active transactions older than the stale threshold and
	 * active transactions that lose race resolution against the incoming record.
	 *
	 * @returns true when an existing conflicting transaction wins and the incoming one must wait
	 */
	private hasConflict(record: ClusterRecord): boolean {
		const now = Date.now();
		const staleThresholdMs = 2000; // 2 seconds - allow more time for distributed consensus

		const incomingBlockIds = this.getAffectedBlockIds(record.message.operations);
		log('cluster-member:hasConflict-check', {
			messageHash: record.messageHash,
			activeCount: this.activeTransactions.size,
			incomingBlockIds
		});

		// Iterate over a snapshot because entries may be cleared inside the loop
		for (const [existingHash, state] of Array.from(this.activeTransactions.entries())) {
			if (existingHash === record.messageHash) {
				continue;
			}

			const existingBlockIds = this.getAffectedBlockIds(state.record.message.operations);
			log('cluster-member:hasConflict-compare', {
				existing: existingHash,
				incoming: record.messageHash,
				existingBlockIds,
				incomingBlockIds
			});

			// Clean up stale transactions that have been around too long
			if (now - state.lastUpdate > staleThresholdMs) {
				log('cluster-member:stale-cleanup', {
					messageHash: existingHash,
					age: now - state.lastUpdate
				});
				this.clearTransaction(existingHash);
				continue;
			}

			if (this.operationsConflict(state.record.message.operations, record.message.operations)) {
				// Use race resolution to determine winner
				const resolution = this.resolveRace(state.record, record);

				if (resolution === 'keep-existing') {
					log('cluster-member:race-keep-existing', {
						existing: existingHash,
						incoming: record.messageHash
					});
					return true; // Reject incoming
				} else {
					// Accept incoming, abort existing
					log('cluster-member:race-accept-incoming', {
						existing: existingHash,
						incoming: record.messageHash
					});
					this.clearTransaction(existingHash);
					continue; // Check other conflicts
				}
			}
		}

		return false; // No blocking conflicts
	}

	/**
	 * Resolve race between two conflicting transactions.
	 * Transaction with more promises wins. If tied, higher hash wins.
	 */
	private resolveRace(existing: ClusterRecord, incoming: ClusterRecord): 'keep-existing' | 'accept-incoming' {
		const existingCount = Object.keys(existing.promises).length;
		const incomingCount = Object.keys(incoming.promises).length;

		// Transaction with more promises wins
		if (existingCount > incomingCount) {
			return 'keep-existing';
		}
		if (incomingCount > existingCount) {
			return 'accept-incoming';
		}

		// Tie-breaker: higher message hash wins (deterministic)
		return existing.messageHash > incoming.messageHash ? 'keep-existing' : 'accept-incoming';
	}

	/**
	 * Two operation lists conflict when they touch at least one common block,
	 * unless both refer to the same action id.
	 */
	private operationsConflict(ops1: RepoMessage['operations'], ops2: RepoMessage['operations']): boolean {
		// Check if one is a commit for the same action as a pend - these don't conflict
		const actionId1 = this.getActionId(ops1);
		const actionId2 = this.getActionId(ops2);
		if (actionId1 && actionId2 && actionId1 === actionId2) {
			// Same action - commit is resolving the pend, not conflicting
			return false;
		}

		const blocks1 = new Set(this.getAffectedBlockIds(ops1));
		const blocks2 = new Set(this.getAffectedBlockIds(ops2));

		for (const block of Array.from(blocks1)) {
			if (blocks2.has(block)) {
				log('cluster-member:conflict-detected', {
					blocks1: Array.from(blocks1),
					blocks2: Array.from(blocks2),
					conflictingBlock: block
				});
				return true;
			}
		}

		return false;
	}

	/** Action id of the first pend, commit, or cancel operation in the list, if any. */
	private getActionId(operations: RepoMessage['operations']): string | undefined {
		for (const operation of operations) {
			if ('pend' in operation) {
				return operation.pend.actionId;
			} else if ('commit' in operation) {
				return operation.commit.actionId;
			} else if ('cancel' in operation) {
				return operation.cancel.actionRef.actionId;
			}
		}
		return undefined;
	}

	/** De-duplicated block ids referenced by any operation in the list. */
	private getAffectedBlockIds(operations: RepoMessage['operations']): string[] {
		const blockIds = new Set<string>();

		for (const operation of operations) {
			if ('get' in operation) {
				operation.get.blockIds.forEach(id => blockIds.add(id));
			} else if ('pend' in operation) {
				// Use blockIdsForTransforms to correctly extract block IDs from Transforms structure
				blockIdsForTransforms(operation.pend.transforms).forEach(id => blockIds.add(id));
			} else if ('commit' in operation) {
				operation.commit.blockIds.forEach(id => blockIds.add(id));
			} else if ('cancel' in operation) {
				operation.cancel.actionRef.blockIds.forEach(id => blockIds.add(id));
			}
		}

		return Array.from(blockIds);
	}

	/**
	 * Sends the record to every other peer listed in it and waits for all sends to
	 * settle. Failures to create a client are reported and skipped; send failures
	 * are absorbed by `Promise.allSettled`.
	 */
	private async propagateIfNeeded(record: ClusterRecord): Promise<void> {
		const ourId = this.peerId.toString();
		const sends: Promise<unknown>[] = [];
		for (const peerId of Object.keys(record.peers)) {
			if (peerId === ourId) continue;

			try {
				const client = ClusterClient.create(peerIdFromString(peerId), this.peerNetwork, this.protocolPrefix);
				sends.push(client.update(record));
			} catch (error) {
				console.error(`Failed to propagate to peer ${peerId}:`, error);
			}
		}
		await Promise.allSettled(sends);
	}

	/**
	 * Expiration timer handler: if the transaction is still active and we have not
	 * promised yet, records a rejecting promise and propagates it to the peers.
	 */
	private async handleExpiration(messageHash: string): Promise<void> {
		const state = this.activeTransactions.get(messageHash);
		if (!state) return;

		if (!state.record.promises[this.peerId.toString()]) {
			const signature: Signature = {
				type: 'reject',
				signature: 'rejected',
				rejectReason: 'Transaction expired'
			};

			const updatedRecord = {
				...state.record,
				promises: {
					...state.record.promises,
					[this.peerId.toString()]: signature
				}
			};

			this.activeTransactions.set(messageHash, {
				...state,
				record: updatedRecord
			});

			await this.propagateIfNeeded(updatedRecord);
		}
	}

	/** Resolution timer handler; currently only logs that resolution is skipped. */
	private async resolveWithPeers(messageHash: string): Promise<void> {
		// This method is disabled - the coordinator handles all retry logic
		// Keeping the skeleton in case we need peer-initiated recovery in the future
		log('cluster-member:resolve-skipped', { messageHash, reason: 'coordinator-handles-retry' });
	}

	/**
	 * Queues active transactions whose expiration has passed for cleanup and prunes
	 * executed-transaction records older than `ExecutedTransactionTtlMs`.
	 */
	private queueExpiredTransactions(): void {
		const now = Date.now();
		for (const [messageHash, state] of Array.from(this.activeTransactions.entries())) {
			if (state.record.message.expiration && state.record.message.expiration < now) {
				this.cleanupQueue.push(messageHash);
			}
		}
		// Also clean up old executed transaction records
		const expirationThreshold = now - ExecutedTransactionTtlMs;
		for (const [messageHash, executedAt] of Array.from(this.executedTransactions.entries())) {
			if (executedAt < expirationThreshold) {
				this.executedTransactions.delete(messageHash);
			}
		}
	}

	/**
	 * Drains the cleanup queue, clearing each queued transaction that is still
	 * active and is not in the Consensus or Rejected phase.
	 */
	private async processCleanupQueue(): Promise<void> {
		while (this.cleanupQueue.length > 0) {
			const messageHash = this.cleanupQueue.shift();
			if (!messageHash) continue;

			const state = this.activeTransactions.get(messageHash);
			if (!state) continue;

			const phase = await this.getTransactionPhase(state.record);
			if (phase !== TransactionPhase.Consensus && phase !== TransactionPhase.Rejected) {
				// clearTransaction also cancels the state's timers; a bare delete would leak them
				this.clearTransaction(messageHash);
			}
		}
	}

	/** True when the record already carries a commit from this member. */
	private hasLocalCommit(record: ClusterRecord): boolean {
		const ourId = this.peerId.toString();
		return Boolean(record.commits[ourId]);
	}

	/** Cancels the promise and resolution timers of a transaction state, if armed. */
	private cancelTimers(state: TransactionState): void {
		if (state.promiseTimeout) {
			clearTimeout(state.promiseTimeout);
		}
		if (state.resolutionTimeout) {
			clearTimeout(state.resolutionTimeout);
		}
	}

	/** Removes a transaction from the active set and cancels its timers; logs a miss when absent. */
	private clearTransaction(messageHash: string): void {
		const state = this.activeTransactions.get(messageHash);
		if (!state) {
			log('cluster-member:clear-miss', { messageHash });
			return;
		}
		this.cancelTimers(state);
		this.activeTransactions.delete(messageHash);
		log('cluster-member:clear-done', {
			messageHash,
			remaining: Array.from(this.activeTransactions.keys())
		});
	}
}
836
+