@vuer-ai/vuer-rtc-server 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. package/.env +1 -0
  2. package/S3_COMPRESSION_GUIDE.md +233 -0
  3. package/dist/archive/ArchivalService.d.ts +117 -0
  4. package/dist/archive/ArchivalService.d.ts.map +1 -0
  5. package/dist/archive/ArchivalService.js +181 -0
  6. package/dist/archive/ArchivalService.js.map +1 -0
  7. package/dist/broker/InMemoryBroker.d.ts +2 -0
  8. package/dist/broker/InMemoryBroker.d.ts.map +1 -1
  9. package/dist/broker/InMemoryBroker.js +4 -0
  10. package/dist/broker/InMemoryBroker.js.map +1 -1
  11. package/dist/compression/CompressionUtils.d.ts +57 -0
  12. package/dist/compression/CompressionUtils.d.ts.map +1 -0
  13. package/dist/compression/CompressionUtils.js +90 -0
  14. package/dist/compression/CompressionUtils.js.map +1 -0
  15. package/dist/compression/index.d.ts +7 -0
  16. package/dist/compression/index.d.ts.map +1 -0
  17. package/dist/compression/index.js +7 -0
  18. package/dist/compression/index.js.map +1 -0
  19. package/dist/journal/CoalescingService.d.ts +63 -0
  20. package/dist/journal/CoalescingService.d.ts.map +1 -0
  21. package/dist/journal/CoalescingService.js +507 -0
  22. package/dist/journal/CoalescingService.js.map +1 -0
  23. package/dist/journal/JournalRLE.d.ts +81 -0
  24. package/dist/journal/JournalRLE.d.ts.map +1 -0
  25. package/dist/journal/JournalRLE.js +199 -0
  26. package/dist/journal/JournalRLE.js.map +1 -0
  27. package/dist/journal/JournalService.d.ts +7 -3
  28. package/dist/journal/JournalService.d.ts.map +1 -1
  29. package/dist/journal/JournalService.js +152 -12
  30. package/dist/journal/JournalService.js.map +1 -1
  31. package/dist/journal/RLECompression.d.ts +73 -0
  32. package/dist/journal/RLECompression.d.ts.map +1 -0
  33. package/dist/journal/RLECompression.js +152 -0
  34. package/dist/journal/RLECompression.js.map +1 -0
  35. package/dist/journal/rle-demo.d.ts +8 -0
  36. package/dist/journal/rle-demo.d.ts.map +1 -0
  37. package/dist/journal/rle-demo.js +159 -0
  38. package/dist/journal/rle-demo.js.map +1 -0
  39. package/dist/persistence/S3ColdStorage.d.ts +62 -0
  40. package/dist/persistence/S3ColdStorage.d.ts.map +1 -0
  41. package/dist/persistence/S3ColdStorage.js +88 -0
  42. package/dist/persistence/S3ColdStorage.js.map +1 -0
  43. package/dist/persistence/S3ColdStorageIntegration.d.ts +78 -0
  44. package/dist/persistence/S3ColdStorageIntegration.d.ts.map +1 -0
  45. package/dist/persistence/S3ColdStorageIntegration.js +93 -0
  46. package/dist/persistence/S3ColdStorageIntegration.js.map +1 -0
  47. package/dist/serve.d.ts +2 -0
  48. package/dist/serve.d.ts.map +1 -1
  49. package/dist/serve.js +623 -15
  50. package/dist/serve.js.map +1 -1
  51. package/docs/RLE_COMPRESSION.md +397 -0
  52. package/examples/compression-example.ts +259 -0
  53. package/package.json +14 -14
  54. package/src/archive/ArchivalService.ts +250 -0
  55. package/src/broker/InMemoryBroker.ts +5 -0
  56. package/src/compression/CompressionUtils.ts +113 -0
  57. package/src/compression/index.ts +14 -0
  58. package/src/journal/COALESCING.md +267 -0
  59. package/src/journal/CoalescingService.ts +626 -0
  60. package/src/journal/JournalRLE.ts +265 -0
  61. package/src/journal/JournalService.ts +163 -11
  62. package/src/journal/RLECompression.ts +210 -0
  63. package/src/journal/rle-demo.ts +193 -0
  64. package/src/serve.ts +702 -15
  65. package/tests/benchmark/journal-optimization-benchmark.test.ts +482 -0
  66. package/tests/compression/compression.test.ts +343 -0
  67. package/tests/integration/repositories.test.ts +89 -0
  68. package/tests/journal/compaction-load-bug.test.ts +409 -0
  69. package/tests/journal/compaction.test.ts +42 -2
  70. package/tests/journal/journal-rle.test.ts +511 -0
  71. package/tests/journal/lww-ordering-bug.test.ts +248 -0
  72. package/tests/journal/multi-session-coalescing.test.ts +871 -0
  73. package/tests/journal/rle-compression.test.ts +526 -0
  74. package/tests/journal/text-coalescing.test.ts +210 -0
  75. package/tests/unit/s3-compression.test.ts +257 -0
  76. package/PHASE1_SUMMARY.md +0 -94
@@ -0,0 +1,265 @@
1
+ /**
2
+ * Run-Length Encoding (RLE) for Journal Storage
3
+ *
4
+ * Reduces journal storage size by:
5
+ * 1. Run-length encoding consecutive operations from the same agent
6
+ * 2. Storing agent ID only when it changes
7
+ * 3. Grouping consecutive operations into compact sequences
8
+ *
9
+ * Design:
10
+ * - Encodes a sequence of CRDTMessages into RLE-compressed format
11
+ * - Preserves all CRDT semantics (causality, ordering, timestamps)
12
+ * - Includes metadata for safe decompression
13
+ */
14
+
15
+ import type { CRDTMessage } from '@vuer-ai/vuer-rtc';
16
+
17
/**
 * One run of back-to-back journal messages that all carry the same sessionId.
 */
export interface RLESegment {
  /** sessionId shared by the run, taken from the run's first message. */
  agentId: string;
  /** Number of messages in the run. */
  count: number;
  /** The run's messages, stored whole and in journal order. */
  messages: CRDTMessage[];
}
25
+
26
/**
 * Container for a journal after run-length encoding.
 */
export interface RLEEncodedJournal {
  /** Format version; the only defined value is 1. */
  version: 1;
  /** Message count of the journal before encoding. */
  totalMessages: number;
  /** Same-session runs, in journal order. */
  segments: RLESegment[];
  /** Size estimates captured when the journal was encoded. */
  metadata: {
    /** originalSize / compressedSize. */
    compressionRatio: number;
    /** Estimated size of the plain JSON form, in bytes. */
    originalSize: number;
    /** Estimated size of the RLE form, in bytes. */
    compressedSize: number;
  };
}
39
+
40
/**
 * Encode a sequence of CRDTMessages using RLE.
 *
 * Consecutive messages with the same sessionId are gathered into one segment
 * that records the sessionId once, the run length, and the messages themselves
 * (by reference, unmodified and in their original order).
 *
 * The metadata sizes are the UTF-8 byte lengths of the JSON serialization of
 * the input array and of the segment array. They are measured in bytes rather
 * than string length so that non-ASCII payloads are not under-counted.
 *
 * @param messages Journal messages in journal order.
 * @returns The encoded journal; an empty input yields zero sizes and a ratio of 1.
 */
export function encodeJournalRLE(messages: CRDTMessage[]): RLEEncodedJournal {
  if (messages.length === 0) {
    return {
      version: 1,
      totalMessages: 0,
      segments: [],
      metadata: {
        compressionRatio: 1,
        originalSize: 0,
        compressedSize: 0,
      },
    };
  }

  const segments: RLESegment[] = [];
  for (const msg of messages) {
    const open = segments[segments.length - 1];
    if (open !== undefined && open.agentId === msg.sessionId) {
      // Same session as the run in progress: extend it.
      open.messages.push(msg);
      open.count = open.messages.length;
    } else {
      // Session changed (or first message): start a new run.
      segments.push({ agentId: msg.sessionId, count: 1, messages: [msg] });
    }
  }

  // String.length counts UTF-16 code units; encode to UTF-8 to get real bytes.
  const utf8 = new TextEncoder();
  const originalSize = utf8.encode(JSON.stringify(messages)).length;
  const compressedSize = utf8.encode(JSON.stringify(segments)).length;

  return {
    version: 1,
    totalMessages: messages.length,
    segments,
    metadata: {
      compressionRatio: originalSize > 0 ? originalSize / compressedSize : 1,
      originalSize,
      compressedSize,
    },
  };
}
105
+
106
/**
 * Decode an RLE-encoded journal back to a flat list of CRDTMessages.
 *
 * Segments are walked in order and their messages appended unchanged, so the
 * result has the same order the messages had when they were encoded.
 *
 * Three consistency checks guard against a corrupted or hand-built encoding:
 * each segment's declared `count` must equal the number of messages it holds,
 * every message must carry the segment's sessionId, and the overall number of
 * decoded messages must equal `totalMessages`.
 *
 * @param encoded Journal produced by the RLE encoder.
 * @returns The messages in their original order.
 * @throws Error when any of the consistency checks fails.
 */
export function decodeJournalRLE(encoded: RLEEncodedJournal): CRDTMessage[] {
  const messages: CRDTMessage[] = [];

  for (const segment of encoded.segments) {
    // The run length is redundant with the payload; a disagreement means the
    // segment was truncated or altered after encoding.
    if (segment.count !== segment.messages.length) {
      throw new Error(
        `RLE decode error: count mismatch in segment for ${segment.agentId}. ` +
        `Declared ${segment.count}, found ${segment.messages.length}`
      );
    }

    for (const msg of segment.messages) {
      if (msg.sessionId !== segment.agentId) {
        throw new Error(
          `RLE decode error: sessionId mismatch in segment. ` +
          `Expected ${segment.agentId}, got ${msg.sessionId}`
        );
      }
      messages.push(msg);
    }
  }

  // Sanity check: verify we got back the right count
  if (messages.length !== encoded.totalMessages) {
    throw new Error(
      `RLE decode error: expected ${encoded.totalMessages} messages, ` +
      `got ${messages.length}`
    );
  }

  return messages;
}
138
+
139
/**
 * Check that an encoded journal decodes back to the given original messages.
 *
 * The declared total is compared first, then the journal is decoded. A decode
 * failure ends verification immediately. Otherwise each message pair (up to
 * the shorter of the two lists) is compared on id, sessionId, vector clock
 * (same key set, same value per key), lamportTime, timestamp, and operations
 * (by comparing their JSON serializations).
 *
 * @param original Messages as they were before encoding.
 * @param encoded  Encoded journal to verify.
 * @returns `valid` is true only when no problem was recorded; `errors` lists
 *          every problem found, in the order it was detected.
 */
export function verifyRLEIntegrity(
  original: CRDTMessage[],
  encoded: RLEEncodedJournal
): { valid: boolean; errors: string[] } {
  const errors: string[] = [];
  const report = (text: string): void => {
    errors.push(text);
  };

  if (original.length !== encoded.totalMessages) {
    report(`Message count mismatch: expected ${original.length}, got ${encoded.totalMessages}`);
  }

  let decoded: CRDTMessage[];
  try {
    decoded = decodeJournalRLE(encoded);
  } catch (err) {
    // Nothing to compare against when decoding itself fails.
    report(`Failed to decode: ${(err as Error).message}`);
    return { valid: false, errors };
  }

  if (original.length !== decoded.length) {
    report(`Decoded length mismatch: expected ${original.length}, got ${decoded.length}`);
  }

  const overlap = Math.min(original.length, decoded.length);
  for (let i = 0; i < overlap; i++) {
    const expected = original[i];
    const actual = decoded[i];

    if (expected.id !== actual.id) {
      report(`Message ${i}: id mismatch (${expected.id} vs ${actual.id})`);
    }

    if (expected.sessionId !== actual.sessionId) {
      report(`Message ${i}: sessionId mismatch`);
    }

    // Vector clocks: the key sets must match before values are compared.
    const expectedKeys = Object.keys(expected.clock).sort();
    const actualKeys = Object.keys(actual.clock).sort();
    const sameKeySet =
      expectedKeys.length === actualKeys.length &&
      expectedKeys.every((k, pos) => actualKeys[pos] === k);
    if (sameKeySet) {
      expectedKeys
        .filter((k) => expected.clock[k] !== actual.clock[k])
        .forEach((k) => report(`Message ${i}: vector clock[${k}] mismatch`));
    } else {
      report(`Message ${i}: vector clock structure mismatch`);
    }

    if (expected.lamportTime !== actual.lamportTime) {
      report(`Message ${i}: lamportTime mismatch`);
    }

    if (expected.timestamp !== actual.timestamp) {
      report(`Message ${i}: timestamp mismatch`);
    }

    if (JSON.stringify(expected.ops) !== JSON.stringify(actual.ops)) {
      report(`Message ${i}: operations mismatch`);
    }
  }

  return { valid: errors.length === 0, errors };
}
229
+
230
/**
 * Summarize the size metadata of an encoded journal.
 *
 * All byte figures are read from `encoded.metadata`; nothing is re-measured.
 *
 * @param encoded Encoded journal whose metadata is summarized.
 * @returns
 *  - `ratio`: the stored compression ratio (original / compressed)
 *  - `percentSaved`: saved bytes as a percentage of the original size,
 *    or 0 when the original size is 0
 *  - `originalBytes` / `compressedBytes` / `savedBytes`: the stored sizes and
 *    their difference (negative when the encoded form is larger)
 *  - `segmentCount`: number of segments
 *  - `avgMessagesPerSegment`: `totalMessages / segmentCount`, or 0 when there
 *    are no segments
 */
export function getCompressionStats(encoded: RLEEncodedJournal): {
  ratio: number;
  percentSaved: number;
  originalBytes: number;
  compressedBytes: number;
  savedBytes: number;
  segmentCount: number;
  avgMessagesPerSegment: number;
} {
  const { originalSize, compressedSize, compressionRatio } = encoded.metadata;
  const savedBytes = originalSize - compressedSize;
  const segmentCount = encoded.segments.length;

  return {
    ratio: compressionRatio,
    percentSaved: originalSize > 0 ? (savedBytes / originalSize) * 100 : 0,
    originalBytes: originalSize,
    compressedBytes: compressedSize,
    savedBytes,
    segmentCount,
    // Guard on the divisor itself: a journal that declares messages but holds
    // no segments must not report Infinity.
    avgMessagesPerSegment: segmentCount > 0 ? encoded.totalMessages / segmentCount : 0,
  };
}
@@ -13,16 +13,48 @@ import type { PrismaClient, Document } from '@prisma/client';
13
13
  import {
14
14
  type CRDTMessage,
15
15
  type SceneGraph,
16
+ type SceneNode,
16
17
  type Snapshot,
17
18
  type VectorClock,
18
19
  applyMessage,
19
20
  createEmptyGraph,
20
21
  OperationValidator,
22
+ TextRope,
23
+ compactRope,
24
+ toRaw,
25
+ fromRaw,
21
26
  } from '@vuer-ai/vuer-rtc';
22
27
 
23
28
  import { JournalRepository } from './JournalRepository.js';
24
29
  import { DocumentRepository } from '../persistence/DocumentRepository.js';
25
30
 
31
/**
 * Produce a plain, JSON-safe deep copy of a value.
 *
 * - Every property named 'parent' is dropped (parent back-pointers are the
 *   usual source of cycles in tree structures).
 * - TextRope instances are written as `{ _textRope: true, raw: toRaw(rope) }`.
 * - A reference that points back to one of its own ancestors (a true cycle)
 *   is dropped. An object that is merely referenced from several places is
 *   NOT a cycle and is serialized at every place it appears.
 *
 * Cycle detection keeps a stack of the objects currently being serialized.
 * JSON.stringify invokes the replacer with `this` bound to the object that
 * holds the current key, so the stack is re-aligned to that holder on every
 * call; anything above it belongs to a branch that has already been finished.
 *
 * @param obj Value to serialize.
 * @returns The round-tripped (stringify, then parse) copy.
 */
function safeSerialize(obj: unknown): unknown {
  const ancestors: unknown[] = [];

  const replacer = function (this: unknown, key: string, value: unknown): unknown {
    // Skip parent references which cause cycles
    if (key === 'parent') return undefined;

    // Re-align the stack with the holder of this key. A holder that is not on
    // the stack yet is the value returned for the previous key (possibly a
    // replacement object created below), so it becomes the new top.
    const holderDepth = ancestors.lastIndexOf(this);
    if (holderDepth >= 0) {
      ancestors.length = holderDepth + 1;
    } else {
      ancestors.push(this);
    }

    // Properly serialize TextRope instances using toRaw()
    if (value instanceof TextRope) {
      return {
        _textRope: true,
        raw: toRaw(value),
      };
    }

    // Only a reference to an object still being serialized is circular.
    if (typeof value === 'object' && value !== null && ancestors.includes(value)) {
      return undefined;
    }
    return value;
  };

  return JSON.parse(JSON.stringify(obj, replacer));
}
57
+
26
58
  /** How often the compaction loop runs (ms). */
27
59
  const COMPACTION_INTERVAL_MS = 30_000;
28
60
 
@@ -39,20 +71,94 @@ export interface DocumentState {
39
71
  journal: JournalEntry[];
40
72
  }
41
73
 
74
/**
 * Rebuild TextRope instances inside a deserialized value.
 *
 * Any object whose `_textRope` is exactly true and whose `raw` is truthy is
 * replaced by `fromRaw(raw)`. Arrays and other objects are copied with the
 * same rule applied to each element / property value. Null, undefined and
 * non-object values are returned as they are.
 */
function restoreTextRopes(obj: any): any {
  // Null, undefined and non-object values pass straight through.
  if (obj === null || obj === undefined || typeof obj !== 'object') {
    return obj;
  }

  // Serialized rope marker takes precedence over generic traversal.
  if (obj._textRope === true && obj.raw) {
    return fromRaw(obj.raw);
  }

  if (Array.isArray(obj)) {
    return obj.map((item) => restoreTextRopes(item));
  }

  const rebuilt: any = {};
  Object.keys(obj).forEach((name) => {
    rebuilt[name] = restoreTextRopes(obj[name]);
  });
  return rebuilt;
}
101
+
42
102
  /**
43
103
  * Safely parse a Document.currentState (Json) into a Snapshot,
44
104
  * providing defaults for any missing fields.
105
+ * Restores TextRope instances from their serialized raw form.
45
106
  */
46
107
  function parseSnapshot(currentState: unknown): Snapshot {
47
108
  const raw = (currentState ?? {}) as Record<string, unknown>;
109
+
110
+ // Restore TextRope instances in the graph
111
+ const graph = restoreTextRopes(raw.graph as SceneGraph) || createEmptyGraph();
112
+
48
113
  return {
49
- graph: (raw.graph as SceneGraph) || createEmptyGraph(),
114
+ graph,
50
115
  vectorClock: (raw.vectorClock as Record<string, number>) || {},
51
116
  lamportTime: (typeof raw.lamportTime === 'number' ? raw.lamportTime : 0),
52
117
  journalIndex: (typeof raw.journalIndex === 'number' ? raw.journalIndex : 0),
53
118
  };
54
119
  }
55
120
 
121
/**
 * Run compactRope over every TextRope-valued property of every node in a graph.
 *
 * A node that owns at least one rope is shallow-cloned (with its own copy of
 * the `children` array) and the clone receives the compacted ropes; nodes
 * without ropes are reused as they are. When no node owns a rope the input
 * graph object itself is returned, otherwise a new graph object with the new
 * node table. The input graph and its nodes are never written to.
 */
function compactTextRopes(graph: SceneGraph): SceneGraph {
  let touched = false;
  const nodes: Record<string, SceneNode> = {};

  for (const id of Object.keys(graph.nodes)) {
    const source = graph.nodes[id];
    const ropeProps = Object.keys(source).filter((prop) => source[prop] instanceof TextRope);

    if (ropeProps.length === 0) {
      // Nothing to compact: share the existing node.
      nodes[id] = source;
      continue;
    }

    const copy: SceneNode = {
      ...source,
      children: [...(source.children ?? [])],
    };
    for (const prop of ropeProps) {
      copy[prop] = compactRope(source[prop] as TextRope);
    }
    nodes[id] = copy;
    touched = true;
  }

  return touched ? { ...graph, nodes } : graph;
}
161
+
56
162
  export class JournalService {
57
163
  private journalRepo: JournalRepository;
58
164
  private documentRepo: DocumentRepository;
@@ -300,9 +406,13 @@ export class JournalService {
300
406
  /**
301
407
  * Get state for new client (snapshot + only post-snapshot journal entries).
302
408
  *
303
- * After compaction the in-memory journal should already contain only
304
- * post-snapshot entries, but we apply an explicit lamportTime filter
305
- * as a safety check for the DB-reload path.
409
+ * After compaction, we need to filter journal entries that are already
410
+ * baked into the snapshot. We use vector clock comparison (not lamportTime)
411
+ * to correctly handle out-of-order or delayed messages.
412
+ *
413
+ * A message is included if ANY component of its vector clock is greater
414
+ * than the corresponding component in the snapshot's vector clock.
415
+ * This matches the client-side filtering logic in initFromServer().
306
416
  */
307
417
  async getStateForClient(documentId: string): Promise<{
308
418
  snapshot: Snapshot;
@@ -311,9 +421,18 @@ export class JournalService {
311
421
  const state = await this.loadDocument(documentId);
312
422
  if (!state) return null;
313
423
 
314
- // Only return entries after the snapshot's lamport time
424
+ // Filter journal entries using vector clock comparison (not lamportTime)
425
+ // to handle out-of-order messages correctly after compaction
315
426
  const postSnapshotJournal = state.journal
316
- .filter((e) => e.msg.lamportTime > state.snapshot.lamportTime)
427
+ .filter((e) => {
428
+ // Include message if ANY session in its clock is ahead of snapshot
429
+ for (const [sessionId, time] of Object.entries(e.msg.clock)) {
430
+ if (time > (state.snapshot.vectorClock[sessionId] ?? 0)) {
431
+ return true;
432
+ }
433
+ }
434
+ return false; // All clock components <= snapshot, already applied
435
+ })
317
436
  .map((e) => e.msg);
318
437
 
319
438
  return {
@@ -388,6 +507,15 @@ export class JournalService {
388
507
  .slice(0, compactUpToIndex + 1)
389
508
  .map((e) => e.msg.id);
390
509
 
510
+ // Compact TextRope instances before creating snapshot
511
+ // This merges single-char items into multi-char spans, preventing B-tree depth explosion
512
+ try {
513
+ newGraph = compactTextRopes(newGraph);
514
+ } catch (err: any) {
515
+ console.error(`[compact] TextRope compaction failed for doc ${documentId}:`, err);
516
+ throw new Error(`TextRope compaction failed: ${err.message}`);
517
+ }
518
+
391
519
  // Update snapshot
392
520
  state.snapshot = {
393
521
  graph: newGraph,
@@ -399,14 +527,38 @@ export class JournalService {
399
527
  // Remove compacted entries from in-memory journal
400
528
  state.journal = state.journal.slice(compactUpToIndex + 1);
401
529
 
402
- // Persist snapshot
403
- await this.documentRepo.update(documentId, {
404
- currentState: state.snapshot as any,
405
- });
530
+ // Persist snapshot (sanitize to break circular refs from parent pointers)
531
+ let sanitizedSnapshot: unknown;
532
+ try {
533
+ sanitizedSnapshot = safeSerialize(state.snapshot);
534
+ } catch (err: any) {
535
+ console.error(`[compact] Snapshot serialization failed for doc ${documentId}:`, err);
536
+ throw new Error(`Snapshot serialization failed: ${err.message}`);
537
+ }
538
+
539
+ try {
540
+ await this.documentRepo.update(documentId, {
541
+ currentState: sanitizedSnapshot as any,
542
+ });
543
+ } catch (err: any) {
544
+ console.error(`[compact] Document update failed for doc ${documentId}:`, err);
545
+ // If document was deleted, this is not a fatal error - just clean up journal
546
+ if (err?.code === 'P2025') {
547
+ console.warn(`[compact] Document ${documentId} not found, skipping snapshot update`);
548
+ } else {
549
+ throw new Error(`Document update failed: ${err.message}`);
550
+ }
551
+ }
406
552
 
407
553
  // Delete compacted entries from DB by their batchIds
408
554
  if (compactedBatchIds.length > 0) {
409
- await this.journalRepo.deleteByIds(documentId, compactedBatchIds);
555
+ try {
556
+ const deletedCount = await this.journalRepo.deleteByIds(documentId, compactedBatchIds);
557
+ console.log(`[compact] Deleted ${deletedCount} journal batches for doc ${documentId}`);
558
+ } catch (err: any) {
559
+ console.error(`[compact] Journal batch deletion failed for doc ${documentId}:`, err);
560
+ throw new Error(`Journal batch deletion failed: ${err.message}`);
561
+ }
410
562
  }
411
563
  } finally {
412
564
  this.compactionLocks.delete(documentId);
@@ -0,0 +1,210 @@
1
+ /**
2
+ * RLE (Run-Length Encoding) Compression for Journal Storage
3
+ *
4
+ * Optimizes journal storage by:
5
+ * 1. Run-length encoding consecutive operations (e.g., 10 sequential edits from same agent)
6
+ * 2. Only storing agent/sessionId when it changes
7
+ * 3. Preserving CRDT semantics (no operations are combined or lost)
8
+ *
9
+ * Format: { sessionId, count: N, ops: [op1, op2, ...] }
10
+ * - sessionId is stored with first op of each run
11
+ * - count tracks consecutive ops from same agent
12
+ * - ops are stored as-is (no merging/combining)
13
+ */
14
+
15
+ import type { CRDTMessage } from '@vuer-ai/vuer-rtc';
16
+
17
/**
 * One run of consecutive journal messages from a single session.
 */
export interface RLEJournalEntry {
  /** Session (agent) that produced every message in the run. */
  sessionId: string;
  /** Number of messages folded into the run. */
  count: number;
  /** lamportTime of the first message in the run. */
  lamportTime: number;
  /** lamportTime of the last message in the run. */
  endLamportTime: number;
  /** Operations of all messages in the run, concatenated in message order. */
  ops: any[];
  /** Wall-clock timestamp of the first message in the run. */
  timestamp: number;
}
28
+
29
/**
 * Group consecutive messages from the same session into run-length entries.
 *
 * A message extends the current run only when it has the same sessionId AND
 * its lamportTime is exactly one greater than the run's last lamportTime.
 * The decoders rebuild each message's lamportTime as `start + index`, so a
 * run must never span a gap, a repeat, or an out-of-order time; in those
 * cases a new run is started instead. Within every entry
 * `endLamportTime === lamportTime + count - 1` therefore holds.
 *
 * Each entry keeps the operations of its messages concatenated in order
 * (copied into a fresh array; the input messages are not modified) and the
 * timestamp of its first message only. Message ids, vector clocks and the
 * timestamps of later messages in a run are not stored in the entries.
 *
 * Example:
 *   Input:  [A@1, A@2, A@5, B@6]
 *   Output: [ {A, count 2, 1..2}, {A, count 1, 5..5}, {B, count 1, 6..6} ]
 *
 * @param messages Journal messages in journal order.
 * @returns The runs, in journal order; empty for an empty input.
 */
export function encodeRLE(messages: CRDTMessage[]): RLEJournalEntry[] {
  const encoded: RLEJournalEntry[] = [];

  for (const msg of messages) {
    const run = encoded[encoded.length - 1];

    if (
      run !== undefined &&
      run.sessionId === msg.sessionId &&
      msg.lamportTime === run.endLamportTime + 1
    ) {
      // Extend current run
      run.count++;
      run.endLamportTime = msg.lamportTime;
      run.ops.push(...msg.ops);
    } else {
      // Start new run (first message, session change, or lamport discontinuity)
      encoded.push({
        sessionId: msg.sessionId,
        count: 1,
        lamportTime: msg.lamportTime,
        endLamportTime: msg.lamportTime,
        ops: [...msg.ops],
        timestamp: msg.timestamp,
      });
    }
  }

  return encoded;
}
75
+
76
/**
 * Expand RLE entries back into one message per original message.
 *
 * For every entry, `count` messages are produced. Message `i` of an entry gets
 * lamportTime `entry.lamportTime + i`, the entry's sessionId and timestamp,
 * and the next slice of the entry's operation list.
 *
 * Limitations of this format: original ids are not available (a synthetic
 * `msg-<lamportTime>` id is used) and vector clocks are not available (an
 * empty clock is used).
 *
 * @param entries           Runs to expand.
 * @param opCountPerMessage Number of operations each original message had, in
 *                          original message order across all entries. A count
 *                          of 0 is honored (the message gets no operations);
 *                          a missing element defaults to 1.
 * @returns The reconstructed messages, in entry order.
 */
export function decodeRLE(
  entries: RLEJournalEntry[],
  opCountPerMessage: number[] // Array indicating how many ops per message
): CRDTMessage[] {
  const messages: CRDTMessage[] = [];
  let messageIdx = 0;

  for (const entry of entries) {
    let opOffset = 0;

    for (let i = 0; i < entry.count; i++) {
      // `??` rather than `||`: a message with zero operations must not
      // consume an operation that belongs to the next message.
      const opCount = opCountPerMessage[messageIdx] ?? 1;
      messageIdx++;

      const lamportTime = entry.lamportTime + i;
      messages.push({
        id: `msg-${lamportTime}`, // Note: Original IDs are lost; this is a limitation
        sessionId: entry.sessionId,
        clock: {}, // Vector clock info is lost in current RLE format
        lamportTime,
        timestamp: entry.timestamp,
        ops: entry.ops.slice(opOffset, opOffset + opCount),
      });
      opOffset += opCount;
    }
  }

  return messages;
}
120
+
121
/**
 * RLE entries together with the side tables needed to rebuild the fields the
 * entries themselves do not carry (per-message operation counts, ids and
 * vector clocks).
 */
export interface RLEEncodedWithMetadata {
  /** The run-length encoded journal. */
  entries: RLEJournalEntry[];
  /** Operation count of each original message, in original message order. */
  messageOpsCount: number[];
  /** Original message id, keyed by the message's lamportTime. */
  messageIds: Record<number, string>;
  /** Original vector clock, keyed by the message's lamportTime. */
  vectorClocks: Record<number, Record<string, number>>;
}
136
+
137
/**
 * Encode messages with encodeRLE and record the extra per-message data that
 * the entries do not carry.
 *
 * The side tables are: the operation count of every message (in input order),
 * and each message's id and vector clock keyed by its lamportTime. Because the
 * tables are keyed by lamportTime, a later message with the same lamportTime
 * replaces the entry written for an earlier one. Vector clocks are stored by
 * reference, not copied.
 */
export function encodeRLEWithMetadata(
  messages: CRDTMessage[]
): RLEEncodedWithMetadata {
  const entries = encodeRLE(messages);

  const messageIds: Record<number, string> = {};
  const vectorClocks: Record<number, Record<string, number>> = {};
  messages.forEach(({ lamportTime, id, clock }) => {
    messageIds[lamportTime] = id;
    vectorClocks[lamportTime] = clock;
  });

  return {
    entries,
    messageOpsCount: messages.map((m) => m.ops.length),
    messageIds,
    vectorClocks,
  };
}
157
+
158
/**
 * Expand RLE entries back into messages, restoring ids, vector clocks and
 * per-message operation boundaries from the accompanying side tables.
 *
 * Message `i` of an entry gets lamportTime `entry.lamportTime + i`; that
 * lamportTime is the key used to look up its id and vector clock. When a
 * table has no entry for that key, the id falls back to `msg-<lamportTime>`
 * and the clock to an empty object. Every message of an entry receives the
 * entry's timestamp.
 *
 * @param encoded Entries plus side tables.
 * @returns The reconstructed messages, in entry order. A recorded operation
 *          count of 0 is honored; a missing count defaults to 1.
 */
export function decodeRLEWithMetadata(
  encoded: RLEEncodedWithMetadata
): CRDTMessage[] {
  const messages: CRDTMessage[] = [];
  let messageIdx = 0;

  for (const entry of encoded.entries) {
    let opOffset = 0;

    for (let i = 0; i < entry.count; i++) {
      // `??` rather than `||`: a message recorded with zero operations must
      // stay empty instead of taking the next message's first operation.
      const opCount = encoded.messageOpsCount[messageIdx] ?? 1;
      messageIdx++;

      const lamportTime = entry.lamportTime + i;
      messages.push({
        // Fall back only when the table has no entry, not when the stored
        // value happens to be falsy (e.g. an empty-string id).
        id: encoded.messageIds[lamportTime] ?? `msg-${lamportTime}`,
        sessionId: entry.sessionId,
        clock: encoded.vectorClocks[lamportTime] ?? {},
        lamportTime,
        timestamp: entry.timestamp,
        ops: entry.ops.slice(opOffset, opOffset + opCount),
      });
      opOffset += opCount;
    }
  }

  return messages;
}
192
+
193
/**
 * Compare the serialized size of the original messages with that of their
 * RLE entries.
 *
 * Sizes are the UTF-8 byte lengths of the JSON serializations (not string
 * lengths, which would under-count non-ASCII content).
 *
 * @param original Messages before encoding.
 * @param encoded  RLE entries produced from them.
 * @returns `original` and `encoded` sizes in bytes, and `ratio`, the fraction
 *          of the original size that was saved:
 *          `(original - encoded) / original`. The ratio is at most 1 and is
 *          negative when the encoded form is larger than the original.
 */
export function calculateCompressionRatio(
  original: CRDTMessage[],
  encoded: RLEJournalEntry[]
): { original: number; encoded: number; ratio: number } {
  const utf8 = new TextEncoder();
  const originalBytes = utf8.encode(JSON.stringify(original)).length;
  const encodedBytes = utf8.encode(JSON.stringify(encoded)).length;

  return {
    original: originalBytes,
    encoded: encodedBytes,
    // originalBytes is never 0: even an empty array serializes to "[]".
    ratio: (originalBytes - encodedBytes) / originalBytes,
  };
}