npm - @vuer-ai/vuer-rtc-server - Versions diffs - 0.2.0 → 0.2.2 - Mend

@vuer-ai/vuer-rtc-server 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (76) hide show

package/.env +1 -0
package/S3_COMPRESSION_GUIDE.md +233 -0
package/dist/archive/ArchivalService.d.ts +117 -0
package/dist/archive/ArchivalService.d.ts.map +1 -0
package/dist/archive/ArchivalService.js +181 -0
package/dist/archive/ArchivalService.js.map +1 -0
package/dist/broker/InMemoryBroker.d.ts +2 -0
package/dist/broker/InMemoryBroker.d.ts.map +1 -1
package/dist/broker/InMemoryBroker.js +4 -0
package/dist/broker/InMemoryBroker.js.map +1 -1
package/dist/compression/CompressionUtils.d.ts +57 -0
package/dist/compression/CompressionUtils.d.ts.map +1 -0
package/dist/compression/CompressionUtils.js +90 -0
package/dist/compression/CompressionUtils.js.map +1 -0
package/dist/compression/index.d.ts +7 -0
package/dist/compression/index.d.ts.map +1 -0
package/dist/compression/index.js +7 -0
package/dist/compression/index.js.map +1 -0
package/dist/journal/CoalescingService.d.ts +63 -0
package/dist/journal/CoalescingService.d.ts.map +1 -0
package/dist/journal/CoalescingService.js +507 -0
package/dist/journal/CoalescingService.js.map +1 -0
package/dist/journal/JournalRLE.d.ts +81 -0
package/dist/journal/JournalRLE.d.ts.map +1 -0
package/dist/journal/JournalRLE.js +199 -0
package/dist/journal/JournalRLE.js.map +1 -0
package/dist/journal/JournalService.d.ts +7 -3
package/dist/journal/JournalService.d.ts.map +1 -1
package/dist/journal/JournalService.js +152 -12
package/dist/journal/JournalService.js.map +1 -1
package/dist/journal/RLECompression.d.ts +73 -0
package/dist/journal/RLECompression.d.ts.map +1 -0
package/dist/journal/RLECompression.js +152 -0
package/dist/journal/RLECompression.js.map +1 -0
package/dist/journal/rle-demo.d.ts +8 -0
package/dist/journal/rle-demo.d.ts.map +1 -0
package/dist/journal/rle-demo.js +159 -0
package/dist/journal/rle-demo.js.map +1 -0
package/dist/persistence/S3ColdStorage.d.ts +62 -0
package/dist/persistence/S3ColdStorage.d.ts.map +1 -0
package/dist/persistence/S3ColdStorage.js +88 -0
package/dist/persistence/S3ColdStorage.js.map +1 -0
package/dist/persistence/S3ColdStorageIntegration.d.ts +78 -0
package/dist/persistence/S3ColdStorageIntegration.d.ts.map +1 -0
package/dist/persistence/S3ColdStorageIntegration.js +93 -0
package/dist/persistence/S3ColdStorageIntegration.js.map +1 -0
package/dist/serve.d.ts +2 -0
package/dist/serve.d.ts.map +1 -1
package/dist/serve.js +623 -15
package/dist/serve.js.map +1 -1
package/docs/RLE_COMPRESSION.md +397 -0
package/examples/compression-example.ts +259 -0
package/package.json +14 -14
package/src/archive/ArchivalService.ts +250 -0
package/src/broker/InMemoryBroker.ts +5 -0
package/src/compression/CompressionUtils.ts +113 -0
package/src/compression/index.ts +14 -0
package/src/journal/COALESCING.md +267 -0
package/src/journal/CoalescingService.ts +626 -0
package/src/journal/JournalRLE.ts +265 -0
package/src/journal/JournalService.ts +163 -11
package/src/journal/RLECompression.ts +210 -0
package/src/journal/rle-demo.ts +193 -0
package/src/serve.ts +702 -15
package/tests/benchmark/journal-optimization-benchmark.test.ts +482 -0
package/tests/compression/compression.test.ts +343 -0
package/tests/integration/repositories.test.ts +89 -0
package/tests/journal/compaction-load-bug.test.ts +409 -0
package/tests/journal/compaction.test.ts +42 -2
package/tests/journal/journal-rle.test.ts +511 -0
package/tests/journal/lww-ordering-bug.test.ts +248 -0
package/tests/journal/multi-session-coalescing.test.ts +871 -0
package/tests/journal/rle-compression.test.ts +526 -0
package/tests/journal/text-coalescing.test.ts +210 -0
package/tests/unit/s3-compression.test.ts +257 -0
package/PHASE1_SUMMARY.md +0 -94

package/src/journal/JournalRLE.ts ADDED Viewed

@@ -0,0 +1,265 @@
+/**
+ * Run-Length Encoding (RLE) for Journal Storage
+ *
+ * Reduces journal storage size by:
+ * 1. Run-length encoding consecutive operations from the same agent
+ * 2. Recording the agent ID once per run on the segment (each stored message still keeps its own sessionId)
+ * 3. Grouping consecutive operations into compact sequences
+ *
+ * Design:
+ * - Encodes a sequence of CRDTMessages into RLE-compressed format
+ * - Preserves all CRDT semantics (causality, ordering, timestamps)
+ * - Includes metadata for safe decompression
+ */
+import type { CRDTMessage } from '@vuer-ai/vuer-rtc';
+/**
+ * One run of consecutive journal messages that share a sessionId.
+ *
+ * The messages are stored verbatim: each one still carries its own sessionId,
+ * which decodeJournalRLE checks against agentId.
+ */
+export interface RLESegment {
+  agentId: string;        // sessionId of the first message in the run (shared by every message in it)
+  count: number;          // Number of messages in this run (kept in step with messages.length by encodeJournalRLE)
+  messages: CRDTMessage[]; // The run's original messages, in order, unmodified
+}
+/**
+ * RLE-encoded journal format: an ordered list of segments plus size metadata.
+ */
+export interface RLEEncodedJournal {
+  version: 1;
+  totalMessages: number;  // Message count of the encoded input; decodeJournalRLE throws if the decoded count differs
+  segments: RLESegment[];
+  metadata: {
+    compressionRatio: number;    // originalSize / compressedSize (1 for an empty journal)
+    originalSize: number;        // Length of JSON.stringify(messages), in characters
+    compressedSize: number;      // Length of JSON.stringify(segments), in characters
+  };
+}
+/**
+ * Build the RLE representation of a journal.
+ *
+ * Walks the messages in order and opens a new segment whenever the sessionId
+ * differs from the previous message's; otherwise the message joins the segment
+ * currently being built. The size metadata is estimated from the JSON string
+ * lengths of the input and of the resulting segment list.
+ *
+ * @param messages - Journal messages in their original order
+ * @returns The encoded journal; an empty input yields zero segments and a ratio of 1
+ */
+export function encodeJournalRLE(messages: CRDTMessage[]): RLEEncodedJournal {
+  // An empty journal has a fixed, trivial encoding
+  if (messages.length === 0) {
+    return {
+      version: 1,
+      totalMessages: 0,
+      segments: [],
+      metadata: { compressionRatio: 1, originalSize: 0, compressedSize: 0 },
+    };
+  }
+  const segments: RLESegment[] = [];
+  for (const message of messages) {
+    const openRun = segments[segments.length - 1];
+    if (openRun !== undefined && openRun.agentId === message.sessionId) {
+      // Same agent as the previous message: grow the open run
+      openRun.messages.push(message);
+      openRun.count += 1;
+    } else {
+      // First message, or the agent changed: open a fresh run
+      segments.push({ agentId: message.sessionId, count: 1, messages: [message] });
+    }
+  }
+  // Size estimates are JSON string lengths
+  const originalSize = JSON.stringify(messages).length;
+  const compressedSize = JSON.stringify(segments).length;
+  const compressionRatio = originalSize > 0 ? originalSize / compressedSize : 1;
+  return {
+    version: 1,
+    totalMessages: messages.length,
+    segments,
+    metadata: { compressionRatio, originalSize, compressedSize },
+  };
+}
+/**
+ * Flatten an RLE-encoded journal back into its message sequence.
+ *
+ * Segments are visited in order and their messages appended as-is, so the
+ * result has the same order (and the same message objects) as the segments.
+ *
+ * @param encoded - Journal produced by encodeJournalRLE
+ * @returns The messages of all segments, concatenated
+ * @throws Error if a message's sessionId differs from its segment's agentId,
+ *   or if the number of decoded messages differs from encoded.totalMessages
+ */
+export function decodeJournalRLE(encoded: RLEEncodedJournal): CRDTMessage[] {
+  const flattened: CRDTMessage[] = [];
+  const runs = encoded.segments;
+  for (let r = 0; r < runs.length; r += 1) {
+    const run = runs[r];
+    for (let m = 0; m < run.messages.length; m += 1) {
+      const entry = run.messages[m];
+      // A message filed under the wrong agent means the segment is corrupt
+      if (entry.sessionId !== run.agentId) {
+        throw new Error(
+          `RLE decode error: sessionId mismatch in segment. ` +
+          `Expected ${run.agentId}, got ${entry.sessionId}`
+        );
+      }
+      flattened.push(entry);
+    }
+  }
+  // The recorded total must match what was actually recovered
+  if (flattened.length === encoded.totalMessages) {
+    return flattened;
+  }
+  throw new Error(
+    `RLE decode error: expected ${encoded.totalMessages} messages, ` +
+    `got ${flattened.length}`
+  );
+}
+/**
+ * Check that an encoded journal decodes back to the given messages.
+ *
+ * Every discrepancy is collected as a human-readable error rather than
+ * stopping at the first one; the exception is a decode failure, which ends
+ * the check immediately. Messages are compared position by position on:
+ * id, sessionId, vector clock (same keys, same values), lamportTime,
+ * timestamp, and ops (compared by their JSON text).
+ *
+ * @param original - The messages that were encoded
+ * @param encoded - The RLE journal to validate against them
+ * @returns `valid` is true when no error was recorded; `errors` lists the findings
+ */
+export function verifyRLEIntegrity(
+  original: CRDTMessage[],
+  encoded: RLEEncodedJournal
+): { valid: boolean; errors: string[] } {
+  const errors: string[] = [];
+  // Recorded total vs. what the caller says went in
+  if (original.length !== encoded.totalMessages) {
+    errors.push(
+      `Message count mismatch: expected ${original.length}, got ${encoded.totalMessages}`
+    );
+  }
+  // A journal that cannot be decoded cannot be compared any further
+  let decoded: CRDTMessage[];
+  try {
+    decoded = decodeJournalRLE(encoded);
+  } catch (err) {
+    errors.push(`Failed to decode: ${(err as Error).message}`);
+    return { valid: false, errors };
+  }
+  if (original.length !== decoded.length) {
+    errors.push(
+      `Decoded length mismatch: expected ${original.length}, got ${decoded.length}`
+    );
+  }
+  // Compare only the positions both sequences have
+  const comparable = Math.min(original.length, decoded.length);
+  for (let i = 0; i < comparable; i += 1) {
+    const expected = original[i];
+    const actual = decoded[i];
+    if (expected.id !== actual.id) {
+      errors.push(`Message ${i}: id mismatch (${expected.id} vs ${actual.id})`);
+    }
+    if (expected.sessionId !== actual.sessionId) {
+      errors.push(`Message ${i}: sessionId mismatch`);
+    }
+    // Vector clocks: first the key sets, then (only if those agree) the values
+    const expectedKeys = Object.keys(expected.clock).sort();
+    const actualKeys = Object.keys(actual.clock).sort();
+    const sameKeys =
+      expectedKeys.length === actualKeys.length &&
+      expectedKeys.every((k, idx) => actualKeys[idx] === k);
+    if (sameKeys) {
+      for (const key of expectedKeys) {
+        if (expected.clock[key] !== actual.clock[key]) {
+          errors.push(`Message ${i}: vector clock[${key}] mismatch`);
+        }
+      }
+    } else {
+      errors.push(`Message ${i}: vector clock structure mismatch`);
+    }
+    if (expected.lamportTime !== actual.lamportTime) {
+      errors.push(`Message ${i}: lamportTime mismatch`);
+    }
+    if (expected.timestamp !== actual.timestamp) {
+      errors.push(`Message ${i}: timestamp mismatch`);
+    }
+    // Ops are compared through their JSON serialization
+    if (JSON.stringify(expected.ops) !== JSON.stringify(actual.ops)) {
+      errors.push(`Message ${i}: operations mismatch`);
+    }
+  }
+  return { valid: errors.length === 0, errors };
+}
+/**
+ * Summarize the size metadata of an encoded journal.
+ *
+ * All sizes come straight from encoded.metadata (JSON string lengths recorded
+ * at encode time); nothing is re-measured here.
+ *
+ * @param encoded - Journal produced by encodeJournalRLE
+ * @returns
+ *   - ratio: metadata.compressionRatio (original / compressed)
+ *   - percentSaved: savedBytes as a percentage of the original size (0 when the original size is 0)
+ *   - originalBytes / compressedBytes: the recorded sizes
+ *   - savedBytes: original minus compressed (negative when the encoding is larger)
+ *   - segmentCount: number of segments
+ *   - avgMessagesPerSegment: totalMessages / segmentCount (0 when there are no segments)
+ */
+export function getCompressionStats(encoded: RLEEncodedJournal): {
+  ratio: number;
+  percentSaved: number;
+  originalBytes: number;
+  compressedBytes: number;
+  savedBytes: number;
+  segmentCount: number;
+  avgMessagesPerSegment: number;
+} {
+  const { originalSize, compressedSize, compressionRatio } = encoded.metadata;
+  const savedBytes = originalSize - compressedSize;
+  const segmentCount = encoded.segments.length;
+  const percentSaved = originalSize > 0
+    ? (savedBytes / originalSize) * 100
+    : 0;
+  return {
+    ratio: compressionRatio,
+    percentSaved,
+    originalBytes: originalSize,
+    compressedBytes: compressedSize,
+    savedBytes,
+    segmentCount,
+    // Guard on the divisor itself: a journal whose totalMessages is non-zero
+    // but which has no segments must not produce Infinity.
+    avgMessagesPerSegment: segmentCount > 0
+      ? encoded.totalMessages / segmentCount
+      : 0,
+  };
+}

package/src/journal/JournalService.ts CHANGED Viewed

@@ -13,16 +13,48 @@ import type { PrismaClient, Document } from '@prisma/client';
 import {
   type CRDTMessage,
   type SceneGraph,
+  type SceneNode,
   type Snapshot,
   type VectorClock,
   applyMessage,
   createEmptyGraph,
   OperationValidator,
+  TextRope,
+  compactRope,
+  toRaw,
+  fromRaw,
 } from '@vuer-ai/vuer-rtc';
 import { JournalRepository } from './JournalRepository.js';
 import { DocumentRepository } from '../persistence/DocumentRepository.js';
+/**
+ * Produce a plain, JSON-compatible deep copy of a value.
+ *
+ * - Properties named 'parent' are dropped (they point back up the tree).
+ * - TextRope instances are replaced by `{ _textRope: true, raw: toRaw(rope) }`.
+ * - An object that is reached again while it is still being serialized
+ *   (a true cycle) is dropped.
+ *
+ * Objects that are merely referenced from several places (shared, but not
+ * cyclic) are serialized at every occurrence rather than being dropped after
+ * the first one.
+ */
+function safeSerialize(obj: unknown): unknown {
+  // Objects on the path from the root down to the value currently being visited
+  const ancestors: object[] = [];
+  // JSON.stringify invokes the replacer with `this` bound to the object that
+  // holds `key`, so this must be a regular function rather than an arrow.
+  const replacer = function (this: unknown, key: string, value: unknown): unknown {
+    // Skip parent references which cause cycles
+    if (key === 'parent') return undefined;
+    if (typeof value !== 'object' || value === null) return value;
+    // Unwind to the holder: anything above it on the stack belongs to an
+    // already-finished sibling subtree and is no longer an ancestor.
+    while (ancestors.length > 0 && ancestors[ancestors.length - 1] !== this) {
+      ancestors.pop();
+    }
+    // Properly serialize TextRope instances using toRaw()
+    if (value instanceof TextRope) {
+      const wrapped = {
+        _textRope: true,
+        raw: toRaw(value),
+      };
+      // The wrapper becomes the holder of `raw`, so it has to be on the stack too
+      ancestors.push(wrapped);
+      return wrapped;
+    }
+    if (ancestors.includes(value)) return undefined; // Circular reference
+    ancestors.push(value);
+    return value;
+  };
+  return JSON.parse(JSON.stringify(obj, replacer));
+}
 /** How often the compaction loop runs (ms). */
 const COMPACTION_INTERVAL_MS = 30_000;
@@ -39,20 +71,94 @@ export interface DocumentState {
   journal: JournalEntry[];
 }
+/**
+ * Deep-copy a parsed JSON value, turning every serialized TextRope marker
+ * (an object with `_textRope === true` and a truthy `raw`) back into a rope
+ * via fromRaw(). Arrays and plain objects are rebuilt recursively; anything
+ * that is not an object (including null and undefined) is returned unchanged.
+ */
+function restoreTextRopes(obj: any): any {
+  // Nothing to restore inside primitives, null or undefined
+  if (obj === null || typeof obj !== 'object') {
+    return obj;
+  }
+  // Serialized rope marker: hand the raw payload back to the rope factory
+  if (obj._textRope === true && obj.raw) {
+    return fromRaw(obj.raw);
+  }
+  if (Array.isArray(obj)) {
+    return obj.map((item) => restoreTextRopes(item));
+  }
+  // Plain object: rebuild it key by key
+  const restored: any = {};
+  for (const key of Object.keys(obj)) {
+    restored[key] = restoreTextRopes(obj[key]);
+  }
+  return restored;
+}
 /**
  * Safely parse a Document.currentState (Json) into a Snapshot,
  * providing defaults for any missing fields.
+ * Restores TextRope instances from their serialized raw form.
  */
 function parseSnapshot(currentState: unknown): Snapshot {
   const raw = (currentState ?? {}) as Record<string, unknown>;
+  // Restore TextRope instances in the graph
+  const graph = restoreTextRopes(raw.graph as SceneGraph) || createEmptyGraph();
   return {
-    graph: (raw.graph as SceneGraph) || createEmptyGraph(),
+    graph,
     vectorClock: (raw.vectorClock as Record<string, number>) || {},
     lamportTime: (typeof raw.lamportTime === 'number' ? raw.lamportTime : 0),
     journalIndex: (typeof raw.journalIndex === 'number' ? raw.journalIndex : 0),
   };
 }
+/**
+ * Return a graph in which every TextRope-valued node property has been
+ * replaced by compactRope(rope).
+ *
+ * Nodes without any TextRope property are reused as-is. A node that has one
+ * is shallow-copied (with a fresh `children` array) before its rope
+ * properties are replaced, so neither the input graph nor its nodes are
+ * mutated. When no node contains a rope, the input graph itself is returned.
+ *
+ * NOTE(review): what compaction does to a rope is defined by compactRope()
+ * in @vuer-ai/vuer-rtc and is not visible here.
+ */
+function compactTextRopes(graph: SceneGraph): SceneGraph {
+  const nodes: Record<string, SceneNode> = {};
+  let touched = false;
+  for (const id of Object.keys(graph.nodes)) {
+    const node = graph.nodes[id];
+    const ropeProps = Object.keys(node).filter((prop) => node[prop] instanceof TextRope);
+    if (ropeProps.length === 0) {
+      // No ropes on this node: share the original object
+      nodes[id] = node;
+      continue;
+    }
+    // Copy-on-write: clone once, then swap in the compacted ropes
+    const copy: SceneNode = {
+      ...node,
+      children: [...(node.children ?? [])],
+    };
+    for (const prop of ropeProps) {
+      copy[prop] = compactRope(node[prop] as TextRope);
+    }
+    nodes[id] = copy;
+    touched = true;
+  }
+  return touched ? { ...graph, nodes } : graph;
+}
 export class JournalService {
   private journalRepo: JournalRepository;
   private documentRepo: DocumentRepository;
@@ -300,9 +406,13 @@ export class JournalService {
   /**
    * Get state for new client (snapshot + only post-snapshot journal entries).
    *
-   * After compaction the in-memory journal should already contain only
-   * post-snapshot entries, but we apply an explicit lamportTime filter
-   * as a safety check for the DB-reload path.
+   * After compaction, we need to filter journal entries that are already
+   * baked into the snapshot. We use vector clock comparison (not lamportTime)
+   * to correctly handle out-of-order or delayed messages.
+   *
+   * A message is included if ANY component of its vector clock is greater
+   * than the corresponding component in the snapshot's vector clock.
+   * This matches the client-side filtering logic in initFromServer().
    */
   async getStateForClient(documentId: string): Promise<{
     snapshot: Snapshot;
@@ -311,9 +421,18 @@ export class JournalService {
     const state = await this.loadDocument(documentId);
     if (!state) return null;
-    // Only return entries after the snapshot's lamport time
+    // Filter journal entries using vector clock comparison (not lamportTime)
+    // to handle out-of-order messages correctly after compaction
     const postSnapshotJournal = state.journal
-      .filter((e) => e.msg.lamportTime > state.snapshot.lamportTime)
+      .filter((e) => {
+        // Include message if ANY session in its clock is ahead of snapshot
+        for (const [sessionId, time] of Object.entries(e.msg.clock)) {
+          if (time > (state.snapshot.vectorClock[sessionId] ?? 0)) {
+            return true;
+          }
+        }
+        return false; // All clock components <= snapshot, already applied
+      })
       .map((e) => e.msg);
     return {
@@ -388,6 +507,15 @@ export class JournalService {
         .slice(0, compactUpToIndex + 1)
         .map((e) => e.msg.id);
+      // Compact TextRope instances before creating snapshot
+      // This merges single-char items into multi-char spans, preventing B-tree depth explosion
+      try {
+        newGraph = compactTextRopes(newGraph);
+      } catch (err: any) {
+        console.error(`[compact] TextRope compaction failed for doc ${documentId}:`, err);
+        throw new Error(`TextRope compaction failed: ${err.message}`);
+      }
       // Update snapshot
       state.snapshot = {
         graph: newGraph,
@@ -399,14 +527,38 @@ export class JournalService {
       // Remove compacted entries from in-memory journal
       state.journal = state.journal.slice(compactUpToIndex + 1);
-      // Persist snapshot
-      await this.documentRepo.update(documentId, {
-        currentState: state.snapshot as any,
-      });
+      // Persist snapshot (sanitize to break circular refs from parent pointers)
+      let sanitizedSnapshot: unknown;
+      try {
+        sanitizedSnapshot = safeSerialize(state.snapshot);
+      } catch (err: any) {
+        console.error(`[compact] Snapshot serialization failed for doc ${documentId}:`, err);
+        throw new Error(`Snapshot serialization failed: ${err.message}`);
+      }
+      try {
+        await this.documentRepo.update(documentId, {
+          currentState: sanitizedSnapshot as any,
+        });
+      } catch (err: any) {
+        console.error(`[compact] Document update failed for doc ${documentId}:`, err);
+        // If document was deleted, this is not a fatal error - just clean up journal
+        if (err?.code === 'P2025') {
+          console.warn(`[compact] Document ${documentId} not found, skipping snapshot update`);
+        } else {
+          throw new Error(`Document update failed: ${err.message}`);
+        }
+      }
       // Delete compacted entries from DB by their batchIds
       if (compactedBatchIds.length > 0) {
-        await this.journalRepo.deleteByIds(documentId, compactedBatchIds);
+        try {
+          const deletedCount = await this.journalRepo.deleteByIds(documentId, compactedBatchIds);
+          console.log(`[compact] Deleted ${deletedCount} journal batches for doc ${documentId}`);
+        } catch (err: any) {
+          console.error(`[compact] Journal batch deletion failed for doc ${documentId}:`, err);
+          throw new Error(`Journal batch deletion failed: ${err.message}`);
+        }
       }
     } finally {
       this.compactionLocks.delete(documentId);

package/src/journal/RLECompression.ts ADDED Viewed

@@ -0,0 +1,210 @@
+/**
+ * RLE (Run-Length Encoding) Compression for Journal Storage
+ *
+ * Optimizes journal storage by:
+ * 1. Run-length encoding consecutive operations (e.g., 10 sequential edits from same agent)
+ * 2. Only storing agent/sessionId when it changes
+ * 3. Preserving CRDT semantics (no operations are combined or lost)
+ *
+ * Format: { sessionId, count: N, ops: [op1, op2, ...] }
+ * - sessionId is stored with first op of each run
+ * - count tracks consecutive ops from same agent
+ * - ops are stored as-is (no merging/combining)
+ */
+import type { CRDTMessage } from '@vuer-ai/vuer-rtc';
+/**
+ * A run-length encoded journal entry: one run of consecutive messages
+ * from a single session, as built by encodeRLE.
+ */
+export interface RLEJournalEntry {
+  sessionId: string;        // Agent/session that performed this run
+  count: number;            // Number of consecutive messages from this session folded into the run
+  lamportTime: number;      // lamportTime of the first message in the run
+  endLamportTime: number;   // lamportTime of the last message in the run (start + count - 1 only if the run's lamport times are consecutive)
+  ops: any[];               // Ops of every message in the run, concatenated in order (a message may contribute zero or several)
+  timestamp: number;        // Wall-clock timestamp of the first message in the run
+}
+/**
+ * Group consecutive messages from the same sessionId into runs.
+ *
+ * Each run records the session, how many messages it absorbed, the
+ * lamportTime of its first and last message, the timestamp of its first
+ * message, and the concatenation of all the messages' ops. Message ids and
+ * vector clocks are not part of the output.
+ *
+ * Example:
+ *   Input:  [msg1(sid=A), msg2(sid=A), msg3(sid=B)]
+ *   Output: [
+ *     { sessionId: A, count: 2, ops: [...msg1.ops, ...msg2.ops], ... },
+ *     { sessionId: B, count: 1, ops: [...msg3.ops], ... }
+ *   ]
+ *
+ * @param messages - Messages in journal order
+ * @returns One entry per run, in order (empty for an empty input)
+ */
+export function encodeRLE(messages: CRDTMessage[]): RLEJournalEntry[] {
+  const runs: RLEJournalEntry[] = [];
+  for (const message of messages) {
+    const openRun = runs[runs.length - 1];
+    if (openRun !== undefined && openRun.sessionId === message.sessionId) {
+      // Same session as the previous message: extend the open run
+      openRun.ops.push(...message.ops);
+      openRun.endLamportTime = message.lamportTime;
+      openRun.count += 1;
+      continue;
+    }
+    // First message, or the session changed: start a new run
+    runs.push({
+      sessionId: message.sessionId,
+      count: 1,
+      lamportTime: message.lamportTime,
+      endLamportTime: message.lamportTime,
+      ops: Array.from(message.ops),
+      timestamp: message.timestamp,
+    });
+  }
+  return runs;
+}
+/**
+ * Decode RLE-encoded entries back into messages.
+ *
+ * Each entry's op stream is cut into `count` messages using opCountPerMessage,
+ * which lists, in overall message order, how many ops each original message
+ * had. A missing count defaults to 1; an explicit 0 is honoured (the message
+ * gets no ops).
+ *
+ * Limitations of this format (use decodeRLEWithMetadata for a fuller restore):
+ * - ids are synthesized as `msg-<lamportTime>`; the original ids are lost
+ * - vector clocks are lost (`clock` is `{}`)
+ * - lamport times are assumed to increase by 1 within a run
+ * - every message in a run gets the run's timestamp
+ *
+ * @param entries - Runs produced by encodeRLE
+ * @param opCountPerMessage - Ops per original message, in message order
+ * @returns The reconstructed messages
+ */
+export function decodeRLE(
+  entries: RLEJournalEntry[],
+  opCountPerMessage: number[] // Array indicating how many ops per message
+): CRDTMessage[] {
+  const messages: CRDTMessage[] = [];
+  let messageIdx = 0;
+  for (const entry of entries) {
+    let lamportTime = entry.lamportTime;
+    let entryOpIndex = 0;
+    for (let i = 0; i < entry.count; i++) {
+      // `??` rather than `||`: a recorded count of 0 is a real value. Treating
+      // it as 1 would hand this message the next message's op and shift every
+      // later message in the run.
+      const opCount = opCountPerMessage[messageIdx] ?? 1;
+      const opsForMsg = entry.ops.slice(entryOpIndex, entryOpIndex + opCount);
+      entryOpIndex += opCount;
+      messageIdx++;
+      // Reconstruct message
+      messages.push({
+        id: `msg-${lamportTime}`, // Note: Original IDs are lost; this is a limitation
+        sessionId: entry.sessionId,
+        clock: {}, // Vector clock info is lost in current RLE format
+        lamportTime,
+        timestamp: entry.timestamp,
+        ops: opsForMsg,
+      });
+      lamportTime++;
+    }
+  }
+  return messages;
+}
+/**
+ * Result of encodeRLEWithMetadata(): the RLE entries plus the side tables
+ * that decodeRLEWithMetadata() uses to recover message boundaries, ids and
+ * vector clocks. This preserves more information than plain encodeRLE output.
+ *
+ * NOTE(review): messageIds and vectorClocks are keyed by lamportTime alone, so
+ * two messages with the same lamportTime share one slot (the later message
+ * overwrites the earlier one in encodeRLEWithMetadata) — confirm that lamport
+ * times are unique within an encoded journal.
+ */
+export interface RLEEncodedWithMetadata {
+  entries: RLEJournalEntry[];
+  // For each original message (in input order), track how many ops it had
+  messageOpsCount: number[];
+  // Map lamport time to original message ID for deduplication
+  messageIds: Record<number, string>;
+  // Vector clocks per message, keyed by the message's lamport time
+  vectorClocks: Record<number, Record<string, number>>;
+}
+/**
+ * Encode messages with encodeRLE and record the extra per-message data
+ * needed to rebuild them: the op count of each message (in input order) and
+ * each message's id and vector clock, both keyed by the message's lamportTime.
+ *
+ * When two messages carry the same lamportTime, the later one's id and clock
+ * replace the earlier one's in the lookup tables.
+ *
+ * @param messages - Messages in journal order
+ * @returns The RLE entries together with the decoding metadata
+ */
+export function encodeRLEWithMetadata(
+  messages: CRDTMessage[]
+): RLEEncodedWithMetadata {
+  const entries = encodeRLE(messages);
+  const messageOpsCount: number[] = [];
+  const messageIds: Record<number, string> = {};
+  const vectorClocks: Record<number, Record<string, number>> = {};
+  for (const message of messages) {
+    messageOpsCount.push(message.ops.length);
+    messageIds[message.lamportTime] = message.id;
+    vectorClocks[message.lamportTime] = message.clock;
+  }
+  return { entries, messageOpsCount, messageIds, vectorClocks };
+}
+/**
+ * Decode RLE entries using the metadata recorded by encodeRLEWithMetadata.
+ *
+ * Each entry's op stream is cut into `count` messages using messageOpsCount
+ * (a missing count defaults to 1; an explicit 0 yields a message without ops).
+ * Ids and vector clocks are looked up by lamport time, falling back to
+ * `msg-<lamportTime>` and `{}` when no value was recorded.
+ *
+ * Lamport times are assumed to increase by 1 within a run, and every message
+ * in a run gets the run's timestamp.
+ *
+ * @param encoded - Output of encodeRLEWithMetadata
+ * @returns The reconstructed messages, in order
+ */
+export function decodeRLEWithMetadata(
+  encoded: RLEEncodedWithMetadata
+): CRDTMessage[] {
+  const messages: CRDTMessage[] = [];
+  let messageIdx = 0;
+  for (const entry of encoded.entries) {
+    let lamportTime = entry.lamportTime;
+    let entryOpIndex = 0;
+    for (let i = 0; i < entry.count; i++) {
+      // `??` rather than `||`: a recorded count of 0 is a real value. Treating
+      // it as 1 would hand this message the next message's op and shift every
+      // later message in the run.
+      const opCount = encoded.messageOpsCount[messageIdx] ?? 1;
+      const opsForMsg = entry.ops.slice(entryOpIndex, entryOpIndex + opCount);
+      entryOpIndex += opCount;
+      messageIdx++;
+      messages.push({
+        // Fall back only when nothing was recorded for this lamport time
+        id: encoded.messageIds[lamportTime] ?? `msg-${lamportTime}`,
+        sessionId: entry.sessionId,
+        clock: encoded.vectorClocks[lamportTime] ?? {},
+        lamportTime,
+        timestamp: entry.timestamp,
+        ops: opsForMsg,
+      });
+      lamportTime++;
+    }
+  }
+  return messages;
+}
+/**
+ * Compare the JSON size of the original messages with that of their RLE entries.
+ *
+ * Sizes are the lengths of the JSON.stringify output. `ratio` is the fraction
+ * of the original size that was saved, (original - encoded) / original; it is
+ * negative when the encoded form is larger than the original.
+ *
+ * @param original - The messages before encoding
+ * @param encoded - The RLE entries produced from them
+ * @returns { original, encoded, ratio }
+ */
+export function calculateCompressionRatio(
+  original: CRDTMessage[],
+  encoded: RLEJournalEntry[]
+): { original: number; encoded: number; ratio: number } {
+  const jsonLength = (value: unknown): number => JSON.stringify(value).length;
+  const before = jsonLength(original);
+  const after = jsonLength(encoded);
+  return {
+    original: before,
+    encoded: after,
+    ratio: (before - after) / before,
+  };
+}