@vuer-ai/vuer-rtc-server 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env +1 -0
- package/S3_COMPRESSION_GUIDE.md +233 -0
- package/dist/archive/ArchivalService.d.ts +117 -0
- package/dist/archive/ArchivalService.d.ts.map +1 -0
- package/dist/archive/ArchivalService.js +181 -0
- package/dist/archive/ArchivalService.js.map +1 -0
- package/dist/broker/InMemoryBroker.d.ts +2 -0
- package/dist/broker/InMemoryBroker.d.ts.map +1 -1
- package/dist/broker/InMemoryBroker.js +4 -0
- package/dist/broker/InMemoryBroker.js.map +1 -1
- package/dist/compression/CompressionUtils.d.ts +57 -0
- package/dist/compression/CompressionUtils.d.ts.map +1 -0
- package/dist/compression/CompressionUtils.js +90 -0
- package/dist/compression/CompressionUtils.js.map +1 -0
- package/dist/compression/index.d.ts +7 -0
- package/dist/compression/index.d.ts.map +1 -0
- package/dist/compression/index.js +7 -0
- package/dist/compression/index.js.map +1 -0
- package/dist/journal/CoalescingService.d.ts +63 -0
- package/dist/journal/CoalescingService.d.ts.map +1 -0
- package/dist/journal/CoalescingService.js +507 -0
- package/dist/journal/CoalescingService.js.map +1 -0
- package/dist/journal/JournalRLE.d.ts +81 -0
- package/dist/journal/JournalRLE.d.ts.map +1 -0
- package/dist/journal/JournalRLE.js +199 -0
- package/dist/journal/JournalRLE.js.map +1 -0
- package/dist/journal/JournalService.d.ts +7 -3
- package/dist/journal/JournalService.d.ts.map +1 -1
- package/dist/journal/JournalService.js +152 -12
- package/dist/journal/JournalService.js.map +1 -1
- package/dist/journal/RLECompression.d.ts +73 -0
- package/dist/journal/RLECompression.d.ts.map +1 -0
- package/dist/journal/RLECompression.js +152 -0
- package/dist/journal/RLECompression.js.map +1 -0
- package/dist/journal/rle-demo.d.ts +8 -0
- package/dist/journal/rle-demo.d.ts.map +1 -0
- package/dist/journal/rle-demo.js +159 -0
- package/dist/journal/rle-demo.js.map +1 -0
- package/dist/persistence/S3ColdStorage.d.ts +62 -0
- package/dist/persistence/S3ColdStorage.d.ts.map +1 -0
- package/dist/persistence/S3ColdStorage.js +88 -0
- package/dist/persistence/S3ColdStorage.js.map +1 -0
- package/dist/persistence/S3ColdStorageIntegration.d.ts +78 -0
- package/dist/persistence/S3ColdStorageIntegration.d.ts.map +1 -0
- package/dist/persistence/S3ColdStorageIntegration.js +93 -0
- package/dist/persistence/S3ColdStorageIntegration.js.map +1 -0
- package/dist/serve.d.ts +2 -0
- package/dist/serve.d.ts.map +1 -1
- package/dist/serve.js +623 -15
- package/dist/serve.js.map +1 -1
- package/docs/RLE_COMPRESSION.md +397 -0
- package/examples/compression-example.ts +259 -0
- package/package.json +14 -14
- package/src/archive/ArchivalService.ts +250 -0
- package/src/broker/InMemoryBroker.ts +5 -0
- package/src/compression/CompressionUtils.ts +113 -0
- package/src/compression/index.ts +14 -0
- package/src/journal/COALESCING.md +267 -0
- package/src/journal/CoalescingService.ts +626 -0
- package/src/journal/JournalRLE.ts +265 -0
- package/src/journal/JournalService.ts +163 -11
- package/src/journal/RLECompression.ts +210 -0
- package/src/journal/rle-demo.ts +193 -0
- package/src/serve.ts +702 -15
- package/tests/benchmark/journal-optimization-benchmark.test.ts +482 -0
- package/tests/compression/compression.test.ts +343 -0
- package/tests/integration/repositories.test.ts +89 -0
- package/tests/journal/compaction-load-bug.test.ts +409 -0
- package/tests/journal/compaction.test.ts +42 -2
- package/tests/journal/journal-rle.test.ts +511 -0
- package/tests/journal/lww-ordering-bug.test.ts +248 -0
- package/tests/journal/multi-session-coalescing.test.ts +871 -0
- package/tests/journal/rle-compression.test.ts +526 -0
- package/tests/journal/text-coalescing.test.ts +210 -0
- package/tests/unit/s3-compression.test.ts +257 -0
- package/PHASE1_SUMMARY.md +0 -94
|
@@ -0,0 +1,265 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Run-Length Encoding (RLE) for Journal Storage
|
|
3
|
+
*
|
|
4
|
+
* Reduces journal storage size by:
|
|
5
|
+
* 1. Run-length encoding consecutive operations from the same agent
|
|
6
|
+
* 2. Storing agent ID only when it changes
|
|
7
|
+
* 3. Grouping consecutive operations into compact sequences
|
|
8
|
+
*
|
|
9
|
+
* Design:
|
|
10
|
+
* - Encodes a sequence of CRDTMessages into RLE-compressed format
|
|
11
|
+
* - Preserves all CRDT semantics (causality, ordering, timestamps)
|
|
12
|
+
* - Includes metadata for safe decompression
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import type { CRDTMessage } from '@vuer-ai/vuer-rtc';
|
|
16
|
+
|
|
17
|
+
/**
 * One run of consecutive journal messages that all share the same session.
 */
export interface RLESegment {
  /** sessionId taken from the first message in the run. */
  agentId: string;
  /** Number of messages in this run. */
  count: number;
  /** The messages of the run, stored as-is. */
  messages: CRDTMessage[];
}
|
|
25
|
+
|
|
26
|
+
/**
 * Container for a journal that has been grouped into per-session runs.
 */
export interface RLEEncodedJournal {
  /** Format version marker. */
  version: 1;
  /** Message count of the journal before it was grouped into segments. */
  totalMessages: number;
  /** Runs of consecutive same-session messages, in journal order. */
  segments: RLESegment[];
  /** Size bookkeeping; sizes are estimates based on JSON text length. */
  metadata: {
    /** originalSize / compressedSize. */
    compressionRatio: number;
    /** Estimated bytes of the plain JSON journal. */
    originalSize: number;
    /** Estimated bytes of the segmented (RLE) form. */
    compressedSize: number;
  };
}
|
|
39
|
+
|
|
40
|
+
/**
 * Group a journal into runs of consecutive messages from the same session.
 *
 * A new segment is opened whenever `sessionId` differs from the previous
 * message's; otherwise the message is appended to the open segment. The
 * messages themselves are stored by reference and are not modified.
 *
 * @param messages - Journal messages in their original order.
 * @returns The segmented journal plus size estimates derived from the JSON
 *          text length of the input and of the segment list.
 */
export function encodeJournalRLE(messages: CRDTMessage[]): RLEEncodedJournal {
  const segments: RLESegment[] = [];

  for (const message of messages) {
    const tail = segments[segments.length - 1];
    if (tail !== undefined && tail.agentId === message.sessionId) {
      // Same session as the open run: extend it.
      tail.count += 1;
      tail.messages.push(message);
    } else {
      // First message, or the session changed: open a new run.
      segments.push({
        agentId: message.sessionId,
        count: 1,
        messages: [message],
      });
    }
  }

  if (messages.length === 0) {
    // Nothing to measure; report a neutral ratio.
    return {
      version: 1,
      totalMessages: 0,
      segments,
      metadata: { compressionRatio: 1, originalSize: 0, compressedSize: 0 },
    };
  }

  // Size estimates are JSON string lengths, not true byte counts.
  const originalSize = JSON.stringify(messages).length;
  const compressedSize = JSON.stringify(segments).length;
  const compressionRatio = originalSize > 0 ? originalSize / compressedSize : 1;

  return {
    version: 1,
    totalMessages: messages.length,
    segments,
    metadata: { compressionRatio, originalSize, compressedSize },
  };
}
|
|
105
|
+
|
|
106
|
+
/**
 * Flatten a segmented journal back into a single ordered message list.
 *
 * Segments are visited in order and their messages appended unchanged, so
 * the result has the same order as the journal that was encoded.
 *
 * @param encoded - Segmented journal to flatten.
 * @returns The messages of all segments, concatenated in segment order.
 * @throws Error if a message's sessionId differs from its segment's agentId,
 *         or if the number of messages does not equal `totalMessages`.
 */
export function decodeJournalRLE(encoded: RLEEncodedJournal): CRDTMessage[] {
  const flattened: CRDTMessage[] = [];

  encoded.segments.forEach((segment) => {
    segment.messages.forEach((message) => {
      // Every message in a run must belong to the run's session.
      if (message.sessionId !== segment.agentId) {
        throw new Error(
          `RLE decode error: sessionId mismatch in segment. ` +
            `Expected ${segment.agentId}, got ${message.sessionId}`
        );
      }
      flattened.push(message);
    });
  });

  // The recorded total acts as a checksum against dropped or extra messages.
  const recovered = flattened.length;
  if (recovered !== encoded.totalMessages) {
    throw new Error(
      `RLE decode error: expected ${encoded.totalMessages} messages, ` +
        `got ${recovered}`
    );
  }

  return flattened;
}
|
|
138
|
+
|
|
139
|
+
/**
 * Check that an encoded journal decodes back to the given original messages.
 *
 * Compares, per message index: id, sessionId, vector clock (same key set and
 * same values), lamportTime, timestamp, and ops (via JSON text equality).
 * Count mismatches are reported as well. If decoding throws, the decode error
 * is reported and no per-message comparison is attempted.
 *
 * @param original - The messages that were encoded.
 * @param encoded  - The encoded journal to validate.
 * @returns `valid` is true only when no discrepancy was found; `errors`
 *          lists every discrepancy in the order it was detected.
 */
export function verifyRLEIntegrity(
  original: CRDTMessage[],
  encoded: RLEEncodedJournal
): { valid: boolean; errors: string[] } {
  const errors: string[] = [];
  const expectedCount = original.length;

  // Declared total must match the original journal length.
  if (expectedCount !== encoded.totalMessages) {
    errors.push(
      `Message count mismatch: expected ${expectedCount}, got ${encoded.totalMessages}`
    );
  }

  let decoded: CRDTMessage[];
  try {
    decoded = decodeJournalRLE(encoded);
  } catch (err) {
    // A failed decode makes every later check meaningless.
    errors.push(`Failed to decode: ${(err as Error).message}`);
    return { valid: false, errors };
  }

  if (decoded.length !== expectedCount) {
    errors.push(
      `Decoded length mismatch: expected ${expectedCount}, got ${decoded.length}`
    );
  }

  // Only indexes present on both sides can be compared.
  const comparable = Math.min(expectedCount, decoded.length);
  for (let i = 0; i < comparable; i++) {
    const before = original[i];
    const after = decoded[i];

    if (before.id !== after.id) {
      errors.push(`Message ${i}: id mismatch (${before.id} vs ${after.id})`);
    }

    if (before.sessionId !== after.sessionId) {
      errors.push(`Message ${i}: sessionId mismatch`);
    }

    // Vector clock: the key sets must match before values are compared.
    const beforeKeys = Object.keys(before.clock).sort();
    const afterKeys = Object.keys(after.clock).sort();
    const sameKeys =
      beforeKeys.length === afterKeys.length &&
      beforeKeys.every((k, idx) => afterKeys[idx] === k);
    if (!sameKeys) {
      errors.push(`Message ${i}: vector clock structure mismatch`);
    } else {
      beforeKeys
        .filter((k) => before.clock[k] !== after.clock[k])
        .forEach((k) => errors.push(`Message ${i}: vector clock[${k}] mismatch`));
    }

    if (before.lamportTime !== after.lamportTime) {
      errors.push(`Message ${i}: lamportTime mismatch`);
    }

    if (before.timestamp !== after.timestamp) {
      errors.push(`Message ${i}: timestamp mismatch`);
    }

    // Operations are compared through their JSON text.
    if (JSON.stringify(before.ops) !== JSON.stringify(after.ops)) {
      errors.push(`Message ${i}: operations mismatch`);
    }
  }

  return { valid: errors.length === 0, errors };
}
|
|
229
|
+
|
|
230
|
+
/**
 * Summarize the size metadata of an encoded journal.
 *
 * All byte figures come straight from `encoded.metadata` (JSON-length
 * estimates recorded at encode time); nothing is re-measured here.
 *
 * @param encoded - Encoded journal whose metadata should be summarized.
 * @returns
 *  - `ratio`: `metadata.compressionRatio` (original / compressed)
 *  - `percentSaved`: saved bytes as a percentage of the original size,
 *    or 0 when the original size is 0 (negative when the encoded form is larger)
 *  - `originalBytes` / `compressedBytes` / `savedBytes`
 *  - `segmentCount`: number of segments
 *  - `avgMessagesPerSegment`: `totalMessages / segmentCount`, or 0 when
 *    there are no messages or no segments
 */
export function getCompressionStats(encoded: RLEEncodedJournal): {
  ratio: number;
  percentSaved: number;
  originalBytes: number;
  compressedBytes: number;
  savedBytes: number;
  segmentCount: number;
  avgMessagesPerSegment: number;
} {
  const { originalSize, compressedSize, compressionRatio } = encoded.metadata;
  const savedBytes = originalSize - compressedSize;
  const percentSaved = originalSize > 0
    ? (savedBytes / originalSize) * 100
    : 0;

  const segmentCount = encoded.segments.length;
  // Guard on the divisor as well: an inconsistent journal (messages declared
  // but no segments) must not produce Infinity.
  const avgMessagesPerSegment =
    encoded.totalMessages > 0 && segmentCount > 0
      ? encoded.totalMessages / segmentCount
      : 0;

  return {
    ratio: compressionRatio,
    percentSaved,
    originalBytes: originalSize,
    compressedBytes: compressedSize,
    savedBytes,
    segmentCount,
    avgMessagesPerSegment,
  };
}
|
|
@@ -13,16 +13,48 @@ import type { PrismaClient, Document } from '@prisma/client';
|
|
|
13
13
|
import {
|
|
14
14
|
type CRDTMessage,
|
|
15
15
|
type SceneGraph,
|
|
16
|
+
type SceneNode,
|
|
16
17
|
type Snapshot,
|
|
17
18
|
type VectorClock,
|
|
18
19
|
applyMessage,
|
|
19
20
|
createEmptyGraph,
|
|
20
21
|
OperationValidator,
|
|
22
|
+
TextRope,
|
|
23
|
+
compactRope,
|
|
24
|
+
toRaw,
|
|
25
|
+
fromRaw,
|
|
21
26
|
} from '@vuer-ai/vuer-rtc';
|
|
22
27
|
|
|
23
28
|
import { JournalRepository } from './JournalRepository.js';
|
|
24
29
|
import { DocumentRepository } from '../persistence/DocumentRepository.js';
|
|
25
30
|
|
|
31
|
+
/**
 * Produce a plain, JSON-safe deep copy of `obj`.
 *
 * - Properties named `parent` are dropped (they create cycles in trees).
 * - TextRope instances are replaced by `{ _textRope: true, raw: toRaw(rope) }`.
 * - A reference back to an object that is currently being serialized
 *   (a true cycle) is dropped. An object that is merely referenced from
 *   several places is NOT a cycle and is serialized at each occurrence.
 *
 * @param obj - Value to serialize.
 * @returns The result of `JSON.parse(JSON.stringify(obj, replacer))`.
 */
function safeSerialize(obj: unknown): unknown {
  // Chain of objects from the root down to the holder currently being
  // serialized. Only these can form a cycle with a nested value.
  const ancestors: object[] = [];

  const json = JSON.stringify(
    obj,
    function (this: unknown, key: string, value: unknown): unknown {
      // Skip parent references which cause cycles
      if (key === 'parent') return undefined;

      if (typeof value !== 'object' || value === null) return value;

      // JSON.stringify calls the replacer with `this` bound to the holder of
      // `key`. Unwind the chain until the holder is on top so that finished
      // siblings are no longer treated as ancestors.
      while (ancestors.length > 0 && ancestors[ancestors.length - 1] !== this) {
        ancestors.pop();
      }

      // Properly serialize TextRope instances using toRaw()
      if (value instanceof TextRope) {
        const wrapped = {
          _textRope: true,
          raw: toRaw(value),
        };
        // The wrapper becomes the holder of its own fields.
        ancestors.push(wrapped);
        return wrapped;
      }

      if (ancestors.includes(value)) return undefined; // Circular reference
      ancestors.push(value);
      return value;
    }
  );

  return JSON.parse(json);
}
|
|
57
|
+
|
|
26
58
|
/** How often the compaction loop runs (ms). */
|
|
27
59
|
const COMPACTION_INTERVAL_MS = 30_000;
|
|
28
60
|
|
|
@@ -39,20 +71,94 @@ export interface DocumentState {
|
|
|
39
71
|
journal: JournalEntry[];
|
|
40
72
|
}
|
|
41
73
|
|
|
74
|
+
/**
 * Deep-copy a parsed JSON value, turning every serialized TextRope marker
 * (`{ _textRope: true, raw: ... }` with a truthy `raw`) back into a rope via
 * `fromRaw`. Arrays and plain objects are rebuilt recursively; everything
 * else is returned untouched.
 */
function restoreTextRopes(obj: any): any {
  // Primitives, null and undefined pass straight through.
  if (obj === null || obj === undefined || typeof obj !== 'object') {
    return obj;
  }

  // Serialized rope marker takes priority over generic traversal.
  if (obj._textRope === true && obj.raw) {
    return fromRaw(obj.raw);
  }

  if (Array.isArray(obj)) {
    return obj.map((item: unknown) => restoreTextRopes(item));
  }

  // Generic object: rebuild it key by key.
  const rebuilt: any = {};
  Object.keys(obj).forEach((key) => {
    rebuilt[key] = restoreTextRopes(obj[key]);
  });
  return rebuilt;
}
|
|
101
|
+
|
|
42
102
|
/**
 * Turn a stored Document.currentState (Json) into a Snapshot.
 *
 * Missing or malformed fields fall back to defaults: an empty graph, an
 * empty vector clock, and 0 for lamportTime / journalIndex. Serialized
 * TextRope markers inside the graph are turned back into rope instances.
 */
function parseSnapshot(currentState: unknown): Snapshot {
  const raw = (currentState ?? {}) as Record<string, unknown>;
  const { vectorClock, lamportTime, journalIndex } = raw;

  // Rebuild rope instances first; fall back to a fresh graph if none stored.
  const restored = restoreTextRopes(raw.graph as SceneGraph);
  const graph = restored || createEmptyGraph();

  // Non-numeric counters are treated as absent.
  const asCounter = (value: unknown): number =>
    typeof value === 'number' ? value : 0;

  return {
    graph,
    vectorClock: (vectorClock as Record<string, number>) || {},
    lamportTime: asCounter(lamportTime),
    journalIndex: asCounter(journalIndex),
  };
}
|
|
55
120
|
|
|
121
|
+
/**
 * Return a graph in which every TextRope-valued node property has been passed
 * through `compactRope`.
 *
 * The input graph is left untouched: a node that holds at least one rope is
 * shallow-cloned (with a copied `children` array) before its rope properties
 * are replaced; nodes without ropes are reused as-is. If no node holds a
 * rope, the original graph object itself is returned.
 */
function compactTextRopes(graph: SceneGraph): SceneGraph {
  const nodes: Record<string, SceneNode> = {};
  let touched = false;

  for (const id of Object.keys(graph.nodes)) {
    const source = graph.nodes[id];
    const ropeProps = Object.keys(source).filter(
      (prop) => source[prop] instanceof TextRope
    );

    if (ropeProps.length === 0) {
      // Nothing to compact: share the original node.
      nodes[id] = source;
      continue;
    }

    // Clone once per node so the original stays unmodified.
    const copy: SceneNode = {
      ...source,
      children: [...(source.children ?? [])],
    };
    for (const prop of ropeProps) {
      copy[prop] = compactRope(source[prop] as TextRope);
    }

    nodes[id] = copy;
    touched = true;
  }

  return touched ? { ...graph, nodes } : graph;
}
|
|
161
|
+
|
|
56
162
|
export class JournalService {
|
|
57
163
|
private journalRepo: JournalRepository;
|
|
58
164
|
private documentRepo: DocumentRepository;
|
|
@@ -300,9 +406,13 @@ export class JournalService {
|
|
|
300
406
|
/**
|
|
301
407
|
* Get state for new client (snapshot + only post-snapshot journal entries).
|
|
302
408
|
*
|
|
303
|
-
* After compaction
|
|
304
|
-
*
|
|
305
|
-
*
|
|
409
|
+
* After compaction, we need to filter journal entries that are already
|
|
410
|
+
* baked into the snapshot. We use vector clock comparison (not lamportTime)
|
|
411
|
+
* to correctly handle out-of-order or delayed messages.
|
|
412
|
+
*
|
|
413
|
+
* A message is included if ANY component of its vector clock is greater
|
|
414
|
+
* than the corresponding component in the snapshot's vector clock.
|
|
415
|
+
* This matches the client-side filtering logic in initFromServer().
|
|
306
416
|
*/
|
|
307
417
|
async getStateForClient(documentId: string): Promise<{
|
|
308
418
|
snapshot: Snapshot;
|
|
@@ -311,9 +421,18 @@ export class JournalService {
|
|
|
311
421
|
const state = await this.loadDocument(documentId);
|
|
312
422
|
if (!state) return null;
|
|
313
423
|
|
|
314
|
-
//
|
|
424
|
+
// Filter journal entries using vector clock comparison (not lamportTime)
|
|
425
|
+
// to handle out-of-order messages correctly after compaction
|
|
315
426
|
const postSnapshotJournal = state.journal
|
|
316
|
-
.filter((e) =>
|
|
427
|
+
.filter((e) => {
|
|
428
|
+
// Include message if ANY session in its clock is ahead of snapshot
|
|
429
|
+
for (const [sessionId, time] of Object.entries(e.msg.clock)) {
|
|
430
|
+
if (time > (state.snapshot.vectorClock[sessionId] ?? 0)) {
|
|
431
|
+
return true;
|
|
432
|
+
}
|
|
433
|
+
}
|
|
434
|
+
return false; // All clock components <= snapshot, already applied
|
|
435
|
+
})
|
|
317
436
|
.map((e) => e.msg);
|
|
318
437
|
|
|
319
438
|
return {
|
|
@@ -388,6 +507,15 @@ export class JournalService {
|
|
|
388
507
|
.slice(0, compactUpToIndex + 1)
|
|
389
508
|
.map((e) => e.msg.id);
|
|
390
509
|
|
|
510
|
+
// Compact TextRope instances before creating snapshot
|
|
511
|
+
// This merges single-char items into multi-char spans, preventing B-tree depth explosion
|
|
512
|
+
try {
|
|
513
|
+
newGraph = compactTextRopes(newGraph);
|
|
514
|
+
} catch (err: any) {
|
|
515
|
+
console.error(`[compact] TextRope compaction failed for doc ${documentId}:`, err);
|
|
516
|
+
throw new Error(`TextRope compaction failed: ${err.message}`);
|
|
517
|
+
}
|
|
518
|
+
|
|
391
519
|
// Update snapshot
|
|
392
520
|
state.snapshot = {
|
|
393
521
|
graph: newGraph,
|
|
@@ -399,14 +527,38 @@ export class JournalService {
|
|
|
399
527
|
// Remove compacted entries from in-memory journal
|
|
400
528
|
state.journal = state.journal.slice(compactUpToIndex + 1);
|
|
401
529
|
|
|
402
|
-
// Persist snapshot
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
530
|
+
// Persist snapshot (sanitize to break circular refs from parent pointers)
|
|
531
|
+
let sanitizedSnapshot: unknown;
|
|
532
|
+
try {
|
|
533
|
+
sanitizedSnapshot = safeSerialize(state.snapshot);
|
|
534
|
+
} catch (err: any) {
|
|
535
|
+
console.error(`[compact] Snapshot serialization failed for doc ${documentId}:`, err);
|
|
536
|
+
throw new Error(`Snapshot serialization failed: ${err.message}`);
|
|
537
|
+
}
|
|
538
|
+
|
|
539
|
+
try {
|
|
540
|
+
await this.documentRepo.update(documentId, {
|
|
541
|
+
currentState: sanitizedSnapshot as any,
|
|
542
|
+
});
|
|
543
|
+
} catch (err: any) {
|
|
544
|
+
console.error(`[compact] Document update failed for doc ${documentId}:`, err);
|
|
545
|
+
// If document was deleted, this is not a fatal error - just clean up journal
|
|
546
|
+
if (err?.code === 'P2025') {
|
|
547
|
+
console.warn(`[compact] Document ${documentId} not found, skipping snapshot update`);
|
|
548
|
+
} else {
|
|
549
|
+
throw new Error(`Document update failed: ${err.message}`);
|
|
550
|
+
}
|
|
551
|
+
}
|
|
406
552
|
|
|
407
553
|
// Delete compacted entries from DB by their batchIds
|
|
408
554
|
if (compactedBatchIds.length > 0) {
|
|
409
|
-
|
|
555
|
+
try {
|
|
556
|
+
const deletedCount = await this.journalRepo.deleteByIds(documentId, compactedBatchIds);
|
|
557
|
+
console.log(`[compact] Deleted ${deletedCount} journal batches for doc ${documentId}`);
|
|
558
|
+
} catch (err: any) {
|
|
559
|
+
console.error(`[compact] Journal batch deletion failed for doc ${documentId}:`, err);
|
|
560
|
+
throw new Error(`Journal batch deletion failed: ${err.message}`);
|
|
561
|
+
}
|
|
410
562
|
}
|
|
411
563
|
} finally {
|
|
412
564
|
this.compactionLocks.delete(documentId);
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* RLE (Run-Length Encoding) Compression for Journal Storage
|
|
3
|
+
*
|
|
4
|
+
* Optimizes journal storage by:
|
|
5
|
+
* 1. Run-length encoding consecutive operations (e.g., 10 sequential edits from same agent)
|
|
6
|
+
* 2. Only storing agent/sessionId when it changes
|
|
7
|
+
* 3. Preserving CRDT semantics (no operations are combined or lost)
|
|
8
|
+
*
|
|
9
|
+
* Format: { sessionId, count: N, ops: [op1, op2, ...] }
|
|
10
|
+
* - sessionId is stored with first op of each run
|
|
11
|
+
* - count tracks consecutive ops from same agent
|
|
12
|
+
* - ops are stored as-is (no merging/combining)
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import type { CRDTMessage } from '@vuer-ai/vuer-rtc';
|
|
16
|
+
|
|
17
|
+
/**
 * One run of consecutive journal messages from a single session, with the
 * operations of all its messages concatenated.
 */
export interface RLEJournalEntry {
  /** Agent/session that performed this run. */
  sessionId: string;
  /** Number of consecutive messages from this session. */
  count: number;
  /** lamportTime of the first message in the run. */
  lamportTime: number;
  /** lamportTime of the last message in the run. */
  endLamportTime: number;
  /** Operations of every message in the run, in order. */
  ops: any[];
  /** Wall-clock time of the first message in the run. */
  timestamp: number;
}
|
|
28
|
+
|
|
29
|
+
/**
 * Collapse consecutive messages from the same session into runs.
 *
 * Each run records the session, how many messages it covers, the first and
 * last lamportTime, the first message's timestamp, and the concatenated ops
 * of all its messages. Message ids and vector clocks are not stored.
 *
 * Example:
 *   Input:  [m1(sid=A), m2(sid=A), m3(sid=B)]
 *   Output: [
 *     { sessionId: A, count: 2, ops: [...m1.ops, ...m2.ops], ... },
 *     { sessionId: B, count: 1, ops: [...m3.ops], ... }
 *   ]
 *
 * @param messages - Journal messages in order.
 * @returns One entry per run; an empty array for an empty journal.
 */
export function encodeRLE(messages: CRDTMessage[]): RLEJournalEntry[] {
  const runs: RLEJournalEntry[] = [];

  for (const message of messages) {
    const open = runs[runs.length - 1];

    if (open !== undefined && open.sessionId === message.sessionId) {
      // Same session as the open run: grow it.
      open.count += 1;
      open.endLamportTime = message.lamportTime;
      open.ops.push(...message.ops);
      continue;
    }

    // First message or a session switch: begin a new run.
    runs.push({
      sessionId: message.sessionId,
      count: 1,
      lamportTime: message.lamportTime,
      endLamportTime: message.lamportTime,
      ops: [...message.ops],
      timestamp: message.timestamp,
    });
  }

  return runs;
}
|
|
75
|
+
|
|
76
|
+
/**
 * Rebuild individual messages from RLE entries.
 *
 * Each entry's concatenated `ops` array is cut back into per-message slices
 * using `opCountPerMessage`, which is indexed by message position across all
 * entries. A missing or invalid count (not a non-negative integer) falls back
 * to 1; a count of 0 is honored and yields a message with no ops.
 *
 * Limitations of this format (the entry does not carry the information):
 * - ids are synthesized as `msg-<lamportTime>`;
 * - vector clocks are returned empty;
 * - lamportTime is assumed to increase by 1 per message within a run;
 * - every message in a run gets the entry's timestamp.
 *
 * @param entries - RLE entries in journal order.
 * @param opCountPerMessage - Number of ops each original message had.
 * @returns One reconstructed message per original message.
 */
export function decodeRLE(
  entries: RLEJournalEntry[],
  opCountPerMessage: number[] // Array indicating how many ops per message
): CRDTMessage[] {
  const messages: CRDTMessage[] = [];
  let messageIdx = 0;

  for (const entry of entries) {
    let lamportTime = entry.lamportTime;
    let entryOpIndex = 0;

    for (let i = 0; i < entry.count; i++) {
      // Honor an explicit 0 (message without ops); only fall back to 1 when
      // the count is absent or unusable.
      const declared = opCountPerMessage[messageIdx];
      const opCount =
        typeof declared === 'number' && Number.isInteger(declared) && declared >= 0
          ? declared
          : 1;
      const opsForMsg = entry.ops.slice(entryOpIndex, entryOpIndex + opCount);
      entryOpIndex += opCount;
      messageIdx++;

      // Reconstruct message
      messages.push({
        id: `msg-${lamportTime}`, // Note: Original IDs are lost; this is a limitation
        sessionId: entry.sessionId,
        clock: {}, // Vector clock info is lost in current RLE format
        lamportTime,
        timestamp: entry.timestamp,
        ops: opsForMsg,
      });

      lamportTime++;
    }
  }

  return messages;
}
|
|
120
|
+
|
|
121
|
+
/**
 * RLE entries bundled with the per-message metadata that the entries alone
 * do not carry (op counts, ids, vector clocks), so a decoder can recover more
 * of the original messages.
 */
export interface RLEEncodedWithMetadata {
  /** The run-length encoded entries. */
  entries: RLEJournalEntry[];
  /** For each original message, in order: how many ops it had. */
  messageOpsCount: number[];
  /** Original message id, keyed by the message's lamport time. */
  messageIds: Record<number, string>;
  /** Vector clock of each message, keyed by the message's lamport time. */
  vectorClocks: Record<number, Record<string, number>>;
}
|
|
136
|
+
|
|
137
|
+
/**
 * Encode messages with `encodeRLE` and collect the side metadata a decoder
 * needs to restore op boundaries, ids, and vector clocks.
 *
 * NOTE(review): ids and clocks are keyed by lamportTime, so if two messages
 * in the input share a lamportTime the later one overwrites the earlier.
 *
 * @param messages - Journal messages in order.
 * @returns The RLE entries plus op counts, ids, and vector clocks.
 */
export function encodeRLEWithMetadata(
  messages: CRDTMessage[]
): RLEEncodedWithMetadata {
  const entries = encodeRLE(messages);
  const messageOpsCount = messages.map((message) => message.ops.length);

  const messageIds: Record<number, string> = {};
  const vectorClocks: Record<number, Record<string, number>> = {};
  messages.forEach((message) => {
    const at = message.lamportTime;
    messageIds[at] = message.id;
    vectorClocks[at] = message.clock;
  });

  return { entries, messageOpsCount, messageIds, vectorClocks };
}
|
|
157
|
+
|
|
158
|
+
/**
 * Rebuild messages from RLE entries plus their side metadata.
 *
 * Op boundaries come from `messageOpsCount` (indexed by message position
 * across all entries); a missing or invalid count falls back to 1, while an
 * explicit 0 is honored so that messages without ops round-trip correctly.
 * Ids and vector clocks are looked up by lamport time, falling back to
 * `msg-<lamportTime>` and an empty clock when absent.
 *
 * Within a run, lamportTime is assumed to increase by 1 per message, and
 * every message receives the entry's timestamp.
 *
 * @param encoded - Entries and metadata, in the shape described by
 *                  `RLEEncodedWithMetadata`.
 * @returns One reconstructed message per original message.
 */
export function decodeRLEWithMetadata(
  encoded: RLEEncodedWithMetadata
): CRDTMessage[] {
  const messages: CRDTMessage[] = [];
  let messageIdx = 0;

  for (const entry of encoded.entries) {
    let lamportTime = entry.lamportTime;
    let entryOpIndex = 0;

    for (let i = 0; i < entry.count; i++) {
      // `|| 1` would turn a real count of 0 into 1 and shift every later
      // message's ops; only default when the count is absent or unusable.
      const declared = encoded.messageOpsCount[messageIdx];
      const opCount =
        typeof declared === 'number' && Number.isInteger(declared) && declared >= 0
          ? declared
          : 1;
      const opsForMsg = entry.ops.slice(entryOpIndex, entryOpIndex + opCount);
      entryOpIndex += opCount;
      messageIdx++;

      messages.push({
        id: encoded.messageIds[lamportTime] || `msg-${lamportTime}`,
        sessionId: entry.sessionId,
        clock: encoded.vectorClocks[lamportTime] || {},
        lamportTime,
        timestamp: entry.timestamp,
        ops: opsForMsg,
      });

      lamportTime++;
    }
  }

  return messages;
}
|
|
192
|
+
|
|
193
|
+
/**
 * Compare the JSON text size of the original messages with that of their
 * RLE entries.
 *
 * @param original - The messages before encoding.
 * @param encoded  - The RLE entries produced from them.
 * @returns `original` and `encoded` are JSON string lengths; `ratio` is the
 *          fraction of the original size that was saved,
 *          `(original - encoded) / original`. It is negative when the
 *          encoded form is larger than the original.
 */
export function calculateCompressionRatio(
  original: CRDTMessage[],
  encoded: RLEJournalEntry[]
): { original: number; encoded: number; ratio: number } {
  const jsonLength = (value: unknown): number => JSON.stringify(value).length;

  const originalBytes = jsonLength(original);
  const encodedBytes = jsonLength(encoded);

  return {
    original: originalBytes,
    encoded: encodedBytes,
    ratio: (originalBytes - encodedBytes) / originalBytes,
  };
}
|