articulated 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/id_list.ts CHANGED
@@ -1,13 +1,100 @@
1
- import { ElementId, equalsId } from "./id";
1
+ import { SparseIndices } from "sparse-array-rled";
2
+ import { ElementId } from "./id";
2
3
  import { SavedIdList } from "./saved_id_list";
3
4
 
4
- interface ListElement {
5
- id: ElementId;
6
- isDeleted: boolean;
5
+ // Most exports are only for tests. See index.ts for public exports.
6
+
7
+ /*
8
+ IdList implementation using a modified B+Tree.
9
+
10
+ See tests/id_list_simple.ts for a simpler implementation with the same API but
11
+ impractical efficiency (linear time ops; one object in memory per id).
12
+ The fuzz tests compare that implementation to this one.
13
+
14
+ The B+Tree is unusual in that it has no keys, only values (= ids). The order on the values
15
+ is determined "by fiat" using insertAfter/insertBefore instead of using sorted keys.
16
+
17
+ The leaves in the B+Tree are not individual ids; instead, each leaf is a compressed representation of a group of ids
18
+ with the same bunchId and sequential counters. Each leaf also contains a `present`
19
+ field to track which of its ids are deleted.
20
+ (Unlike in a SavedIdList, we do not separate adjacent ids with different isDeleted statuses.)
21
+
22
+ Note that it is possible for adjacent leaves to be mergeable (i.e., they could be one leaf) but not merged.
23
+ This happens if you insert the middle ids later (e.g., 0, 2, 1).
24
+ It has a slight perf penalty that goes away once you reload.
25
+ Note that save() needs to work around this possibility - see pushSaveItem.
26
+
27
+ The B+Tree also stores two statistics about each subtree: its size (# of present ids)
28
+ and its knownSize (# of known ids). These allow indexed access in log time.
29
+
30
+ Unlike some B+Trees, we do not store a linked list of leaves. Iteration instead uses a depth-first search.
31
+ */
32
+
33
+ export interface LeafNode {
34
+ readonly bunchId: string;
35
+ readonly startCounter: number;
36
+ readonly count: number;
37
+ /**
38
+ * The present counter values in this leaf node.
39
+ *
40
+ * Note that it is indexed by counter, not by (counter - this.startCounter).
41
+ */
42
+ readonly present: SparseIndices;
43
+ }
44
+
45
+ /**
46
+ * An inner node with inner-node children.
47
+ */
48
+ export class InnerNodeInner {
49
+ readonly size: number;
50
+ readonly knownSize: number;
51
+
52
+ constructor(readonly children: readonly InnerNode[]) {
53
+ let size = 0;
54
+ let knownSize = 0;
55
+ for (const child of children) {
56
+ size += child.size;
57
+ knownSize += child.knownSize;
58
+ }
59
+ this.size = size;
60
+ this.knownSize = knownSize;
61
+ }
62
+ }
63
+
64
+ /**
65
+ * An inner node with leaf children.
66
+ */
67
+ export class InnerNodeLeaf {
68
+ readonly size: number;
69
+ readonly knownSize: number;
70
+
71
+ constructor(readonly children: readonly LeafNode[]) {
72
+ let size = 0;
73
+ let knownSize = 0;
74
+ for (const child of children) {
75
+ size += child.present.count();
76
+ knownSize += child.count;
77
+ }
78
+ this.size = size;
79
+ this.knownSize = knownSize;
80
+ }
7
81
  }
8
82
 
83
+ export type InnerNode = InnerNodeInner | InnerNodeLeaf;
84
+
85
+ /**
86
+ * The B+Tree's branching factor, i.e., the max number of children of a node.
87
+ *
88
+ * Note that our B+Tree has no keys - in particular, no keys in internal nodes.
89
+ *
90
+ * Wiki B+Tree: "B+ trees can also be used for data stored in RAM.
91
+ * In this case a reasonable choice for block size would be the size of [the] processor's cache line."
92
+ * (64 byte cache line) / (8 byte pointer) = 8.
93
+ */
94
+ export const M = 8;
95
+
9
96
  /**
10
- * A list of ElementIds.
97
+ * A list of ElementIds, as a persistent (immutable) data structure.
11
98
  *
12
99
  * An IdList helps you assign a unique immutable id to each element of a list, such
13
100
  * as a todo-list or a text document (= list of characters). That way, you can keep track
@@ -16,11 +103,14 @@ interface ListElement {
16
103
  *
17
104
  * Any id that has been inserted into an IdList remains **known** to that list indefinitely,
18
105
  * allowing you to reference it in insertAfter/insertBefore operations. Calling {@link delete}
19
- * merely marks an id as deleted (not present); it remains in memory as a "tombstone".
106
+ * merely marks an id as deleted (= not present); a deleted id does not count towards the length of the list or index-based accessors, but it does remain in memory as a "tombstone".
20
107
  * This is useful in collaborative settings, since another user might instruct you to
21
108
  * call `insertAfter(before, newId)` when you have already deleted `before` locally.
22
- * If that is not a concern and you truly want to make an id no longer known, instead
23
- * call {@link uninsert}.
109
+ *
110
+ * To enable easy and efficient rollbacks, such as in a
111
+ * [server reconciliation](https://mattweidner.com/2024/06/04/server-architectures.html#1-server-reconciliation)
112
+ * architecture, IdList is a persistent (immutable) data structure. Mutating methods
113
+ * return a new IdList, sharing memory with the old IdList where possible.
24
114
  *
25
115
  * See {@link ElementId} for advice on generating ElementIds. IdList is optimized for
26
116
  * the case where sequential ElementIds often have the same bunchId and sequential counters.
@@ -28,30 +118,53 @@ interface ListElement {
28
118
  * cause such ids to be separated, partially deleted, or even reordered.
29
119
  */
30
120
  export class IdList {
31
- private readonly state: ListElement[];
32
- private _length: number;
121
+ /**
122
+ * Internal - construct an IdList using a static method (e.g. `IdList.new`).
123
+ */
124
+ private constructor(private readonly root: InnerNode) {}
33
125
 
34
126
  /**
35
127
  * Constructs an empty list.
36
128
  *
37
- * To begin with a non-empty list, use {@link IdList.from} or {@link IdList.fromIds}.
129
+ * To begin with a non-empty list, use {@link IdList.from}, {@link IdList.fromIds},
130
+ * or {@link IdList.load}.
38
131
  */
39
- constructor() {
40
- this.state = [];
41
- this._length = 0;
132
+ static new() {
133
+ return new this(new InnerNodeLeaf([]));
42
134
  }
43
135
 
44
136
  /**
45
137
  * Constructs a list with the given known ids and their isDeleted status, in list order.
46
138
  */
47
- static from(state: Iterable<{ id: ElementId; isDeleted: boolean }>) {
48
- const list = new IdList();
49
- for (const { id, isDeleted } of state) {
50
- // Clone to prevent aliasing.
51
- list.state.push({ id, isDeleted });
52
- if (!isDeleted) list._length++;
139
+ static from(
140
+ knownIds: Iterable<{ id: ElementId; isDeleted: boolean }>
141
+ ): IdList {
142
+ // Convert knownIds to a saved state and load that.
143
+ const savedState: SavedIdList = [];
144
+
145
+ for (const { id, isDeleted } of knownIds) {
146
+ if (savedState.length !== 0) {
147
+ const current = savedState.at(-1)!;
148
+ if (
149
+ id.bunchId === current.bunchId &&
150
+ id.counter === current.startCounter + current.count &&
151
+ isDeleted === current.isDeleted
152
+ ) {
153
+ // @ts-expect-error Mutating for convenience; no aliasing to worry about.
154
+ current.count++;
155
+ continue;
156
+ }
157
+ }
158
+
159
+ savedState.push({
160
+ bunchId: id.bunchId,
161
+ startCounter: id.counter,
162
+ count: 1,
163
+ isDeleted,
164
+ });
53
165
  }
54
- return list;
166
+
167
+ return IdList.load(savedState);
55
168
  }
56
169
 
57
170
  /**
@@ -61,17 +174,18 @@ export class IdList {
61
174
  * specify known-but-deleted ids. That way, you can reference the known-but-deleted ids
62
175
  * in future insertAfter/insertBefore operations.
63
176
  */
64
- static fromIds(ids: Iterable<ElementId>) {
65
- const list = new IdList();
66
- for (const id of ids) {
67
- list.state.push({ id, isDeleted: false });
68
- list._length++;
69
- }
70
- return list;
177
+ static fromIds(ids: Iterable<ElementId>): IdList {
178
+ return this.from(
179
+ (function* () {
180
+ for (const id of ids) yield { id, isDeleted: false };
181
+ })()
182
+ );
71
183
  }
72
184
 
73
185
  /**
74
186
  * Inserts `newId` immediately after the given id (`before`), which may be deleted.
187
+ * A new IdList is returned and the current list remains unchanged.
188
+ *
75
189
  * All ids to the right of `before` are shifted one index to the right, in the manner
76
190
  * of [Array.splice](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/splice).
77
191
  *
@@ -81,30 +195,108 @@ export class IdList {
81
195
  * @param count Provide this to bulk-insert `count` ids from left-to-right,
82
196
  * starting with newId and proceeding with the same bunchId and sequential counters.
83
197
  * @throws If `before` is not known.
84
- * @throws If `newId` is already known.
198
+ * @throws If any inserted id is already known.
85
199
  */
86
- insertAfter(before: ElementId | null, newId: ElementId, count = 1) {
87
- if (this.isKnown(newId)) {
88
- throw new Error("newId is already known");
200
+ insertAfter(before: ElementId | null, newId: ElementId, count = 1): IdList {
201
+ if (!(Number.isSafeInteger(newId.counter) && newId.counter >= 0)) {
202
+ throw new Error(`Invalid counter: ${newId.counter}`);
203
+ }
204
+ if (!(Number.isSafeInteger(count) && count >= 0)) {
205
+ throw new Error(`Invalid count: ${count}`);
206
+ }
207
+ if (count !== 0 && isAnyKnown(newId, count, this.root)) {
208
+ throw new Error("An inserted id is already known");
89
209
  }
90
210
 
91
- let index: number;
92
211
  if (before === null) {
93
- // -1 so index + 1 is 0: insert at the beginning of the list.
94
- index = -1;
95
- } else {
96
- index = this.state.findIndex((elt) => equalsId(elt.id, before));
97
- if (index === -1) {
98
- throw new Error("before is not known");
212
+ if (count === 0) return this;
213
+
214
+ if (this.root.children.length === 0) {
215
+ // Insert the first leaf as a child of root.
216
+ const present = SparseIndices.new();
217
+ present.set(newId.counter, count);
218
+ return new IdList(
219
+ new InnerNodeLeaf([
220
+ {
221
+ bunchId: newId.bunchId,
222
+ startCounter: newId.counter,
223
+ count,
224
+ present,
225
+ },
226
+ ])
227
+ );
228
+ } else {
229
+ // Insert before the first known id.
230
+ return this.insertBefore(firstId(this.root), newId, count);
99
231
  }
100
232
  }
101
233
 
102
- this.state.splice(index + 1, 0, ...expandElements(newId, false, count));
103
- this._length += count;
234
+ const located = locate(before, this.root);
235
+ if (located === null) {
236
+ throw new Error("before is not known");
237
+ }
238
+ if (count === 0) return this;
239
+ const leaf = located[0].node;
240
+
241
+ if (before.counter === leaf.startCounter + leaf.count - 1) {
242
+ // before is leaf's last id: we insert directly after leaf.
243
+ if (
244
+ leaf.bunchId === newId.bunchId &&
245
+ leaf.startCounter + leaf.count === newId.counter
246
+ ) {
247
+ // Extending leaf forwards.
248
+ const present = leaf.present.clone();
249
+ present.set(newId.counter, count);
250
+ return this.replaceLeaf(located, {
251
+ ...leaf,
252
+ count: leaf.count + count,
253
+ present,
254
+ });
255
+ } else {
256
+ const present = SparseIndices.new();
257
+ present.set(newId.counter, count);
258
+ return this.replaceLeaf(located, leaf, {
259
+ bunchId: newId.bunchId,
260
+ startCounter: newId.counter,
261
+ count,
262
+ present,
263
+ });
264
+ }
265
+ } else {
266
+ // before is not leaf's last id: we need to split leaf and insert there.
267
+ const newPresent = SparseIndices.new();
268
+ newPresent.set(newId.counter, count);
269
+ const [leftPresent, rightPresent] = splitPresent(
270
+ leaf.present,
271
+ before.counter + 1
272
+ );
273
+ return this.replaceLeaf(
274
+ located,
275
+ {
276
+ ...leaf,
277
+ count: before.counter + 1 - leaf.startCounter,
278
+ present: leftPresent,
279
+ },
280
+ {
281
+ bunchId: newId.bunchId,
282
+ startCounter: newId.counter,
283
+ count,
284
+ present: newPresent,
285
+ },
286
+ {
287
+ ...leaf,
288
+ startCounter: before.counter + 1,
289
+ count: leaf.count - (before.counter + 1 - leaf.startCounter),
290
+ present: rightPresent,
291
+ }
292
+ );
293
+ }
104
294
  }
105
295
 
106
296
  /**
107
297
  * Inserts `newId` immediately before the given id (`after`), which may be deleted.
298
+ * A new IdList is returned and the current list remains unchanged.
299
+ *
108
300
  * All ids to the right of `after`, plus `after` itself, are shifted one index to the right, in the manner
109
301
  * of [Array.splice](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/splice).
110
302
  *
@@ -117,80 +309,153 @@ export class IdList {
117
309
  * @throws If `after` is not known.
118
310
  * @throws If any inserted id is already known.
119
311
  */
120
- insertBefore(after: ElementId | null, newId: ElementId, count = 1) {
121
- if (this.isKnown(newId)) {
122
- throw new Error("newId is already known");
312
+ insertBefore(after: ElementId | null, newId: ElementId, count = 1): IdList {
313
+ if (!(Number.isSafeInteger(newId.counter) && newId.counter >= 0)) {
314
+ throw new Error(`Invalid counter: ${newId.counter}`);
315
+ }
316
+ if (!(Number.isSafeInteger(count) && count >= 0)) {
317
+ throw new Error(`Invalid count: ${count}`);
318
+ }
319
+ if (count !== 0 && isAnyKnown(newId, count, this.root)) {
320
+ throw new Error("An inserted id is already known");
123
321
  }
124
322
 
125
- let index: number;
126
323
  if (after === null) {
127
- index = this.state.length;
128
- } else {
129
- index = this.state.findIndex((elt) => equalsId(elt.id, after));
130
- if (index === -1) {
131
- throw new Error("after is not known");
132
- }
324
+ if (count === 0) return this;
325
+
326
+ // Insert after the last known id, or at the beginning if empty.
327
+ return this.insertAfter(
328
+ this.root.knownSize === 0 ? null : lastId(this.root),
329
+ newId,
330
+ count
331
+ );
133
332
  }
134
333
 
135
- // We insert the bunch from left-to-right even though it's insertBefore.
136
- this.state.splice(index, 0, ...expandElements(newId, false, count));
137
- this._length += count;
138
- }
334
+ const located = locate(after, this.root);
335
+ if (located === null) {
336
+ throw new Error("after is not known");
337
+ }
338
+ if (count === 0) return this;
339
+ const leaf = located[0].node;
139
340
 
140
- /**
141
- * Un-inserts `id` from the list, making it no longer known or present in this list.
142
- *
143
- * Typically, you instead want to call {@link delete}, which marks `id` as deleted while
144
- * it remains known. That way, you can reference `id` in future insertAfter/insertBefore
145
- * operations, including ones sent concurrently by other devices.
146
- *
147
- * If `id` is already not known, this method does nothing.
148
- */
149
- uninsert(id: ElementId) {
150
- const index = this.state.findIndex((elt) => equalsId(elt.id, id));
151
- if (index !== -1) {
152
- this.state.splice(index, 1);
153
- this._length--;
341
+ if (after.counter === leaf.startCounter) {
342
+ // after is leaf's first id: we insert directly before leaf.
343
+ if (
344
+ leaf.bunchId === newId.bunchId &&
345
+ leaf.startCounter === newId.counter + count
346
+ ) {
347
+ // Extending leaf backwards.
348
+ const present = leaf.present.clone();
349
+ present.set(newId.counter, count);
350
+ return this.replaceLeaf(located, {
351
+ ...leaf,
352
+ startCounter: leaf.startCounter - count,
353
+ count: leaf.count + count,
354
+ present,
355
+ });
356
+ } else {
357
+ const present = SparseIndices.new();
358
+ present.set(newId.counter, count);
359
+ return this.replaceLeaf(
360
+ located,
361
+ {
362
+ bunchId: newId.bunchId,
363
+ startCounter: newId.counter,
364
+ count,
365
+ present,
366
+ },
367
+ leaf
368
+ );
369
+ }
370
+ } else {
371
+ // after is not leaf's first id: we need to split leaf and insert there.
372
+ const present = SparseIndices.new();
373
+ present.set(newId.counter, count);
374
+ const [leftPresent, rightPresent] = splitPresent(
375
+ leaf.present,
376
+ after.counter
377
+ );
378
+ return this.replaceLeaf(
379
+ located,
380
+ {
381
+ ...leaf,
382
+ count: after.counter - leaf.startCounter,
383
+ present: leftPresent,
384
+ },
385
+ {
386
+ bunchId: newId.bunchId,
387
+ startCounter: newId.counter,
388
+ count,
389
+ present,
390
+ },
391
+ {
392
+ ...leaf,
393
+ startCounter: after.counter,
394
+ count: leaf.count - (after.counter - leaf.startCounter),
395
+ present: rightPresent,
396
+ }
397
+ );
154
398
  }
155
399
  }
156
400
 
157
401
  /**
158
- * Marks `id` as deleted from this list. The id remains known (a "tombstone").
402
+ * Marks `id` as deleted from this list.
403
+ * A new IdList is returned and the current list remains unchanged.
159
404
  *
405
+ * Once deleted, `id` does not count towards the length of the list or index-based accessors.
406
+ * However, it remains known (a "tombstone").
160
407
  * Because `id` is still known, you can reference it in future insertAfter/insertBefore
161
408
  * operations, including ones sent concurrently by other devices.
162
- * However, it does occupy space in memory (compressed in common cases).
163
- *
164
- * For an exact inverse to `insertAfter(-, id)` or `insertBefore(-, id)`
165
- * that makes `id` no longer known, see {@link uninsert}.
409
+ * This does have a memory cost, but it is compressed in common cases.
166
410
  *
167
- * If `id` is already deleted or not known, this method does nothing.
411
+ * If `id` is already deleted or is not known, this method does nothing.
168
412
  */
169
413
  delete(id: ElementId) {
170
- const elt = this.state.find((elt) => equalsId(elt.id, id));
171
- if (elt !== undefined && !elt.isDeleted) {
172
- elt.isDeleted = true;
173
- this._length--;
174
- }
414
+ const located = locate(id, this.root);
415
+ if (located === null) return this;
416
+
417
+ const leaf = located[0].node;
418
+ if (!leaf.present.has(id.counter)) return this;
419
+
420
+ const newPresent = leaf.present.clone();
421
+ newPresent.delete(id.counter);
422
+
423
+ return this.replaceLeaf(located, { ...leaf, present: newPresent });
175
424
  }
176
425
 
177
426
  /**
178
427
  * Un-marks `id` as deleted from this list, making it present again.
179
- * This is an exact inverse to {@link delete}.
428
+ * A new IdList is returned and the current list remains unchanged.
429
+ *
430
+ * This method is an exact inverse to {@link delete}.
180
431
  *
181
432
  * If `id` is already present, this method does nothing.
182
433
  *
183
434
  * @throws If `id` is not known.
184
435
  */
185
436
  undelete(id: ElementId) {
186
- const elt = this.state.find((elt) => equalsId(elt.id, id));
187
- if (elt === undefined) {
437
+ const located = locate(id, this.root);
438
+ if (located === null) {
188
439
  throw new Error("id is not known");
189
440
  }
190
- if (elt.isDeleted) {
191
- elt.isDeleted = false;
192
- this._length++;
193
- }
441
+
442
+ const leaf = located[0].node;
443
+ if (leaf.present.has(id.counter)) return this;
444
+
445
+ const newPresent = leaf.present.clone();
446
+ newPresent.set(id.counter);
447
+
448
+ return this.replaceLeaf(located, { ...leaf, present: newPresent });
449
+ }
450
+
451
+ /**
452
+ * Replaces the leaf at the given path with newLeaves.
453
+ * Returns a proper (sufficiently balanced) B+Tree with updated sizes.
454
+ *
455
+ * newLeaves.length must be in [1, M].
456
+ */
457
+ private replaceLeaf(located: Located, ...newLeaves: LeafNode[]): IdList {
458
+ return new IdList(replaceNode(located, this.root, newLeaves, 0));
194
459
  }
195
460
 
196
461
  // Accessors
@@ -203,9 +468,9 @@ export class IdList {
203
468
  * Compare to {@link isKnown}.
204
469
  */
205
470
  has(id: ElementId): boolean {
206
- const elt = this.state.find((elt) => equalsId(elt.id, id));
207
- if (elt === undefined) return false;
208
- return !elt.isDeleted;
471
+ const located = locate(id, this.root);
472
+ if (located === null) return false;
473
+ return located[0].node.present.has(id.counter);
209
474
  }
210
475
 
211
476
  /**
@@ -214,7 +479,16 @@ export class IdList {
214
479
  * Compare to {@link has}.
215
480
  */
216
481
  isKnown(id: ElementId): boolean {
217
- return this.state.some((elt) => equalsId(elt.id, id));
482
+ return locate(id, this.root) !== null;
483
+ }
484
+
485
+ /**
486
+ * The length of the list, counting only present ids.
487
+ *
488
+ * To include known but deleted ids, use `this.knownIds.length`.
489
+ */
490
+ get length() {
491
+ return this.root.size;
218
492
  }
219
493
 
220
494
  /**
@@ -228,14 +502,36 @@ export class IdList {
228
502
  }
229
503
 
230
504
  let remaining = index;
231
- for (const elt of this.state) {
232
- if (!elt.isDeleted) {
233
- if (remaining === 0) return elt.id;
234
- remaining--;
505
+ let curParent = this.root;
506
+ // eslint-disable-next-line no-constant-condition
507
+ recurse: while (true) {
508
+ if (curParent instanceof InnerNodeInner) {
509
+ for (const child of curParent.children) {
510
+ if (remaining < child.size) {
511
+ // Recurse.
512
+ curParent = child;
513
+ continue recurse;
514
+ } else {
515
+ remaining -= child.size;
516
+ }
517
+ }
518
+ } else {
519
+ for (const child of curParent.children) {
520
+ const childSize = child.present.count();
521
+ if (remaining < childSize) {
522
+ // Found it.
523
+ return {
524
+ bunchId: child.bunchId,
525
+ counter: child.present.indexOfCount(remaining),
526
+ };
527
+ } else {
528
+ remaining -= childSize;
529
+ }
530
+ }
235
531
  }
236
- }
237
532
 
238
- throw new Error("Internal error");
533
+ throw new Error("Internal error");
534
+ }
239
535
  }
240
536
 
241
537
  /**
@@ -250,36 +546,47 @@ export class IdList {
250
546
  * @throws If `id` is not known.
251
547
  */
252
548
  indexOf(id: ElementId, bias: "none" | "left" | "right" = "none"): number {
549
+ const located = locate(id, this.root);
550
+ if (located === null) throw new Error("id is not known");
551
+
253
552
  /**
254
553
  * The number of present ids less than id.
255
554
  * Equivalently, the index id would have if present.
256
555
  */
257
556
  let index = 0;
258
- for (const elt of this.state) {
259
- if (equalsId(elt.id, id)) {
260
- // Found it.
261
- if (elt.isDeleted) {
262
- switch (bias) {
263
- case "none":
264
- return -1;
265
- case "left":
266
- return index - 1;
267
- case "right":
268
- return index;
269
- }
270
- } else return index;
557
+
558
+ // Lesser siblings of parent, grandparent, etc.
559
+ for (let i = 1; i < located.length; i++) {
560
+ const parent = (
561
+ i === located.length - 1 ? this.root : located[i + 1].node
562
+ ) as InnerNodeInner;
563
+ for (let c = 0; c < located[i].indexInParent; c++) {
564
+ index += parent.children[c].size;
271
565
  }
272
- if (!elt.isDeleted) index++;
273
566
  }
274
567
 
275
- throw new Error("id is not known");
276
- }
568
+ // Siblings of id's leaf.
569
+ const leafParent = (
570
+ located.length === 1 ? this.root : located[1].node
571
+ ) as InnerNodeLeaf;
572
+ for (let c = 0; c < located[0].indexInParent; c++) {
573
+ index += leafParent.children[c].present.count();
574
+ }
277
575
 
278
- /**
279
- * The length of the list.
280
- */
281
- get length(): number {
282
- return this._length;
576
+ // id's index within leaf.
577
+ const [count, has] = located[0].node.present._countHas(id.counter);
578
+ index += count;
579
+ if (has) return index;
580
+ else {
581
+ switch (bias) {
582
+ case "none":
583
+ return -1;
584
+ case "left":
585
+ return index - 1;
586
+ case "right":
587
+ return index;
588
+ }
589
+ }
283
590
  }
284
591
 
285
592
  // Iterators and views
@@ -287,10 +594,8 @@ export class IdList {
287
594
  /**
288
595
  * Iterates over all present ids in the list.
289
596
  */
290
- *[Symbol.iterator](): IterableIterator<ElementId> {
291
- for (const elt of this.state) {
292
- if (!elt.isDeleted) yield elt.id;
293
- }
597
+ [Symbol.iterator](): IterableIterator<ElementId> {
598
+ return iterateNode(this.root, false);
294
599
  }
295
600
 
296
601
  /**
@@ -303,26 +608,22 @@ export class IdList {
303
608
  /**
304
609
  * Iterates over all __known__ ids in the list, indicating which are deleted.
305
610
  */
306
- valuesWithDeleted(): IterableIterator<{ id: ElementId; isDeleted: boolean }> {
307
- return this.state.values();
308
- }
309
-
310
- /**
311
- * Returns an independent copy of this list, including known but deleted ids.
312
- */
313
- clone(): IdList {
314
- return IdList.from(this.state);
611
+ valuesWithIsDeleted(): IterableIterator<{
612
+ id: ElementId;
613
+ isDeleted: boolean;
614
+ }> {
615
+ return iterateNodeWithIsDeleted(this.root);
315
616
  }
316
617
 
317
618
  private _knownIds?: KnownIdView;
318
619
 
319
620
  /**
320
- * A live-updating view of this list that treats all known ids as present.
321
- * That is, it ignores isDeleted status when computing list indices or iterating.
621
+ * A view of this list that treats all known ids as present.
622
+ * That is, it ignores is-deleted status when computing list indices or iterating.
322
623
  */
323
624
  get knownIds(): KnownIdView {
324
625
  if (this._knownIds === undefined) {
325
- this._knownIds = new KnownIdView(this, this.state);
626
+ this._knownIds = new KnownIdView(this, this.root);
326
627
  }
327
628
  return this._knownIds;
328
629
  }
@@ -336,66 +637,89 @@ export class IdList {
336
637
  * See {@link SavedIdList} for a description of the save format.
337
638
  */
338
639
  save(): SavedIdList {
339
- const ans: SavedIdList = [];
640
+ const acc: SavedIdList = [];
641
+ saveNode(this.root, acc);
642
+ return acc;
643
+ }
644
+
645
+ /**
646
+ * Loads a saved state returned by {@link save}.
647
+ */
648
+ static load(savedState: SavedIdList) {
649
+ // 1. Determine the leaves.
340
650
 
341
- for (const { id, isDeleted } of this.state) {
342
- if (ans.length !== 0) {
343
- const current = ans[ans.length - 1];
651
+ const leaves: LeafNode[] = [];
652
+ for (let i = 0; i < savedState.length; i++) {
653
+ const item = savedState[i];
654
+
655
+ if (!(Number.isSafeInteger(item.count) && item.count >= 0)) {
656
+ throw new Error(`Invalid count: ${item.count}`);
657
+ }
658
+ if (
659
+ !(Number.isSafeInteger(item.startCounter) && item.startCounter >= 0)
660
+ ) {
661
+ throw new Error(`Invalid startCounter: ${item.startCounter}`);
662
+ }
663
+
664
+ if (item.count === 0) continue;
665
+
666
+ if (leaves.length !== 0) {
667
+ const lastLeaf = leaves.at(-1)!;
344
668
  if (
345
- id.bunchId === current.bunchId &&
346
- id.counter === current.startCounter + current.count &&
347
- isDeleted === current.isDeleted
669
+ item.bunchId === lastLeaf.bunchId &&
670
+ item.startCounter === lastLeaf.startCounter + lastLeaf.count
348
671
  ) {
349
- current.count++;
672
+ // Extend lastLeaf.
673
+ // Okay to mutate in-place since we haven't referenced it anywhere else yet.
674
+ // @ts-expect-error Mutate in place
675
+ lastLeaf.count += item.count;
676
+ if (!item.isDeleted) {
677
+ lastLeaf.present.set(item.startCounter, item.count);
678
+ }
350
679
  continue;
351
680
  }
352
681
  }
353
682
 
354
- ans.push({
355
- bunchId: id.bunchId,
356
- startCounter: id.counter,
357
- count: 1,
358
- isDeleted,
683
+ // If we get to here, we need a new leaf.
684
+ const present = SparseIndices.new();
685
+ if (!item.isDeleted) present.set(item.startCounter, item.count);
686
+ leaves.push({
687
+ bunchId: item.bunchId,
688
+ startCounter: item.startCounter,
689
+ count: item.count,
690
+ present,
359
691
  });
360
692
  }
361
693
 
362
- return ans;
363
- }
364
-
365
- /**
366
- * Loads a saved state returned by {@link save}, __overwriting__ the current state of this list.
367
- */
368
- load(savedState: SavedIdList) {
369
- this.state.length = 0;
370
- this._length = 0;
694
+ // 2. Create a B+Tree with the given leaves.
695
+ // We do a "direct" balanced construction that takes O(n) time, instead of inserting
696
+ // leaves one-by-one, which would take O(n log(n)) time.
371
697
 
372
- for (const { bunchId, startCounter, count, isDeleted } of savedState) {
373
- if (!(Number.isSafeInteger(count) && count >= 0)) {
374
- throw new Error(`Invalid length: ${count}`);
375
- }
698
+ if (leaves.length === 0) return IdList.new();
376
699
 
377
- for (let i = 0; i < count; i++) {
378
- this.state.push({
379
- id: { bunchId, counter: startCounter + i },
380
- isDeleted,
381
- });
382
- }
383
- if (!isDeleted) this._length += count;
384
- }
700
+ // Depth of the B+Tree (number of non-root nodes on any path from a leaf to the root).
701
+ // A fully balanced B+Tree of depth d has between [M^{d-1} + 1, M^d] leaves.
702
+ const depth =
703
+ leaves.length === 1
704
+ ? 1
705
+ : Math.ceil(Math.log(leaves.length) / Math.log(M));
706
+ return new IdList(buildTree(leaves, 0, depth));
385
707
  }
386
708
  }
387
709
 
388
710
  /**
389
- * A live-updating view of an IdList that treats all known ids as present.
390
- * That is, this class ignores the underlying list's isDeleted status when computing list indices.
711
+ * A view of an IdList that treats all known ids as present.
712
+ * That is, this class ignores the underlying list's is-deleted status when computing list indices.
713
+ * Access using {@link IdList.knownIds}.
391
714
  *
392
- * To mutate, call methods on the original IdList (`this.list`).
715
+ * Like IdList, KnownIdView is immutable. To mutate, use a mutating method on the original IdList
716
+ * and access the returned list's `knownIds`.
393
717
  */
394
718
  export class KnownIdView {
395
719
  /**
396
720
  * Internal use only. Use {@link IdList.knownIds} instead.
397
721
  */
398
- constructor(readonly list: IdList, private readonly state: ListElement[]) {}
722
+ constructor(readonly list: IdList, private readonly root: InnerNode) {}
399
723
 
400
724
  // Mutators are omitted - mutate this.list instead.
401
725
 
@@ -413,14 +737,72 @@ export class KnownIdView {
413
737
  throw new Error(`Index out of bounds: ${index} (length: ${this.length}`);
414
738
  }
415
739
 
416
- return this.state[index].id;
740
+ let remaining = index;
741
+ let curParent = this.root;
742
+ // eslint-disable-next-line no-constant-condition
743
+ recurse: while (true) {
744
+ if (curParent instanceof InnerNodeInner) {
745
+ for (const child of curParent.children) {
746
+ if (remaining < child.knownSize) {
747
+ // Recurse.
748
+ curParent = child;
749
+ continue recurse;
750
+ } else {
751
+ remaining -= child.knownSize;
752
+ }
753
+ }
754
+ } else {
755
+ for (const child of curParent.children) {
756
+ if (remaining < child.count) {
757
+ // Found it.
758
+ return {
759
+ bunchId: child.bunchId,
760
+ counter: child.startCounter + remaining,
761
+ };
762
+ } else {
763
+ remaining -= child.count;
764
+ }
765
+ }
766
+ }
767
+
768
+ throw new Error("Internal error");
769
+ }
417
770
  }
418
771
 
419
772
  /**
420
773
  * Returns the index of `id` in this view. Throws an error if `id` is not known.
421
774
  */
422
775
  indexOf(id: ElementId): number {
423
- return this.state.findIndex((elt) => equalsId(elt.id, id));
776
+ const located = locate(id, this.root);
777
+ if (located === null) throw new Error("id is not known");
778
+
779
+ /**
780
+ * The number of known ids less than id.
781
+ * Equivalently, the index of id in this view.
782
+ */
783
+ let index = 0;
784
+
785
+ // Lesser siblings of parent, grandparent, etc.
786
+ for (let i = 1; i < located.length; i++) {
787
+ const parent = (
788
+ i === located.length - 1 ? this.root : located[i + 1].node
789
+ ) as InnerNodeInner;
790
+ for (let c = 0; c < located[i].indexInParent; c++) {
791
+ index += parent.children[c].knownSize;
792
+ }
793
+ }
794
+
795
+ // Siblings of id's leaf.
796
+ const leafParent = (
797
+ located.length === 1 ? this.root : located[1].node
798
+ ) as InnerNodeLeaf;
799
+ for (let c = 0; c < located[0].indexInParent; c++) {
800
+ const child = leafParent.children[c];
801
+ index += child.count;
802
+ }
803
+
804
+ // id's index within leaf.
805
+ return index + (id.counter - located[0].node.startCounter);
424
806
  }
425
807
 
426
808
  /**
@@ -429,7 +811,7 @@ export class KnownIdView {
429
811
  * Equivalently, the number of known ids in `this.list`.
430
812
  */
431
813
  get length(): number {
432
- return this.state.length;
814
+ return this.root.knownSize;
433
815
  }
434
816
 
435
817
  // Iterators
@@ -437,10 +819,8 @@ export class KnownIdView {
437
819
  /**
438
820
  * Iterates over all ids in this view, i.e., all known ids in `this.list`.
439
821
  */
440
- *[Symbol.iterator](): IterableIterator<ElementId> {
441
- for (const elt of this.state) {
442
- yield elt.id;
443
- }
822
+ [Symbol.iterator](): IterableIterator<ElementId> {
823
+ return iterateNode(this.root, true);
444
824
  }
445
825
 
446
826
  /**
@@ -451,21 +831,319 @@ export class KnownIdView {
451
831
  }
452
832
  }
453
833
 
454
- function expandElements(
455
- startId: ElementId,
456
- isDeleted: boolean,
457
- count: number
458
- ): ListElement[] {
459
- if (!(Number.isSafeInteger(count) && count >= 0)) {
460
- throw new Error(`Invalid count: ${count}`);
834
/**
 * Returns the first (leftmost) known ElementId in node's subtree.
 *
 * Descends through first children until it reaches an InnerNodeLeaf, then
 * reads the start of that node's first leaf.
 */
function firstId(node: InnerNode): ElementId {
  if (node instanceof InnerNodeLeaf) {
    const leftmostLeaf = node.children[0];
    return {
      bunchId: leftmostLeaf.bunchId,
      counter: leftmostLeaf.startCounter,
    };
  }
  return firstId(node.children[0]);
}
848
+
849
/**
 * Returns the last (rightmost) known ElementId in node's subtree.
 *
 * Descends through last children until it reaches an InnerNodeLeaf, then
 * reads the final counter covered by that node's last leaf.
 */
function lastId(node: InnerNode): ElementId {
  if (node instanceof InnerNodeLeaf) {
    const rightmostLeaf = node.children.at(-1)!;
    return {
      bunchId: rightmostLeaf.bunchId,
      // The leaf covers counters [startCounter, startCounter + count).
      counter: rightmostLeaf.startCounter + rightmostLeaf.count - 1,
    };
  }
  return lastId(node.children.at(-1)!);
}
863
+
864
/**
 * A leaf-to-root path: each entry pairs a node with its index in its parent's
 * children. Entry 0 is always the LeafNode; later entries are its ancestors.
 */
type Located = [
  leaf: { node: LeafNode; indexInParent: number },
  // Index 1 will be an InnerNodeLeaf if it exists.
  ...ancestors: { node: InnerNode; indexInParent: number }[]
];
869
+
870
/**
 * Finds id within node's subtree and returns the path from id's leaf node up
 * towards node, or null if id is not found.
 *
 * Each path entry holds a node and that node's index in its parent's children.
 * The path starts with id's LeafNode and ends at a direct child of `node`
 * (`node` itself is not included).
 */
export function locate(id: ElementId, node: InnerNode): Located | null {
  if (!(node instanceof InnerNodeInner)) {
    // node's children are leaves: look for the one whose counter range covers id.
    const leafIndex = node.children.findIndex(
      (leaf) =>
        leaf.bunchId === id.bunchId &&
        leaf.startCounter <= id.counter &&
        id.counter < leaf.startCounter + leaf.count
    );
    if (leafIndex === -1) return null;
    return [{ node: node.children[leafIndex], indexInParent: leafIndex }];
  }

  // Search each subtree in order; the first hit gets this level appended to its path.
  for (const [position, subtree] of node.children.entries()) {
    const path = locate(id, subtree);
    if (path !== null) {
      path.push({ node: subtree, indexInParent: position });
      return path;
    }
  }
  return null;
}
900
+
901
/**
 * Returns true if any of the given bulk ids are known within node's subtree.
 *
 * The bulk ids share id.bunchId and have counters
 * id.counter, ..., id.counter + count - 1.
 *
 * Assumes count > 0.
 */
function isAnyKnown(id: ElementId, count: number, node: InnerNode): boolean {
  if (node instanceof InnerNodeInner) {
    return node.children.some((subtree) => isAnyKnown(id, count, subtree));
  }

  // Inclusive counter range of the bulk ids.
  const queryFirst = id.counter;
  const queryLast = id.counter + count - 1;
  return node.children.some((leaf) => {
    if (leaf.bunchId !== id.bunchId) return false;
    // Inclusive counter range of the leaf; two closed ranges overlap iff each
    // one starts no later than the other ends.
    const leafFirst = leaf.startCounter;
    const leafLast = leaf.startCounter + leaf.count - 1;
    return leafFirst <= queryLast && queryFirst <= leafLast;
  });
}
926
+
927
/**
 * Replace located[i].node with newNodes, rebuilding (copy-on-write) every
 * ancestor on the path, and return the root of the resulting tree.
 *
 * If the replacement makes a parent exceed M children, that parent is split in
 * two and the split is propagated upwards; splitting the root creates a new
 * root one level higher.
 *
 * newNodes.length must be in [1, M].
 */
function replaceNode(
  located: Located,
  root: InnerNode,
  newNodes: InnerNode[] | LeafNode[],
  i: number
): InnerNode {
  // The last path entry is a child of the root; every other entry's parent
  // is the next entry on the path.
  const parentIsRoot = i === located.length - 1;
  const parent = parentIsRoot ? root : (located[i + 1].node as InnerNode);
  const position = located[i].indexInParent;

  // Copy-on-write version of parent.children.splice(position, 1, ...newNodes)
  const updatedChildren = [
    ...parent.children.slice(0, position),
    ...newNodes,
    ...parent.children.slice(position + 1),
  ];

  // At level 0 the replaced nodes are leaves, so their parent is an InnerNodeLeaf.
  const makeParent = (children: (InnerNode | LeafNode)[]): InnerNode =>
    i === 0
      ? new InnerNodeLeaf(children as LeafNode[])
      : new InnerNodeInner(children as InnerNode[]);

  let replacements: InnerNode[];
  if (updatedChildren.length > M) {
    // Split the parent to maintain BTree property (# children <= M).
    const middle = Math.ceil(updatedChildren.length / 2);
    replacements = [
      makeParent(updatedChildren.slice(0, middle)),
      makeParent(updatedChildren.slice(middle)),
    ];
  } else {
    replacements = [makeParent(updatedChildren)];
  }

  if (!parentIsRoot) {
    return replaceNode(located, root, replacements, i + 1);
  }
  // The parent was the root: a single replacement becomes the new root,
  // while a split pair needs a new root to hold them.
  return replacements.length === 1
    ? replacements[0]
    : new InnerNodeInner(replacements);
}
976
+
977
/**
 * Splits present into two SparseIndices at the given counter.
 *
 * Both halves are filled from a single slicer over `present`: first the slice
 * ending at splitCounter, then the remainder (presumably indices below
 * splitCounter go left and the rest go right; confirm against the
 * sparse-array-rled slicer docs).
 */
function splitPresent(
  present: SparseIndices,
  splitCounter: number
): [leftPresent: SparseIndices, rightPresent: SparseIndices] {
  const slicer = present.newSlicer();
  // Copies the slicer's next slice (up to `end`, or everything left if null)
  // into a fresh SparseIndices.
  const takeSlice = (end: number | null): SparseIndices => {
    const part = SparseIndices.new();
    for (const [index, count] of slicer.nextSlice(end)) {
      part.set(index, count);
    }
    return part;
  };

  // Order matters: the slicer is consumed left to right.
  const leftPresent = takeSlice(splitCounter);
  const rightPresent = takeSlice(null);
  return [leftPresent, rightPresent];
}
995
+
996
/**
 * Yields the ids in node's subtree, visiting children in order.
 *
 * If includeDeleted is true, every counter covered by each leaf is yielded;
 * otherwise only the counters listed in the leaf's `present` set.
 */
function* iterateNode(
  node: InnerNode,
  includeDeleted: boolean
): IterableIterator<ElementId> {
  if (node instanceof InnerNodeInner) {
    for (const subtree of node.children) {
      yield* iterateNode(subtree, includeDeleted);
    }
    return;
  }

  for (const leaf of node.children) {
    if (!includeDeleted) {
      for (const counter of leaf.present.keys()) {
        yield { bunchId: leaf.bunchId, counter };
      }
      continue;
    }
    // The leaf covers counters [startCounter, startCounter + count).
    for (let offset = 0; offset < leaf.count; offset++) {
      yield { bunchId: leaf.bunchId, counter: leaf.startCounter + offset };
    }
  }
}
1018
+
1019
/**
 * Yields every id in node's subtree together with its isDeleted status,
 * visiting children in order.
 *
 * Within a leaf, counters listed in `present` are yielded with
 * isDeleted: false; the counters in the gaps before, between and after them
 * are yielded with isDeleted: true.
 */
function* iterateNodeWithIsDeleted(
  node: InnerNode
): IterableIterator<{ id: ElementId; isDeleted: boolean }> {
  if (node instanceof InnerNodeInner) {
    for (const subtree of node.children) {
      yield* iterateNodeWithIsDeleted(subtree);
    }
    return;
  }

  for (const leaf of node.children) {
    // Next counter of this leaf that has not been yielded yet.
    let cursor = leaf.startCounter;
    for (const presentCounter of leaf.present.keys()) {
      // Deleted gap before the next present counter.
      for (; cursor < presentCounter; cursor++) {
        yield {
          id: { bunchId: leaf.bunchId, counter: cursor },
          isDeleted: true,
        };
      }
      yield {
        id: { bunchId: leaf.bunchId, counter: presentCounter },
        isDeleted: false,
      };
      cursor++;
    }
    // Deleted tail after the last present counter.
    for (; cursor < leaf.startCounter + leaf.count; cursor++) {
      yield {
        id: { bunchId: leaf.bunchId, counter: cursor },
        isDeleted: true,
      };
    }
  }
}
1053
+
1054
/**
 * Updates acc to account for node's subtree, as part of a depth-first search
 * in list order.
 *
 * Each leaf is emitted as alternating runs of deleted and present ids; runs
 * are appended through pushSaveItem.
 */
function saveNode(node: InnerNode, acc: SavedIdList) {
  if (node instanceof InnerNodeInner) {
    for (const subtree of node.children) {
      saveNode(subtree, acc);
    }
    return;
  }

  // Appends one run of leaf's ids to acc.
  const emitRun = (
    leaf: LeafNode,
    startCounter: number,
    count: number,
    isDeleted: boolean
  ): void => {
    pushSaveItem(acc, {
      bunchId: leaf.bunchId,
      startCounter,
      count,
      isDeleted,
    });
  };

  for (const leaf of node.children) {
    // First counter of this leaf not yet covered by an emitted run.
    let cursor = leaf.startCounter;
    for (const [runStart, runLength] of leaf.present.items()) {
      if (cursor < runStart) {
        // Need a deleted item for the gap before this present run.
        emitRun(leaf, cursor, runStart - cursor, true);
      }
      emitRun(leaf, runStart, runLength, false);
      cursor = runStart + runLength;
    }
    const leafEnd = leaf.startCounter + leaf.count;
    if (cursor < leafEnd) {
      // Trailing deleted ids after the last present run.
      emitRun(leaf, cursor, leafEnd - cursor, true);
    }
  }
}
1095
+
1096
/**
 * Pushes a save item onto acc, combining it with the previous item if possible.
 *
 * Two items are combined when they have the same bunchId and isDeleted status
 * and the new item's startCounter directly continues the previous item's range.
 *
 * This function is necessary because we don't guarantee that adjacent leaves are fully merged.
 * Specifically, if you insert a bunch's ids with counter values (0, 2, 1)
 * in that order, then counter 1 will extend one of the existing leaves
 * but not merge with the other leaf.
 *
 * This situation won't appear in typical usage, and its perf penalty
 * will go away once you reload. Thus we tolerate it instead of figuring out
 * how to delete leaves from a B+Tree.
 *
 * Neither `item` nor any object previously passed in is mutated: when items
 * are combined, the last entry of acc is replaced by a fresh object.
 */
function pushSaveItem(acc: SavedIdList, item: SavedIdList[number]): void {
  const lastIndex = acc.length - 1;
  if (lastIndex >= 0) {
    const previous = acc[lastIndex];
    if (
      previous.isDeleted === item.isDeleted &&
      previous.bunchId === item.bunchId &&
      previous.startCounter + previous.count === item.startCounter
    ) {
      // Combine items. Replace instead of mutating so that objects handed in
      // by the caller stay untouched and no type suppression is needed.
      acc[lastIndex] = { ...previous, count: previous.count + item.count };
      return;
    }
  }
  acc.push(item);
}
462
1124
 
463
- const ans: ListElement[] = [];
464
- for (let i = 0; i < count; i++) {
465
- ans.push({
466
- id: { bunchId: startId.bunchId, counter: startId.counter + i },
467
- isDeleted,
468
- });
1125
/**
 * Builds a tree with the given leaves. Used by IdList.load.
 *
 * In contrast to inserting the leaves one-by-one, this function balances the
 * tree, with full inner nodes (M children) whenever possible,
 * and runs in O(L) time instead of O(L log(L)).
 *
 * The returned node covers the leaves starting at startIndex;
 * depthRemaining is the number of inner-node levels still to create
 * (1 means the returned node holds the leaves directly).
 */
function buildTree(
  leaves: LeafNode[],
  startIndex: number,
  depthRemaining: number
): InnerNode {
  if (depthRemaining === 1) {
    return new InnerNodeLeaf(leaves.slice(startIndex, startIndex + M));
  }

  // Number of leaves consumed by each full child subtree.
  const leavesPerChild = M ** (depthRemaining - 1);
  const children: InnerNode[] = [];
  // Create up to M children, stopping early once the leaves run out.
  for (
    let made = 0, offset = startIndex;
    made < M && offset < leaves.length;
    made++, offset += leavesPerChild
  ) {
    children.push(buildTree(leaves, offset, depthRemaining - 1));
  }
  return new InnerNodeInner(children);
}