articulated 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -23,22 +23,23 @@ yarn add articulated
23
23
  ```typescript
24
24
  import { IdList } from "articulated";
25
25
 
26
- // Create an empty list
27
- const list = new IdList();
26
+ // Create an empty list.
27
+ let list = IdList.new();
28
28
 
29
- // Insert a new element at the beginning
30
- list.insertAfter(null, { bunchId: "user1", counter: 0 });
29
+ // Insert a new ElementId at the beginning.
30
+ // Note: Persistent (immutable) data structure! Mutators return a new IdList.
31
+ list = list.insertAfter(null, { bunchId: "user1", counter: 0 });
31
32
 
32
- // Insert another element after the first
33
- list.insertAfter(
33
+ // Insert another ElementId after the first.
34
+ list = list.insertAfter(
34
35
  { bunchId: "user1", counter: 0 },
35
36
  { bunchId: "user1", counter: 1 }
36
37
  );
37
38
 
38
- // Delete an element (marks as deleted but keeps as known)
39
- list.delete({ bunchId: "user1", counter: 0 });
39
+ // Delete an ElementId (marks as deleted but keeps as known).
40
+ list = list.delete({ bunchId: "user1", counter: 0 });
40
41
 
41
- // Check if elements are present/known
42
+ // Check if ElementIds are present/known.
42
43
  console.log(list.has({ bunchId: "user1", counter: 0 })); // false (deleted)
43
44
  console.log(list.isKnown({ bunchId: "user1", counter: 0 })); // true (known but deleted)
44
45
  ```
@@ -50,9 +51,9 @@ console.log(list.isKnown({ bunchId: "user1", counter: 0 })); // true (known but
50
51
  An `ElementId` is a globally unique identifier for a list element, composed of:
51
52
 
52
53
  - `bunchId`: A string UUID or similar globally unique ID
53
- - `counter`: A numeric value to distinguish elements in the same bunch
54
+ - `counter`: A numeric value to distinguish ElementIds in the same bunch
54
55
 
55
- For optimal compression, when inserting multiple elements in sequence, use the same `bunchId` with sequential `counter` values.
56
+ For optimal compression, when inserting multiple ElementIds in a left-to-right sequence, use the same `bunchId` with sequential `counter` values.
56
57
 
57
58
  ```typescript
58
59
  // Example of IDs that will compress well
@@ -63,39 +64,38 @@ const id3 = { bunchId: "abc123", counter: 2 };
63
64
 
64
65
  ### IdList Operations
65
66
 
67
+ To enable easy and efficient rollbacks, such as in a [server reconciliation](https://mattweidner.com/2024/06/04/server-architectures.html#1-server-reconciliation) architecture, IdList is a persistent (immutable) data structure. Mutating methods return a new IdList, sharing memory with the old IdList where possible.
68
+
66
69
  #### Basic Operations
67
70
 
68
- - `insertAfter(before, newId)`: Insert after a specific element
69
- - `insertBefore(after, newId)`: Insert before a specific element
70
- - `delete(id)`: Mark an element as deleted (remains known)
71
- - `undelete(id)`: Restore a deleted element
71
+ - `insertAfter(before, newId): IdList`: Insert after a specific ElementId
72
+ - `insertBefore(after, newId): IdList`: Insert before a specific ElementId
73
+ - `delete(id): IdList`: Mark an ElementId as deleted (remains known)
74
+ - `undelete(id): IdList`: Restore a deleted ElementId
72
75
 
73
- #### Advanced Operations
76
+ #### Basic Accessors
74
77
 
75
- - `uninsert(id)`: Remove an element completely (no longer known)
76
- - `at(index)`: Get the element ID at a specific index
77
- - `indexOf(id, bias)`: Get the index of an element with optional bias for deleted elements
78
- - `clone()`: Create a deep copy of the list
78
+ - `at(index)`: Get the ElementId at a specific index
79
+ - `indexOf(id, bias: "none" | "left" | "right" = "none")`: Get the index of an ElementId, with optional bias for deleted-but-known ElementIds
79
80
 
80
81
  #### Bulk Operations
81
82
 
82
83
  ```typescript
83
84
  // Insert multiple sequential ids at once
84
- list.insertAfter(null, { bunchId: "user1", counter: 0 }, 5);
85
+ list = list.insertAfter(null, { bunchId: "user1", counter: 0 }, 5);
85
86
  // Inserts 5 ids with bunchId="user1" and counters 0, 1, 2, 3, 4
86
87
  ```
87
88
 
88
- ### Persistence
89
+ #### Save and Load
89
90
 
90
- Save and restore the list state:
91
+ Save and load the list state in JSON form:
91
92
 
92
93
  ```typescript
93
94
  // Save list state
94
95
  const savedState = list.save();
95
96
 
96
- // Later, restore from saved state
97
- const newList = new IdList();
98
- newList.load(savedState);
97
+ // Later, load from saved state
98
+ let newList = IdList.load(savedState);
99
99
  ```
100
100
 
101
101
  ## Use Cases
@@ -104,3 +104,13 @@ newList.load(savedState);
104
104
  - Todo lists with collaborative editing
105
105
  - Any list where elements' positions change but need stable identifiers
106
106
  - Conflict-free replicated data type (CRDT) implementations
107
+
108
+ ## Internals
109
+
110
+ IdList stores its state as a modified [B+Tree](https://en.wikipedia.org/wiki/B%2B_tree), described at the top of [its source code](./src/id_list.ts). Each leaf in the B+Tree represents multiple ElementIds (sharing a bunchId and sequential counters) in a compressed way; for normal collaborative text editing, expect 10-20 ElementIds per leaf.
111
+
112
+ In terms of the number of leaves `L`, mutating an IdList with insertAfter/insertBefore/delete/undelete will only create `O(log(L))` new tree nodes, reusing the rest. However, most methods currently take `O(L)` total time because they search the whole tree for a given ElementId, which has not yet been optimized (it uses a simple depth-first search). Exception: `IdList.at(index)` takes only `O(log(L))` time.
113
+
114
+ If you want to get a sense of what IdList is or how to implement your own version, consider reading the source code for [IdListSimple](./test/id_list_simple.ts), which behaves identically to IdList. It is short (<300 SLOC) and direct, using an array and `Array.splice`. The downside is that IdListSimple does not compress ElementIds, and all of its operations take `O(# ids)` time. We use it as a known-good implementation in our fuzz tests.
115
+
116
+ <!-- TODO: related work: CRDTs, ropes, list-positions, ?? -->
@@ -3,12 +3,12 @@
3
3
  *
4
4
  * ElementIds are conceptually the same as UUIDs (or nanoids, etc.).
5
5
  * However, when a single thread generates a series of ElementIds, you are
6
- * allowed to optimize by generating a single UUID/nanoid/etc. and using that as the "bunchId"
6
+ * allowed to optimize by generating a single UUID/nanoid/etc. and using that as the `bunchId`
7
7
  * for a "bunch" of elements, with varying `counter`.
8
8
  * The resulting ElementIds compress better than a set of UUIDs, but they are
9
- * still globally unique, even if another thread/user/device generates ElementIds concurrently.
9
+ * still globally unique, even if another thread/device/user generates ElementIds concurrently.
10
10
  *
11
- * For example, if a user types a sentence from left to right, you may generate a
11
+ * For example, if a user types a sentence from left to right, you can generate a
12
12
  * single `bunchId` and assign their characters the sequential ElementIds
13
13
  * `{ bunchId, counter: 0 }, { bunchId, counter: 1 }, { bunchId, counter: 2 }, ...`.
14
14
  * An IdList will store all of these as a single object instead of
@@ -31,9 +31,6 @@ export interface ElementId {
31
31
  * IdList is optimized for this case, but it is not mandatory.
32
32
  * In particular, it is okay if future edits cause the sequential ids to be
33
33
  * separated, partially deleted, or even reordered.
34
- *
35
- * Negative integers are supported by IdList (e.g., for optimized right-to-left insertions),
36
- * though you may choose to avoid these in your application, to make serialization easier.
37
34
  */
38
35
  readonly counter: number;
39
36
  }
@@ -1 +1 @@
1
- {"version":3,"file":"id.js","sourceRoot":"","sources":["../../src/id.ts"],"names":[],"mappings":";;;AAwCA;;GAEG;AACH,SAAgB,QAAQ,CAAC,CAAY,EAAE,CAAY;IACjD,OAAO,CAAC,CAAC,OAAO,KAAK,CAAC,CAAC,OAAO,IAAI,CAAC,CAAC,OAAO,KAAK,CAAC,CAAC,OAAO,CAAC;AAC5D,CAAC;AAFD,4BAEC;AAED;;;;;;;;;;;;;;;;GAgBG;AACH,SAAgB,SAAS,CAAC,OAAkB,EAAE,KAAa;IACzD,IAAI,CAAC,CAAC,MAAM,CAAC,aAAa,CAAC,KAAK,CAAC,IAAI,KAAK,IAAI,CAAC,CAAC,EAAE,CAAC;QACjD,MAAM,IAAI,KAAK,CAAC,kBAAkB,KAAK,EAAE,CAAC,CAAC;IAC7C,CAAC;IAED,MAAM,GAAG,GAAgB,EAAE,CAAC;IAC5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,EAAE,CAAC,EAAE,EAAE,CAAC;QAC/B,GAAG,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,CAAC,OAAO,GAAG,CAAC,EAAE,CAAC,CAAC;IACvE,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAVD,8BAUC"}
1
+ {"version":3,"file":"id.js","sourceRoot":"","sources":["../../src/id.ts"],"names":[],"mappings":";;;AAqCA;;GAEG;AACH,SAAgB,QAAQ,CAAC,CAAY,EAAE,CAAY;IACjD,OAAO,CAAC,CAAC,OAAO,KAAK,CAAC,CAAC,OAAO,IAAI,CAAC,CAAC,OAAO,KAAK,CAAC,CAAC,OAAO,CAAC;AAC5D,CAAC;AAFD,4BAEC;AAED;;;;;;;;;;;;;;;;GAgBG;AACH,SAAgB,SAAS,CAAC,OAAkB,EAAE,KAAa;IACzD,IAAI,CAAC,CAAC,MAAM,CAAC,aAAa,CAAC,KAAK,CAAC,IAAI,KAAK,IAAI,CAAC,CAAC,EAAE,CAAC;QACjD,MAAM,IAAI,KAAK,CAAC,kBAAkB,KAAK,EAAE,CAAC,CAAC;IAC7C,CAAC;IAED,MAAM,GAAG,GAAgB,EAAE,CAAC;IAC5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,EAAE,CAAC,EAAE,EAAE,CAAC;QAC/B,GAAG,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,CAAC,OAAO,GAAG,CAAC,EAAE,CAAC,CAAC;IACvE,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAVD,8BAUC"}
@@ -1,11 +1,48 @@
1
+ import { SparseIndices } from "sparse-array-rled";
1
2
  import { ElementId } from "./id";
2
3
  import { SavedIdList } from "./saved_id_list";
3
- interface ListElement {
4
- id: ElementId;
5
- isDeleted: boolean;
4
+ export interface LeafNode {
5
+ readonly bunchId: string;
6
+ readonly startCounter: number;
7
+ readonly count: number;
8
+ /**
9
+ * The present counter values in this leaf node.
10
+ *
11
+ * Note that it is indexed by counter, not by (counter - this.startCounter).
12
+ */
13
+ readonly present: SparseIndices;
14
+ }
15
+ /**
16
+ * An inner node with inner-node children.
17
+ */
18
+ export declare class InnerNodeInner {
19
+ readonly children: readonly InnerNode[];
20
+ readonly size: number;
21
+ readonly knownSize: number;
22
+ constructor(children: readonly InnerNode[]);
6
23
  }
7
24
  /**
8
- * A list of ElementIds.
25
+ * An inner node with leaf children.
26
+ */
27
+ export declare class InnerNodeLeaf {
28
+ readonly children: readonly LeafNode[];
29
+ readonly size: number;
30
+ readonly knownSize: number;
31
+ constructor(children: readonly LeafNode[]);
32
+ }
33
+ export type InnerNode = InnerNodeInner | InnerNodeLeaf;
34
+ /**
35
+ * The B+Tree's branching factor, i.e., the max number of children of a node.
36
+ *
37
+ * Note that our B+Tree has no keys - in particular, no keys in internal nodes.
38
+ *
39
+ * Wiki B+Tree: "B+ trees can also be used for data stored in RAM.
40
+ * In this case a reasonable choice for block size would be the size of [the] processor's cache line."
41
+ * (64 byte cache line) / (8 byte pointer) = 8.
42
+ */
43
+ export declare const M = 8;
44
+ /**
45
+ * A list of ElementIds, as a persistent (immutable) data structure.
9
46
  *
10
47
  * An IdList helps you assign a unique immutable id to each element of a list, such
11
48
  * as a todo-list or a text document (= list of characters). That way, you can keep track
@@ -14,11 +51,14 @@ interface ListElement {
14
51
  *
15
52
  * Any id that has been inserted into an IdList remains **known** to that list indefinitely,
16
53
  * allowing you to reference it in insertAfter/insertBefore operations. Calling {@link delete}
17
- * merely marks an id as deleted (not present); it remains in memory as a "tombstone".
54
+ * merely marks an id as deleted (= not present); a deleted id does not count towards the length of the list or index-based accessors, but it does remain in memory as a "tombstone".
18
55
  * This is useful in collaborative settings, since another user might instruct you to
19
56
  * call `insertAfter(before, newId)` when you have already deleted `before` locally.
20
- * If that is not a concern and you truly want to make an id no longer known, instead
21
- * call {@link uninsert}.
57
+ *
58
+ * To enable easy and efficient rollbacks, such as in a
59
+ * [server reconciliation](https://mattweidner.com/2024/06/04/server-architectures.html#1-server-reconciliation)
60
+ * architecture, IdList is a persistent (immutable) data structure. Mutating methods
61
+ * return a new IdList, sharing memory with the old IdList where possible.
22
62
  *
23
63
  * See {@link ElementId} for advice on generating ElementIds. IdList is optimized for
24
64
  * the case where sequential ElementIds often have the same bunchId and sequential counters.
@@ -26,18 +66,22 @@ interface ListElement {
26
66
  * cause such ids to be separated, partially deleted, or even reordered.
27
67
  */
28
68
  export declare class IdList {
29
- private readonly state;
30
- private _length;
69
+ private readonly root;
70
+ /**
71
+ * Internal - construct an IdList using a static method (e.g. `IdList.new`).
72
+ */
73
+ private constructor();
31
74
  /**
32
75
  * Constructs an empty list.
33
76
  *
34
- * To begin with a non-empty list, use {@link IdList.from} or {@link IdList.fromIds}.
77
+ * To begin with a non-empty list, use {@link IdList.from}, {@link IdList.fromIds},
78
+ * or {@link IdList.load}.
35
79
  */
36
- constructor();
80
+ static new(): IdList;
37
81
  /**
38
82
  * Constructs a list with the given known ids and their isDeleted status, in list order.
39
83
  */
40
- static from(state: Iterable<{
84
+ static from(knownIds: Iterable<{
41
85
  id: ElementId;
42
86
  isDeleted: boolean;
43
87
  }>): IdList;
@@ -51,6 +95,8 @@ export declare class IdList {
51
95
  static fromIds(ids: Iterable<ElementId>): IdList;
52
96
  /**
53
97
  * Inserts `newId` immediately after the given id (`before`), which may be deleted.
98
+ * A new IdList is returned and the current list remains unchanged.
99
+ *
54
100
  * All ids to the right of `before` are shifted one index to the right, in the manner
55
101
  * of [Array.splice](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/splice).
56
102
  *
@@ -60,11 +106,13 @@ export declare class IdList {
60
106
  * @param count Provide this to bulk-insert `count` ids from left-to-right,
61
107
  * starting with newId and proceeding with the same bunchId and sequential counters.
62
108
  * @throws If `before` is not known.
63
- * @throws If `newId` is already known.
109
+ * @throws If any inserted id is already known.
64
110
  */
65
- insertAfter(before: ElementId | null, newId: ElementId, count?: number): void;
111
+ insertAfter(before: ElementId | null, newId: ElementId, count?: number): IdList;
66
112
  /**
67
113
  * Inserts `newId` immediately before the given id (`after`), which may be deleted.
114
+ * A new IdList is returned and the current list remains unchanged.
115
+ *
68
116
  * All ids to the right of `after`, plus `after` itself, are shifted one index to the right, in the manner
69
117
  * of [Array.splice](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/splice).
70
118
  *
@@ -77,39 +125,38 @@ export declare class IdList {
77
125
  * @throws If `after` is not known.
78
126
  * @throws If any inserted id is already known.
79
127
  */
80
- insertBefore(after: ElementId | null, newId: ElementId, count?: number): void;
128
+ insertBefore(after: ElementId | null, newId: ElementId, count?: number): IdList;
81
129
  /**
82
- * Un-inserts `id` from the list, making it no longer known or present in this list.
83
- *
84
- * Typically, you instead want to call {@link delete}, which marks `id` as deleted while
85
- * it remains known. That way, you can reference `id` in future insertAfter/insertBefore
86
- * operations, including ones sent concurrently by other devices.
87
- *
88
- * If `id` is already not known, this method does nothing.
89
- */
90
- uninsert(id: ElementId): void;
91
- /**
92
- * Marks `id` as deleted from this list. The id remains known (a "tombstone").
130
+ * Marks `id` as deleted from this list.
131
+ * A new IdList is returned and the current list remains unchanged.
93
132
  *
133
+ * Once deleted, `id` does not count towards the length of the list or index-based accessors.
134
+ * However, it remains known (a "tombstone").
94
135
  * Because `id` is still known, you can reference it in future insertAfter/insertBefore
95
136
  * operations, including ones sent concurrently by other devices.
96
- * However, it does occupy space in memory (compressed in common cases).
137
+ * This does have a memory cost, but it is compressed in common cases.
97
138
  *
98
- * For an exact inverse to `insertAfter(-, id)` or `insertBefore(-, id)`
99
- * that makes `id` no longer known, see {@link uninsert}.
100
- *
101
- * If `id` is already deleted or not known, this method does nothing.
139
+ * If `id` is already deleted or is not known, this method does nothing.
102
140
  */
103
- delete(id: ElementId): void;
141
+ delete(id: ElementId): IdList;
104
142
  /**
105
143
  * Un-marks `id` as deleted from this list, making it present again.
106
- * This is an exact inverse to {@link delete}.
144
+ * A new IdList is returned and the current list remains unchanged.
145
+ *
146
+ * This method is an exact inverse to {@link delete}.
107
147
  *
108
148
  * If `id` is already present, this method does nothing.
109
149
  *
110
150
  * @throws If `id` is not known.
111
151
  */
112
- undelete(id: ElementId): void;
152
+ undelete(id: ElementId): IdList;
153
+ /**
154
+ * Replaces the leaf at the given path with newLeaves.
155
+ * Returns a proper (sufficiently balanced) B+Tree with updated sizes.
156
+ *
157
+ * newLeaves.length must be in [1, M].
158
+ */
159
+ private replaceLeaf;
113
160
  /**
114
161
  * Returns whether `id` is present in the list, i.e., it is known and not deleted.
115
162
  *
@@ -124,6 +171,12 @@ export declare class IdList {
124
171
  * Compare to {@link has}.
125
172
  */
126
173
  isKnown(id: ElementId): boolean;
174
+ /**
175
+ * The length of the list, counting only present ids.
176
+ *
177
+ * To include known but deleted ids, use `this.knownIds.length`.
178
+ */
179
+ get length(): number;
127
180
  /**
128
181
  * Returns the id at the given index in the list.
129
182
  *
@@ -142,10 +195,6 @@ export declare class IdList {
142
195
  * @throws If `id` is not known.
143
196
  */
144
197
  indexOf(id: ElementId, bias?: "none" | "left" | "right"): number;
145
- /**
146
- * The length of the list.
147
- */
148
- get length(): number;
149
198
  /**
150
199
  * Iterates over all present ids in the list.
151
200
  */
@@ -157,18 +206,14 @@ export declare class IdList {
157
206
  /**
158
207
  * Iterates over all __known__ ids in the list, indicating which are deleted.
159
208
  */
160
- valuesWithDeleted(): IterableIterator<{
209
+ valuesWithIsDeleted(): IterableIterator<{
161
210
  id: ElementId;
162
211
  isDeleted: boolean;
163
212
  }>;
164
- /**
165
- * Returns an independent copy of this list, including known but deleted ids.
166
- */
167
- clone(): IdList;
168
213
  private _knownIds?;
169
214
  /**
170
- * A live-updating view of this list that treats all known ids as present.
171
- * That is, it ignores isDeleted status when computing list indices or iterating.
215
+ * A view of this list that treats all known ids as present.
216
+ * That is, it ignores is-deleted status when computing list indices or iterating.
172
217
  */
173
218
  get knownIds(): KnownIdView;
174
219
  /**
@@ -179,23 +224,25 @@ export declare class IdList {
179
224
  */
180
225
  save(): SavedIdList;
181
226
  /**
182
- * Loads a saved state returned by {@link save}, __overwriting__ the current state of this list.
227
+ * Loads a saved state returned by {@link save}.
183
228
  */
184
- load(savedState: SavedIdList): void;
229
+ static load(savedState: SavedIdList): IdList;
185
230
  }
186
231
  /**
187
- * A live-updating view of an IdList that treats all known ids as present.
188
- * That is, this class ignores the underlying list's isDeleted status when computing list indices.
232
+ * A view of an IdList that treats all known ids as present.
233
+ * That is, this class ignores the underlying list's is-deleted status when computing list indices.
234
+ * Access using {@link IdList.knownIds}.
189
235
  *
190
- * To mutate, call methods on the original IdList (`this.list`).
236
+ * Like IdList, KnownIdView is immutable. To mutate, use a mutating method on the original IdList
237
+ * and access the returned list's `knownIds`.
191
238
  */
192
239
  export declare class KnownIdView {
193
240
  readonly list: IdList;
194
- private readonly state;
241
+ private readonly root;
195
242
  /**
196
243
  * Internal use only. Use {@link IdList.knownIds} instead.
197
244
  */
198
- constructor(list: IdList, state: ListElement[]);
245
+ constructor(list: IdList, root: InnerNode);
199
246
  /**
200
247
  * Returns the id at the given index in this view.
201
248
  *
@@ -223,4 +270,21 @@ export declare class KnownIdView {
223
270
  */
224
271
  values(): IterableIterator<ElementId>;
225
272
  }
273
+ type Located = [
274
+ {
275
+ node: LeafNode;
276
+ indexInParent: number;
277
+ },
278
+ ...{
279
+ node: InnerNode;
280
+ indexInParent: number;
281
+ }[]
282
+ ];
283
+ /**
284
+ * Returns the path from id's leaf node to the root, or null if id is not found.
285
+ *
286
+ * The path contains each node and its index in its parent node, starting with id's
287
+ * LeafNode and ending at a child of the root.
288
+ */
289
+ export declare function locate(id: ElementId, node: InnerNode): Located | null;
226
290
  export {};