@atproto/repo 0.0.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. package/bench/mst.bench.ts +7 -4
  2. package/bench/repo.bench.ts +25 -16
  3. package/dist/block-map.d.ts +27 -0
  4. package/dist/data-diff.d.ts +36 -0
  5. package/dist/error.d.ts +20 -0
  6. package/dist/index.d.ts +4 -1
  7. package/dist/index.js +22870 -12456
  8. package/dist/index.js.map +4 -4
  9. package/dist/mst/diff.d.ts +4 -33
  10. package/dist/mst/mst.d.ts +73 -31
  11. package/dist/mst/util.d.ts +13 -5
  12. package/dist/parse.d.ts +16 -0
  13. package/dist/readable-repo.d.ts +23 -0
  14. package/dist/repo.d.ts +19 -31
  15. package/dist/src/block-map.d.ts +23 -0
  16. package/dist/src/blockstore/persistent-blockstore.d.ts +12 -0
  17. package/dist/src/cid-set.d.ts +14 -0
  18. package/dist/src/collection.d.ts +22 -0
  19. package/dist/src/data-diff.d.ts +34 -0
  20. package/dist/src/error.d.ts +21 -0
  21. package/dist/src/index.d.ts +7 -0
  22. package/dist/src/logger.d.ts +2 -0
  23. package/dist/src/mst/diff.d.ts +33 -0
  24. package/dist/src/mst/index.d.ts +4 -0
  25. package/dist/src/mst/mst.d.ts +106 -0
  26. package/dist/src/mst/util.d.ts +9 -0
  27. package/dist/src/mst/walker.d.ts +22 -0
  28. package/dist/src/parse.d.ts +11 -0
  29. package/dist/src/readable-repo.d.ts +25 -0
  30. package/dist/src/repo.d.ts +39 -0
  31. package/dist/src/storage/error.d.ts +22 -0
  32. package/dist/src/storage/index.d.ts +1 -0
  33. package/dist/src/storage/memory-blobstore.d.ts +1 -0
  34. package/dist/src/storage/memory-blockstore.d.ts +28 -0
  35. package/dist/src/storage/readable-blockstore.d.ts +21 -0
  36. package/dist/src/storage/repo-storage.d.ts +18 -0
  37. package/dist/src/storage/sync-storage.d.ts +15 -0
  38. package/dist/src/storage/types.d.ts +12 -0
  39. package/dist/src/storage/util.d.ts +17 -0
  40. package/dist/src/structure.d.ts +39 -0
  41. package/dist/src/sync/consumer.d.ts +19 -0
  42. package/dist/src/sync/index.d.ts +2 -0
  43. package/dist/src/sync/producer.d.ts +13 -0
  44. package/dist/src/sync/provider.d.ts +11 -0
  45. package/dist/src/types.d.ts +368 -0
  46. package/dist/src/util.d.ts +13 -0
  47. package/dist/src/verify.d.ts +5 -0
  48. package/dist/storage/index.d.ts +4 -0
  49. package/dist/storage/memory-blockstore.d.ts +29 -0
  50. package/dist/storage/readable-blockstore.d.ts +24 -0
  51. package/dist/storage/repo-storage.d.ts +19 -0
  52. package/dist/storage/sync-storage.d.ts +15 -0
  53. package/dist/storage/types.d.ts +4 -0
  54. package/dist/sync/consumer.d.ts +19 -0
  55. package/dist/sync/index.d.ts +2 -0
  56. package/dist/sync/provider.d.ts +9 -0
  57. package/dist/tsconfig.build.tsbuildinfo +1 -0
  58. package/dist/types.d.ts +137 -331
  59. package/dist/util.d.ts +35 -12
  60. package/dist/verify.d.ts +31 -4
  61. package/jest.bench.config.js +2 -1
  62. package/package.json +13 -6
  63. package/src/block-map.ts +103 -0
  64. package/src/cid-set.ts +1 -2
  65. package/src/data-diff.ts +117 -0
  66. package/src/error.ts +31 -0
  67. package/src/index.ts +4 -1
  68. package/src/mst/diff.ts +120 -90
  69. package/src/mst/mst.ts +179 -187
  70. package/src/mst/util.ts +54 -31
  71. package/src/parse.ts +44 -0
  72. package/src/readable-repo.ts +75 -0
  73. package/src/repo.ts +145 -244
  74. package/src/storage/index.ts +4 -0
  75. package/src/storage/memory-blockstore.ts +133 -0
  76. package/src/storage/readable-blockstore.ts +56 -0
  77. package/src/storage/repo-storage.ts +43 -0
  78. package/src/storage/sync-storage.ts +35 -0
  79. package/src/storage/types.ts +4 -0
  80. package/src/sync/consumer.ts +140 -0
  81. package/src/sync/index.ts +2 -0
  82. package/src/sync/provider.ts +91 -0
  83. package/src/types.ts +110 -73
  84. package/src/util.ts +258 -56
  85. package/src/verify.ts +248 -42
  86. package/tests/_util.ts +132 -97
  87. package/tests/mst.test.ts +269 -122
  88. package/tests/rebase.test.ts +37 -0
  89. package/tests/repo.test.ts +48 -50
  90. package/tests/sync/checkout.test.ts +75 -0
  91. package/tests/sync/diff.test.ts +92 -0
  92. package/tests/sync/narrow.test.ts +149 -0
  93. package/tests/util.test.ts +21 -0
  94. package/tsconfig.build.tsbuildinfo +1 -1
  95. package/tsconfig.json +2 -1
  96. package/src/blockstore/index.ts +0 -2
  97. package/src/blockstore/ipld-store.ts +0 -103
  98. package/src/blockstore/memory-blockstore.ts +0 -49
  99. package/src/sync.ts +0 -38
  100. package/tests/sync.test.ts +0 -129
  101. /package/dist/{blockstore → src/blockstore}/index.d.ts +0 -0
  102. /package/dist/{blockstore → src/blockstore}/ipld-store.d.ts +0 -0
  103. /package/dist/{blockstore → src/blockstore}/memory-blockstore.d.ts +0 -0
  104. /package/dist/{sync.d.ts → src/sync.d.ts} +0 -0
package/src/mst/mst.ts CHANGED
@@ -1,13 +1,14 @@
1
1
  import z from 'zod'
2
2
  import { CID } from 'multiformats'
3
3
 
4
- import IpldStore from '../blockstore/ipld-store'
5
- import { def, cidForData } from '@atproto/common'
6
- import { DataDiff } from './diff'
7
- import { DataStore } from '../types'
4
+ import { ReadableBlockstore } from '../storage'
5
+ import { schema as common, cidForCbor } from '@atproto/common'
8
6
  import { BlockWriter } from '@ipld/car/api'
9
7
  import * as util from './util'
10
- import MstWalker from './walker'
8
+ import BlockMap from '../block-map'
9
+ import CidSet from '../cid-set'
10
+ import { MissingBlockError, MissingBlocksError } from '../error'
11
+ import * as parse from '../parse'
11
12
 
12
13
  /**
13
14
  * This is an implementation of a Merkle Search Tree (MST)
@@ -19,6 +20,9 @@ import MstWalker from './walker'
19
20
  * This is a merkle tree, so each subtree is referred to by its hash (CID).
20
21
  * When a leaf is changed, every tree on the path to that leaf is changed as well,
21
22
  * thereby updating the root hash.
23
+ *
24
+ * For atproto, we use SHA-256 as the key hashing algorithm, and ~4 fanout
25
+ * (2-bits of zero per layer).
22
26
  */
23
27
 
24
28
  /**
@@ -39,74 +43,78 @@ import MstWalker from './walker'
39
43
  * Then the first will be described as `prefix: 0, key: 'bsky/posts/abcdefg'`,
40
44
  * and the second will be described as `prefix: 16, key: 'hi'`.
41
45
  */
42
- const subTreePointer = z.nullable(def.cid)
46
+ const subTreePointer = z.nullable(common.cid)
43
47
  const treeEntry = z.object({
44
- p: z.number(), // prefix count of utf-8 chars that this key shares with the prev key
45
- k: z.string(), // the rest of the key outside the shared prefix
46
- v: def.cid, // value
48
+ p: z.number(), // prefix count of ascii chars that this key shares with the prev key
49
+ k: common.bytes, // the rest of the key outside the shared prefix
50
+ v: common.cid, // value
47
51
  t: subTreePointer, // next subtree (to the right of leaf)
48
52
  })
49
- export const nodeDataDef = z.object({
53
+ const nodeData = z.object({
50
54
  l: subTreePointer, // left-most subtree
51
55
  e: z.array(treeEntry), //entries
52
56
  })
53
- export type NodeData = z.infer<typeof nodeDataDef>
57
+ export type NodeData = z.infer<typeof nodeData>
58
+
59
+ export const nodeDataDef = {
60
+ name: 'mst node',
61
+ schema: nodeData,
62
+ }
54
63
 
55
64
  export type NodeEntry = MST | Leaf
56
65
 
57
- const DEFAULT_MST_FANOUT = 16
58
- export type Fanout = 2 | 8 | 16 | 32 | 64
59
66
  export type MstOpts = {
60
67
  layer: number
61
- fanout: Fanout
62
68
  }
63
69
 
64
- export class MST implements DataStore {
65
- blockstore: IpldStore
66
- fanout: Fanout
70
+ export class MST {
71
+ storage: ReadableBlockstore
67
72
  entries: NodeEntry[] | null
68
73
  layer: number | null
69
74
  pointer: CID
70
75
  outdatedPointer = false
71
76
 
72
77
  constructor(
73
- blockstore: IpldStore,
74
- fanout: Fanout,
78
+ storage: ReadableBlockstore,
75
79
  pointer: CID,
76
80
  entries: NodeEntry[] | null,
77
81
  layer: number | null,
78
82
  ) {
79
- this.blockstore = blockstore
80
- this.fanout = fanout
83
+ this.storage = storage
81
84
  this.entries = entries
82
85
  this.layer = layer
83
86
  this.pointer = pointer
84
87
  }
85
88
 
86
89
  static async create(
87
- blockstore: IpldStore,
90
+ storage: ReadableBlockstore,
88
91
  entries: NodeEntry[] = [],
89
92
  opts?: Partial<MstOpts>,
90
93
  ): Promise<MST> {
91
94
  const pointer = await util.cidForEntries(entries)
92
- const { layer = 0, fanout = DEFAULT_MST_FANOUT } = opts || {}
93
- return new MST(blockstore, fanout, pointer, entries, layer)
95
+ const { layer = null } = opts || {}
96
+ return new MST(storage, pointer, entries, layer)
94
97
  }
95
98
 
96
99
  static async fromData(
97
- blockstore: IpldStore,
100
+ storage: ReadableBlockstore,
98
101
  data: NodeData,
99
102
  opts?: Partial<MstOpts>,
100
103
  ): Promise<MST> {
101
- const { layer = null, fanout = DEFAULT_MST_FANOUT } = opts || {}
102
- const entries = await util.deserializeNodeData(blockstore, data, opts)
103
- const pointer = await cidForData(data)
104
- return new MST(blockstore, fanout, pointer, entries, layer)
104
+ const { layer = null } = opts || {}
105
+ const entries = await util.deserializeNodeData(storage, data, opts)
106
+ const pointer = await cidForCbor(data)
107
+ return new MST(storage, pointer, entries, layer)
105
108
  }
106
109
 
107
- static load(blockstore: IpldStore, cid: CID, opts?: Partial<MstOpts>): MST {
108
- const { layer = null, fanout = DEFAULT_MST_FANOUT } = opts || {}
109
- return new MST(blockstore, fanout, cid, null, layer)
110
+ // this is really a *lazy* load, doesn't actually touch storage
111
+ static load(
112
+ storage: ReadableBlockstore,
113
+ cid: CID,
114
+ opts?: Partial<MstOpts>,
115
+ ): MST {
116
+ const { layer = null } = opts || {}
117
+ return new MST(storage, cid, null, layer)
110
118
  }
111
119
 
112
120
  // Immutability
@@ -114,13 +122,7 @@ export class MST implements DataStore {
114
122
 
115
123
  // We never mutate an MST, we just return a new MST with updated values
116
124
  async newTree(entries: NodeEntry[]): Promise<MST> {
117
- const mst = new MST(
118
- this.blockstore,
119
- this.fanout,
120
- this.pointer,
121
- entries,
122
- this.layer,
123
- )
125
+ const mst = new MST(this.storage, this.pointer, entries, this.layer)
124
126
  mst.outdatedPointer = true
125
127
  return mst
126
128
  }
@@ -132,15 +134,14 @@ export class MST implements DataStore {
132
134
  async getEntries(): Promise<NodeEntry[]> {
133
135
  if (this.entries) return [...this.entries]
134
136
  if (this.pointer) {
135
- const data = await this.blockstore.get(this.pointer, nodeDataDef)
137
+ const data = await this.storage.readObj(this.pointer, nodeDataDef)
136
138
  const firstLeaf = data.e[0]
137
139
  const layer =
138
140
  firstLeaf !== undefined
139
- ? await util.leadingZerosOnHash(firstLeaf.k, this.fanout)
141
+ ? await util.leadingZerosOnHash(firstLeaf.k)
140
142
  : undefined
141
- this.entries = await util.deserializeNodeData(this.blockstore, data, {
143
+ this.entries = await util.deserializeNodeData(this.storage, data, {
142
144
  layer,
143
- fanout: this.fanout,
144
145
  })
145
146
 
146
147
  return this.entries
@@ -178,7 +179,7 @@ export class MST implements DataStore {
178
179
  async attemptGetLayer(): Promise<number | null> {
179
180
  if (this.layer !== null) return this.layer
180
181
  const entries = await this.getEntries()
181
- let layer = await util.layerForEntries(entries, this.fanout)
182
+ let layer = await util.layerForEntries(entries)
182
183
  if (layer === null) {
183
184
  for (const entry of entries) {
184
185
  if (entry.isTree()) {
@@ -197,39 +198,29 @@ export class MST implements DataStore {
197
198
  // Core functionality
198
199
  // -------------------
199
200
 
200
- // Persist the MST to the blockstore
201
- // If the topmost tree only has one entry and it's a subtree, we can eliminate the topmost tree
202
- // However, lower trees with only one entry must be preserved
203
- async stage(): Promise<CID> {
204
- return this.stageRecurse(true)
205
- }
206
-
207
- async stageRecurse(trimTop = false): Promise<CID> {
201
+ // Return the necessary blocks to persist the MST to repo storage
202
+ async getUnstoredBlocks(): Promise<{ root: CID; blocks: BlockMap }> {
203
+ const blocks = new BlockMap()
208
204
  const pointer = await this.getPointer()
209
- const alreadyHas = await this.blockstore.has(pointer)
210
- if (alreadyHas) return pointer
205
+ const alreadyHas = await this.storage.has(pointer)
206
+ if (alreadyHas) return { root: pointer, blocks }
211
207
  const entries = await this.getEntries()
212
- if (entries.length === 1 && trimTop) {
213
- const node = entries[0]
214
- if (node.isTree()) {
215
- return node.stageRecurse(true)
216
- }
217
- }
218
208
  const data = util.serializeNodeData(entries)
219
- await this.blockstore.stage(data)
209
+ await blocks.add(data)
220
210
  for (const entry of entries) {
221
211
  if (entry.isTree()) {
222
- await entry.stageRecurse(false)
212
+ const subtree = await entry.getUnstoredBlocks()
213
+ blocks.addMap(subtree.blocks)
223
214
  }
224
215
  }
225
- return pointer
216
+ return { root: pointer, blocks: blocks }
226
217
  }
227
218
 
228
219
  // Adds a new leaf for the given key/value pair
229
220
  // Throws if a leaf with that key already exists
230
221
  async add(key: string, value: CID, knownZeros?: number): Promise<MST> {
231
- const keyZeros =
232
- knownZeros ?? (await util.leadingZerosOnHash(key, this.fanout))
222
+ util.ensureValidMstKey(key)
223
+ const keyZeros = knownZeros ?? (await util.leadingZerosOnHash(key))
233
224
  const layer = await this.getLayer()
234
225
  const newLeaf = new Leaf(key, value)
235
226
  if (keyZeros === layer) {
@@ -288,9 +279,8 @@ export class MST implements DataStore {
288
279
  if (left) updated.push(left)
289
280
  updated.push(new Leaf(key, value))
290
281
  if (right) updated.push(right)
291
- const newRoot = await MST.create(this.blockstore, updated, {
282
+ const newRoot = await MST.create(this.storage, updated, {
292
283
  layer: keyZeros,
293
- fanout: this.fanout,
294
284
  })
295
285
  newRoot.outdatedPointer = true
296
286
  return newRoot
@@ -314,6 +304,7 @@ export class MST implements DataStore {
314
304
  // Edits the value at the given key
315
305
  // Throws if the given key does not exist
316
306
  async update(key: string, value: CID): Promise<MST> {
307
+ util.ensureValidMstKey(key)
317
308
  const index = await this.findGtOrEqualLeafIndex(key)
318
309
  const found = await this.atIndex(index)
319
310
  if (found && found.isLeaf() && found.key === key) {
@@ -329,6 +320,11 @@ export class MST implements DataStore {
329
320
 
330
321
  // Deletes the value at the given key
331
322
  async delete(key: string): Promise<MST> {
323
+ const altered = await this.deleteRecurse(key)
324
+ return altered.trimTop()
325
+ }
326
+
327
+ async deleteRecurse(key: string): Promise<MST> {
332
328
  const index = await this.findGtOrEqualLeafIndex(key)
333
329
  const found = await this.atIndex(index)
334
330
  // if found, remove it on this level
@@ -349,7 +345,7 @@ export class MST implements DataStore {
349
345
  // else recurse down to find it
350
346
  const prev = await this.atIndex(index - 1)
351
347
  if (prev?.isTree()) {
352
- const subtree = await prev.delete(key)
348
+ const subtree = await prev.deleteRecurse(key)
353
349
  const subTreeEntries = await subtree.getEntries()
354
350
  if (subTreeEntries.length === 0) {
355
351
  return this.removeEntry(index - 1)
@@ -361,114 +357,6 @@ export class MST implements DataStore {
361
357
  }
362
358
  }
363
359
 
364
- // Walk two MSTs to find the semantic changes
365
- async diff(other: MST): Promise<DataDiff> {
366
- await this.getPointer()
367
- await other.getPointer()
368
- const diff = new DataDiff()
369
-
370
- const leftWalker = new MstWalker(this)
371
- const rightWalker = new MstWalker(other)
372
- while (!leftWalker.status.done || !rightWalker.status.done) {
373
- // if one walker is finished, continue walking the other & logging all nodes
374
- if (leftWalker.status.done && !rightWalker.status.done) {
375
- const node = rightWalker.status.curr
376
- if (node.isLeaf()) {
377
- diff.recordAdd(node.key, node.value)
378
- } else {
379
- diff.recordNewCid(node.pointer)
380
- }
381
- await rightWalker.advance()
382
- continue
383
- } else if (!leftWalker.status.done && rightWalker.status.done) {
384
- const node = leftWalker.status.curr
385
- if (node.isLeaf()) {
386
- diff.recordDelete(node.key, node.value)
387
- }
388
- await leftWalker.advance()
389
- continue
390
- }
391
- if (leftWalker.status.done || rightWalker.status.done) break
392
- const left = leftWalker.status.curr
393
- const right = rightWalker.status.curr
394
- if (left === null || right === null) break
395
-
396
- // if both pointers are leaves, record an update & advance both or record the lowest key and advance that pointer
397
- if (left.isLeaf() && right.isLeaf()) {
398
- if (left.key === right.key) {
399
- if (!left.value.equals(right.value)) {
400
- diff.recordUpdate(left.key, left.value, right.value)
401
- }
402
- await leftWalker.advance()
403
- await rightWalker.advance()
404
- } else if (left.key < right.key) {
405
- diff.recordDelete(left.key, left.value)
406
- await leftWalker.advance()
407
- } else {
408
- diff.recordAdd(right.key, right.value)
409
- await rightWalker.advance()
410
- }
411
- continue
412
- }
413
-
414
- // next, ensure that we're on the same layer
415
- // if one walker is at a higher layer than the other, we need to do one of two things
416
- // if the higher walker is pointed at a tree, step into that tree to try to catch up with the lower
417
- // if the higher walker is pointed at a leaf, then advance the lower walker to try to catch up the higher
418
- if (leftWalker.layer() > rightWalker.layer()) {
419
- if (left.isLeaf()) {
420
- if (right.isLeaf()) {
421
- diff.recordAdd(right.key, right.value)
422
- } else {
423
- diff.recordNewCid(right.pointer)
424
- }
425
- await rightWalker.advance()
426
- } else {
427
- await leftWalker.stepInto()
428
- }
429
- continue
430
- } else if (leftWalker.layer() < rightWalker.layer()) {
431
- if (right.isLeaf()) {
432
- if (left.isLeaf()) {
433
- diff.recordDelete(left.key, left.value)
434
- }
435
- await leftWalker.advance()
436
- } else {
437
- diff.recordNewCid(right.pointer)
438
- await rightWalker.stepInto()
439
- }
440
- continue
441
- }
442
-
443
- // if we're on the same level, and both pointers are trees, do a comparison
444
- // if they're the same, step over. if they're different, step in to find the subdiff
445
- if (left.isTree() && right.isTree()) {
446
- if (left.pointer.equals(right.pointer)) {
447
- await leftWalker.stepOver()
448
- await rightWalker.stepOver()
449
- } else {
450
- diff.recordNewCid(right.pointer)
451
- await leftWalker.stepInto()
452
- await rightWalker.stepInto()
453
- }
454
- continue
455
- }
456
-
457
- // finally, if one pointer is a tree and the other is a leaf, simply step into the tree
458
- if (left.isLeaf() && right.isTree()) {
459
- await diff.recordNewCid(right.pointer)
460
- await rightWalker.stepInto()
461
- continue
462
- } else if (left.isTree() && right.isLeaf()) {
463
- await leftWalker.stepInto()
464
- continue
465
- }
466
-
467
- throw new Error('Unidentifiable case in diff walk')
468
- }
469
- return diff
470
- }
471
-
472
360
  // Simple Operations
473
361
  // -------------------
474
362
 
@@ -543,6 +431,16 @@ export class MST implements DataStore {
543
431
  return this.newTree(update)
544
432
  }
545
433
 
434
+ // if the topmost node in the tree only points to another tree, trim the top and return the subtree
435
+ async trimTop(): Promise<MST> {
436
+ const entries = await this.getEntries()
437
+ if (entries.length === 1 && entries[0].isTree()) {
438
+ return entries[0].trimTop()
439
+ } else {
440
+ return this
441
+ }
442
+ }
443
+
546
444
  // Subtree & Splits
547
445
  // -------------------
548
446
 
@@ -604,17 +502,15 @@ export class MST implements DataStore {
604
502
 
605
503
  async createChild(): Promise<MST> {
606
504
  const layer = await this.getLayer()
607
- return MST.create(this.blockstore, [], {
505
+ return MST.create(this.storage, [], {
608
506
  layer: layer - 1,
609
- fanout: this.fanout,
610
507
  })
611
508
  }
612
509
 
613
510
  async createParent(): Promise<MST> {
614
511
  const layer = await this.getLayer()
615
- const parent = await MST.create(this.blockstore, [this], {
512
+ const parent = await MST.create(this.storage, [this], {
616
513
  layer: layer + 1,
617
- fanout: this.fanout,
618
514
  })
619
515
  parent.outdatedPointer = true
620
516
  return parent
@@ -660,7 +556,11 @@ export class MST implements DataStore {
660
556
  }
661
557
  }
662
558
 
663
- async list(count: number, after?: string, before?: string): Promise<Leaf[]> {
559
+ async list(
560
+ count = Number.MAX_SAFE_INTEGER,
561
+ after?: string,
562
+ before?: string,
563
+ ): Promise<Leaf[]> {
664
564
  const vals: Leaf[] = []
665
565
  for await (const leaf of this.walkLeavesFrom(after || '')) {
666
566
  if (leaf.key === after) continue
@@ -726,6 +626,22 @@ export class MST implements DataStore {
726
626
  return nodes
727
627
  }
728
628
 
629
+ // Walks tree & returns all cids
630
+ async allCids(): Promise<CidSet> {
631
+ const cids = new CidSet()
632
+ const entries = await this.getEntries()
633
+ for (const entry of entries) {
634
+ if (entry.isLeaf()) {
635
+ cids.add(entry.value)
636
+ } else {
637
+ const subtreeCids = await entry.allCids()
638
+ cids.addSet(subtreeCids)
639
+ }
640
+ }
641
+ cids.add(await this.getPointer())
642
+ return cids
643
+ }
644
+
729
645
  // Walks tree & returns all leaves
730
646
  async leaves() {
731
647
  const leaves: Leaf[] = []
@@ -741,17 +657,93 @@ export class MST implements DataStore {
741
657
  return leaves.length
742
658
  }
743
659
 
744
- // Sync Protocol
660
+ // Reachable tree traversal
661
+ // -------------------
745
662
 
746
- async writeToCarStream(car: BlockWriter): Promise<void> {
747
- for await (const entry of this.walk()) {
663
+ // Walk reachable branches of tree & emit nodes; if walking a subtree raises MissingBlockError, the rest of that subtree is skipped. The consumer can bail at any point by ending iteration
664
+ async *walkReachable(): AsyncIterable<NodeEntry> {
665
+ yield this
666
+ const entries = await this.getEntries()
667
+ for (const entry of entries) {
748
668
  if (entry.isTree()) {
749
- const pointer = await entry.getPointer()
750
- await this.blockstore.addToCar(car, pointer)
669
+ try {
670
+ for await (const e of entry.walkReachable()) {
671
+ yield e
672
+ }
673
+ } catch (err) {
674
+ if (err instanceof MissingBlockError) {
675
+ continue
676
+ } else {
677
+ throw err
678
+ }
679
+ }
751
680
  } else {
752
- await this.blockstore.addToCar(car, entry.value)
681
+ yield entry
682
+ }
683
+ }
684
+ }
685
+
686
+ async reachableLeaves(): Promise<Leaf[]> {
687
+ const leaves: Leaf[] = []
688
+ for await (const entry of this.walkReachable()) {
689
+ if (entry.isLeaf()) leaves.push(entry)
690
+ }
691
+ return leaves
692
+ }
693
+
694
+ // Sync Protocol
695
+
696
+ async writeToCarStream(car: BlockWriter): Promise<void> {
697
+ const leaves = new CidSet()
698
+ let toFetch = new CidSet()
699
+ toFetch.add(await this.getPointer())
700
+ while (toFetch.size() > 0) {
701
+ const nextLayer = new CidSet()
702
+ const fetched = await this.storage.getBlocks(toFetch.toList())
703
+ if (fetched.missing.length > 0) {
704
+ throw new MissingBlocksError('mst node', fetched.missing)
705
+ }
706
+ for (const cid of toFetch.toList()) {
707
+ const found = await parse.getAndParseByDef(
708
+ fetched.blocks,
709
+ cid,
710
+ nodeDataDef,
711
+ )
712
+ await car.put({ cid, bytes: found.bytes })
713
+ const entries = await util.deserializeNodeData(this.storage, found.obj)
714
+
715
+ for (const entry of entries) {
716
+ if (entry.isLeaf()) {
717
+ leaves.add(entry.value)
718
+ } else {
719
+ nextLayer.add(await entry.getPointer())
720
+ }
721
+ }
753
722
  }
723
+ toFetch = nextLayer
724
+ }
725
+ const leafData = await this.storage.getBlocks(leaves.toList())
726
+ if (leafData.missing.length > 0) {
727
+ throw new MissingBlocksError('mst leaf', leafData.missing)
728
+ }
729
+
730
+ for (const leaf of leafData.blocks.entries()) {
731
+ await car.put(leaf)
732
+ }
733
+ }
734
+
735
+ async cidsForPath(key: string): Promise<CID[]> {
736
+ const cids: CID[] = [await this.getPointer()]
737
+ const index = await this.findGtOrEqualLeafIndex(key)
738
+ const found = await this.atIndex(index)
739
+ if (found && found.isLeaf() && found.key === key) {
740
+ return [...cids, found.value]
741
+ }
742
+ const prev = await this.atIndex(index - 1)
743
+ if (prev && prev.isTree()) {
744
+ return [...cids, ...(await prev.cidsForPath(key))]
754
745
  }
746
+ return cids
755
747
  }
756
748
 
757
749
  // Matching Leaf interface
package/src/mst/util.ts CHANGED
@@ -1,68 +1,60 @@
1
1
  import { CID } from 'multiformats'
2
2
  import * as uint8arrays from 'uint8arrays'
3
- import IpldStore from '../blockstore/ipld-store'
3
+ import { ReadableBlockstore } from '../storage'
4
4
  import { sha256 } from '@atproto/crypto'
5
- import { MST, Leaf, NodeEntry, NodeData, MstOpts, Fanout } from './mst'
6
- import { cidForData } from '@atproto/common'
5
+ import { MST, Leaf, NodeEntry, NodeData, MstOpts } from './mst'
6
+ import { cidForCbor } from '@atproto/common'
7
7
 
8
- type SupportedBases = 'base2' | 'base8' | 'base16' | 'base32' | 'base64'
9
-
10
- export const leadingZerosOnHash = async (
11
- key: string,
12
- fanout: Fanout,
13
- ): Promise<number> => {
14
- if ([2, 8, 16, 32, 64].indexOf(fanout) < 0) {
15
- throw new Error(`Not a valid fanout: ${fanout}`)
16
- }
17
- const base: SupportedBases = `base${fanout}`
18
- const zeroChar = uint8arrays.toString(new Uint8Array(1), base)[0]
8
+ export const leadingZerosOnHash = async (key: string | Uint8Array) => {
19
9
  const hash = await sha256(key)
20
- const encoded = uint8arrays.toString(hash, base)
21
- let count = 0
22
- for (const char of encoded) {
23
- if (char === zeroChar) {
24
- count++
10
+ let leadingZeros = 0
11
+ for (let i = 0; i < hash.length; i++) {
12
+ const byte = hash[i]
13
+ if (byte < 64) leadingZeros++
14
+ if (byte < 16) leadingZeros++
15
+ if (byte < 4) leadingZeros++
16
+ if (byte === 0) {
17
+ leadingZeros++
25
18
  } else {
26
19
  break
27
20
  }
28
21
  }
29
- return count
22
+ return leadingZeros
30
23
  }
31
24
 
32
25
  export const layerForEntries = async (
33
26
  entries: NodeEntry[],
34
- fanout: Fanout,
35
27
  ): Promise<number | null> => {
36
28
  const firstLeaf = entries.find((entry) => entry.isLeaf())
37
29
  if (!firstLeaf || firstLeaf.isTree()) return null
38
- return await leadingZerosOnHash(firstLeaf.key, fanout)
30
+ return await leadingZerosOnHash(firstLeaf.key)
39
31
  }
40
32
 
41
33
  export const deserializeNodeData = async (
42
- blockstore: IpldStore,
34
+ storage: ReadableBlockstore,
43
35
  data: NodeData,
44
36
  opts?: Partial<MstOpts>,
45
37
  ): Promise<NodeEntry[]> => {
46
- const { layer, fanout } = opts || {}
38
+ const { layer } = opts || {}
47
39
  const entries: NodeEntry[] = []
48
40
  if (data.l !== null) {
49
41
  entries.push(
50
- await MST.load(blockstore, data.l, {
42
+ await MST.load(storage, data.l, {
51
43
  layer: layer ? layer - 1 : undefined,
52
- fanout,
53
44
  }),
54
45
  )
55
46
  }
56
47
  let lastKey = ''
57
48
  for (const entry of data.e) {
58
- const key = lastKey.slice(0, entry.p) + entry.k
49
+ const keyStr = uint8arrays.toString(entry.k, 'ascii')
50
+ const key = lastKey.slice(0, entry.p) + keyStr
51
+ ensureValidMstKey(key)
59
52
  entries.push(new Leaf(key, entry.v))
60
53
  lastKey = key
61
54
  if (entry.t !== null) {
62
55
  entries.push(
63
- await MST.load(blockstore, entry.t, {
56
+ await MST.load(storage, entry.t, {
64
57
  layer: layer ? layer - 1 : undefined,
65
- fanout,
66
58
  }),
67
59
  )
68
60
  }
@@ -93,10 +85,11 @@ export const serializeNodeData = (entries: NodeEntry[]): NodeData => {
93
85
  subtree = next.pointer
94
86
  i++
95
87
  }
88
+ ensureValidMstKey(leaf.key)
96
89
  const prefixLen = countPrefixLen(lastKey, leaf.key)
97
90
  data.e.push({
98
91
  p: prefixLen,
99
- k: leaf.key.slice(prefixLen),
92
+ k: uint8arrays.fromString(leaf.key.slice(prefixLen), 'ascii'),
100
93
  v: leaf.value,
101
94
  t: subtree,
102
95
  })
@@ -118,5 +111,35 @@ export const countPrefixLen = (a: string, b: string): number => {
118
111
 
119
112
  export const cidForEntries = async (entries: NodeEntry[]): Promise<CID> => {
120
113
  const data = serializeNodeData(entries)
121
- return cidForData(data)
114
+ return cidForCbor(data)
115
+ }
116
+
117
+ export const isValidMstKey = (str: string): boolean => {
118
+ const split = str.split('/')
119
+ return (
120
+ str.length <= 256 &&
121
+ split.length === 2 &&
122
+ split[0].length > 0 &&
123
+ split[1].length > 0 &&
124
+ isValidChars(split[0]) &&
125
+ isValidChars(split[1])
126
+ )
127
+ }
128
+
129
+ export const validCharsRegex = /^[a-zA-Z0-9_\-:.]*$/
130
+
131
+ export const isValidChars = (str: string): boolean => {
132
+ return str.match(validCharsRegex) !== null
133
+ }
134
+
135
+ export const ensureValidMstKey = (str: string) => {
136
+ if (!isValidMstKey(str)) {
137
+ throw new InvalidMstKeyError(str)
138
+ }
139
+ }
140
+
141
+ export class InvalidMstKeyError extends Error {
142
+ constructor(public key: string) {
143
+ super(`Not a valid MST key: ${key}`)
144
+ }
122
145
  }