@atproto/repo 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. package/bench/mst.bench.ts +7 -4
  2. package/bench/repo.bench.ts +25 -16
  3. package/dist/block-map.d.ts +25 -0
  4. package/dist/data-diff.d.ts +36 -0
  5. package/dist/error.d.ts +20 -0
  6. package/dist/index.d.ts +3 -1
  7. package/dist/index.js +11605 -10399
  8. package/dist/index.js.map +4 -4
  9. package/dist/mst/diff.d.ts +4 -33
  10. package/dist/mst/mst.d.ts +68 -25
  11. package/dist/mst/util.d.ts +13 -5
  12. package/dist/parse.d.ts +16 -0
  13. package/dist/readable-repo.d.ts +22 -0
  14. package/dist/repo.d.ts +14 -30
  15. package/dist/storage/index.d.ts +4 -0
  16. package/dist/storage/memory-blockstore.d.ts +28 -0
  17. package/dist/storage/readable-blockstore.d.ts +24 -0
  18. package/dist/storage/repo-storage.d.ts +18 -0
  19. package/dist/storage/sync-storage.d.ts +15 -0
  20. package/dist/storage/types.d.ts +3 -0
  21. package/dist/sync/consumer.d.ts +18 -0
  22. package/dist/sync/index.d.ts +2 -0
  23. package/dist/sync/provider.d.ts +9 -0
  24. package/dist/types.d.ts +124 -317
  25. package/dist/util.d.ts +31 -12
  26. package/dist/verify.d.ts +26 -4
  27. package/package.json +4 -2
  28. package/src/block-map.ts +95 -0
  29. package/src/cid-set.ts +1 -2
  30. package/src/data-diff.ts +121 -0
  31. package/src/error.ts +31 -0
  32. package/src/index.ts +3 -1
  33. package/src/mst/diff.ts +120 -90
  34. package/src/mst/mst.ts +185 -184
  35. package/src/mst/util.ts +54 -31
  36. package/src/parse.ts +44 -0
  37. package/src/readable-repo.ts +75 -0
  38. package/src/repo.ts +119 -249
  39. package/src/storage/index.ts +4 -0
  40. package/src/storage/memory-blockstore.ts +114 -0
  41. package/src/storage/readable-blockstore.ts +56 -0
  42. package/src/storage/repo-storage.ts +42 -0
  43. package/src/storage/sync-storage.ts +35 -0
  44. package/src/storage/types.ts +3 -0
  45. package/src/sync/consumer.ts +137 -0
  46. package/src/sync/index.ts +2 -0
  47. package/src/sync/provider.ts +91 -0
  48. package/src/types.ts +101 -62
  49. package/src/util.ts +237 -56
  50. package/src/verify.ts +207 -42
  51. package/tests/_util.ts +132 -97
  52. package/tests/mst.test.ts +269 -122
  53. package/tests/repo.test.ts +48 -50
  54. package/tests/sync/checkout.test.ts +57 -0
  55. package/tests/sync/diff.test.ts +87 -0
  56. package/tests/sync/narrow.test.ts +145 -0
  57. package/tsconfig.build.tsbuildinfo +1 -1
  58. package/tsconfig.json +2 -1
  59. package/src/blockstore/index.ts +0 -2
  60. package/src/blockstore/ipld-store.ts +0 -103
  61. package/src/blockstore/memory-blockstore.ts +0 -49
  62. package/src/sync.ts +0 -38
  63. package/tests/sync.test.ts +0 -129
package/src/mst/mst.ts CHANGED
@@ -1,13 +1,15 @@
1
1
  import z from 'zod'
2
2
  import { CID } from 'multiformats'
3
3
 
4
- import IpldStore from '../blockstore/ipld-store'
5
- import { def, cidForData } from '@atproto/common'
6
- import { DataDiff } from './diff'
4
+ import { ReadableBlockstore } from '../storage'
5
+ import { schema as common, cidForCbor } from '@atproto/common'
7
6
  import { DataStore } from '../types'
8
7
  import { BlockWriter } from '@ipld/car/api'
9
8
  import * as util from './util'
10
- import MstWalker from './walker'
9
+ import BlockMap from '../block-map'
10
+ import CidSet from '../cid-set'
11
+ import { MissingBlockError, MissingBlocksError } from '../error'
12
+ import * as parse from '../parse'
11
13
 
12
14
  /**
13
15
  * This is an implementation of a Merkle Search Tree (MST)
@@ -19,6 +21,9 @@ import MstWalker from './walker'
19
21
  * This is a merkle tree, so each subtree is referred to by its hash (CID).
20
22
  * When a leaf is changed, every tree on the path to that leaf is changed as well,
21
23
  * thereby updating the root hash.
24
+ *
25
+ * For atproto, we use SHA-256 as the key hashing algorithm, and ~4 fanout
26
+ * (2-bits of zero per layer).
22
27
  */
23
28
 
24
29
  /**
@@ -39,74 +44,78 @@ import MstWalker from './walker'
39
44
  * Then the first will be described as `prefix: 0, key: 'bsky/posts/abcdefg'`,
40
45
  * and the second will be described as `prefix: 16, key: 'hi'`.
41
46
  */
42
- const subTreePointer = z.nullable(def.cid)
47
+ const subTreePointer = z.nullable(common.cid)
43
48
  const treeEntry = z.object({
44
- p: z.number(), // prefix count of utf-8 chars that this key shares with the prev key
45
- k: z.string(), // the rest of the key outside the shared prefix
46
- v: def.cid, // value
49
+ p: z.number(), // prefix count of ascii chars that this key shares with the prev key
50
+ k: common.bytes, // the rest of the key outside the shared prefix
51
+ v: common.cid, // value
47
52
  t: subTreePointer, // next subtree (to the right of leaf)
48
53
  })
49
- export const nodeDataDef = z.object({
54
+ const nodeData = z.object({
50
55
  l: subTreePointer, // left-most subtree
51
56
  e: z.array(treeEntry), //entries
52
57
  })
53
- export type NodeData = z.infer<typeof nodeDataDef>
58
+ export type NodeData = z.infer<typeof nodeData>
59
+
60
+ export const nodeDataDef = {
61
+ name: 'mst node',
62
+ schema: nodeData,
63
+ }
54
64
 
55
65
  export type NodeEntry = MST | Leaf
56
66
 
57
- const DEFAULT_MST_FANOUT = 16
58
- export type Fanout = 2 | 8 | 16 | 32 | 64
59
67
  export type MstOpts = {
60
68
  layer: number
61
- fanout: Fanout
62
69
  }
63
70
 
64
71
  export class MST implements DataStore {
65
- blockstore: IpldStore
66
- fanout: Fanout
72
+ storage: ReadableBlockstore
67
73
  entries: NodeEntry[] | null
68
74
  layer: number | null
69
75
  pointer: CID
70
76
  outdatedPointer = false
71
77
 
72
78
  constructor(
73
- blockstore: IpldStore,
74
- fanout: Fanout,
79
+ storage: ReadableBlockstore,
75
80
  pointer: CID,
76
81
  entries: NodeEntry[] | null,
77
82
  layer: number | null,
78
83
  ) {
79
- this.blockstore = blockstore
80
- this.fanout = fanout
84
+ this.storage = storage
81
85
  this.entries = entries
82
86
  this.layer = layer
83
87
  this.pointer = pointer
84
88
  }
85
89
 
86
90
  static async create(
87
- blockstore: IpldStore,
91
+ storage: ReadableBlockstore,
88
92
  entries: NodeEntry[] = [],
89
93
  opts?: Partial<MstOpts>,
90
94
  ): Promise<MST> {
91
95
  const pointer = await util.cidForEntries(entries)
92
- const { layer = 0, fanout = DEFAULT_MST_FANOUT } = opts || {}
93
- return new MST(blockstore, fanout, pointer, entries, layer)
96
+ const { layer = null } = opts || {}
97
+ return new MST(storage, pointer, entries, layer)
94
98
  }
95
99
 
96
100
  static async fromData(
97
- blockstore: IpldStore,
101
+ storage: ReadableBlockstore,
98
102
  data: NodeData,
99
103
  opts?: Partial<MstOpts>,
100
104
  ): Promise<MST> {
101
- const { layer = null, fanout = DEFAULT_MST_FANOUT } = opts || {}
102
- const entries = await util.deserializeNodeData(blockstore, data, opts)
103
- const pointer = await cidForData(data)
104
- return new MST(blockstore, fanout, pointer, entries, layer)
105
+ const { layer = null } = opts || {}
106
+ const entries = await util.deserializeNodeData(storage, data, opts)
107
+ const pointer = await cidForCbor(data)
108
+ return new MST(storage, pointer, entries, layer)
105
109
  }
106
110
 
107
- static load(blockstore: IpldStore, cid: CID, opts?: Partial<MstOpts>): MST {
108
- const { layer = null, fanout = DEFAULT_MST_FANOUT } = opts || {}
109
- return new MST(blockstore, fanout, cid, null, layer)
111
+ // this is really a *lazy* load, doesn't actually touch storage
112
+ static load(
113
+ storage: ReadableBlockstore,
114
+ cid: CID,
115
+ opts?: Partial<MstOpts>,
116
+ ): MST {
117
+ const { layer = null } = opts || {}
118
+ return new MST(storage, cid, null, layer)
110
119
  }
111
120
 
112
121
  // Immutability
@@ -114,13 +123,7 @@ export class MST implements DataStore {
114
123
 
115
124
  // We never mutate an MST, we just return a new MST with updated values
116
125
  async newTree(entries: NodeEntry[]): Promise<MST> {
117
- const mst = new MST(
118
- this.blockstore,
119
- this.fanout,
120
- this.pointer,
121
- entries,
122
- this.layer,
123
- )
126
+ const mst = new MST(this.storage, this.pointer, entries, this.layer)
124
127
  mst.outdatedPointer = true
125
128
  return mst
126
129
  }
@@ -132,15 +135,14 @@ export class MST implements DataStore {
132
135
  async getEntries(): Promise<NodeEntry[]> {
133
136
  if (this.entries) return [...this.entries]
134
137
  if (this.pointer) {
135
- const data = await this.blockstore.get(this.pointer, nodeDataDef)
138
+ const data = await this.storage.readObj(this.pointer, nodeDataDef)
136
139
  const firstLeaf = data.e[0]
137
140
  const layer =
138
141
  firstLeaf !== undefined
139
- ? await util.leadingZerosOnHash(firstLeaf.k, this.fanout)
142
+ ? await util.leadingZerosOnHash(firstLeaf.k)
140
143
  : undefined
141
- this.entries = await util.deserializeNodeData(this.blockstore, data, {
144
+ this.entries = await util.deserializeNodeData(this.storage, data, {
142
145
  layer,
143
- fanout: this.fanout,
144
146
  })
145
147
 
146
148
  return this.entries
@@ -178,7 +180,7 @@ export class MST implements DataStore {
178
180
  async attemptGetLayer(): Promise<number | null> {
179
181
  if (this.layer !== null) return this.layer
180
182
  const entries = await this.getEntries()
181
- let layer = await util.layerForEntries(entries, this.fanout)
183
+ let layer = await util.layerForEntries(entries)
182
184
  if (layer === null) {
183
185
  for (const entry of entries) {
184
186
  if (entry.isTree()) {
@@ -197,39 +199,29 @@ export class MST implements DataStore {
197
199
  // Core functionality
198
200
  // -------------------
199
201
 
200
- // Persist the MST to the blockstore
201
- // If the topmost tree only has one entry and it's a subtree, we can eliminate the topmost tree
202
- // However, lower trees with only one entry must be preserved
203
- async stage(): Promise<CID> {
204
- return this.stageRecurse(true)
205
- }
206
-
207
- async stageRecurse(trimTop = false): Promise<CID> {
202
+ // Return the necessary blocks to persist the MST to repo storage
203
+ async getUnstoredBlocks(): Promise<{ root: CID; blocks: BlockMap }> {
204
+ const blocks = new BlockMap()
208
205
  const pointer = await this.getPointer()
209
- const alreadyHas = await this.blockstore.has(pointer)
210
- if (alreadyHas) return pointer
206
+ const alreadyHas = await this.storage.has(pointer)
207
+ if (alreadyHas) return { root: pointer, blocks }
211
208
  const entries = await this.getEntries()
212
- if (entries.length === 1 && trimTop) {
213
- const node = entries[0]
214
- if (node.isTree()) {
215
- return node.stageRecurse(true)
216
- }
217
- }
218
209
  const data = util.serializeNodeData(entries)
219
- await this.blockstore.stage(data)
210
+ await blocks.add(data)
220
211
  for (const entry of entries) {
221
212
  if (entry.isTree()) {
222
- await entry.stageRecurse(false)
213
+ const subtree = await entry.getUnstoredBlocks()
214
+ blocks.addMap(subtree.blocks)
223
215
  }
224
216
  }
225
- return pointer
217
+ return { root: pointer, blocks: blocks }
226
218
  }
227
219
 
228
220
  // Adds a new leaf for the given key/value pair
229
221
  // Throws if a leaf with that key already exists
230
222
  async add(key: string, value: CID, knownZeros?: number): Promise<MST> {
231
- const keyZeros =
232
- knownZeros ?? (await util.leadingZerosOnHash(key, this.fanout))
223
+ util.ensureValidMstKey(key)
224
+ const keyZeros = knownZeros ?? (await util.leadingZerosOnHash(key))
233
225
  const layer = await this.getLayer()
234
226
  const newLeaf = new Leaf(key, value)
235
227
  if (keyZeros === layer) {
@@ -288,9 +280,8 @@ export class MST implements DataStore {
288
280
  if (left) updated.push(left)
289
281
  updated.push(new Leaf(key, value))
290
282
  if (right) updated.push(right)
291
- const newRoot = await MST.create(this.blockstore, updated, {
283
+ const newRoot = await MST.create(this.storage, updated, {
292
284
  layer: keyZeros,
293
- fanout: this.fanout,
294
285
  })
295
286
  newRoot.outdatedPointer = true
296
287
  return newRoot
@@ -314,6 +305,7 @@ export class MST implements DataStore {
314
305
  // Edits the value at the given key
315
306
  // Throws if the given key does not exist
316
307
  async update(key: string, value: CID): Promise<MST> {
308
+ util.ensureValidMstKey(key)
317
309
  const index = await this.findGtOrEqualLeafIndex(key)
318
310
  const found = await this.atIndex(index)
319
311
  if (found && found.isLeaf() && found.key === key) {
@@ -329,6 +321,11 @@ export class MST implements DataStore {
329
321
 
330
322
  // Deletes the value at the given key
331
323
  async delete(key: string): Promise<MST> {
324
+ const altered = await this.deleteRecurse(key)
325
+ return altered.trimTop()
326
+ }
327
+
328
+ async deleteRecurse(key: string): Promise<MST> {
332
329
  const index = await this.findGtOrEqualLeafIndex(key)
333
330
  const found = await this.atIndex(index)
334
331
  // if found, remove it on this level
@@ -349,7 +346,7 @@ export class MST implements DataStore {
349
346
  // else recurse down to find it
350
347
  const prev = await this.atIndex(index - 1)
351
348
  if (prev?.isTree()) {
352
- const subtree = await prev.delete(key)
349
+ const subtree = await prev.deleteRecurse(key)
353
350
  const subTreeEntries = await subtree.getEntries()
354
351
  if (subTreeEntries.length === 0) {
355
352
  return this.removeEntry(index - 1)
@@ -361,114 +358,6 @@ export class MST implements DataStore {
361
358
  }
362
359
  }
363
360
 
364
- // Walk two MSTs to find the semantic changes
365
- async diff(other: MST): Promise<DataDiff> {
366
- await this.getPointer()
367
- await other.getPointer()
368
- const diff = new DataDiff()
369
-
370
- const leftWalker = new MstWalker(this)
371
- const rightWalker = new MstWalker(other)
372
- while (!leftWalker.status.done || !rightWalker.status.done) {
373
- // if one walker is finished, continue walking the other & logging all nodes
374
- if (leftWalker.status.done && !rightWalker.status.done) {
375
- const node = rightWalker.status.curr
376
- if (node.isLeaf()) {
377
- diff.recordAdd(node.key, node.value)
378
- } else {
379
- diff.recordNewCid(node.pointer)
380
- }
381
- await rightWalker.advance()
382
- continue
383
- } else if (!leftWalker.status.done && rightWalker.status.done) {
384
- const node = leftWalker.status.curr
385
- if (node.isLeaf()) {
386
- diff.recordDelete(node.key, node.value)
387
- }
388
- await leftWalker.advance()
389
- continue
390
- }
391
- if (leftWalker.status.done || rightWalker.status.done) break
392
- const left = leftWalker.status.curr
393
- const right = rightWalker.status.curr
394
- if (left === null || right === null) break
395
-
396
- // if both pointers are leaves, record an update & advance both or record the lowest key and advance that pointer
397
- if (left.isLeaf() && right.isLeaf()) {
398
- if (left.key === right.key) {
399
- if (!left.value.equals(right.value)) {
400
- diff.recordUpdate(left.key, left.value, right.value)
401
- }
402
- await leftWalker.advance()
403
- await rightWalker.advance()
404
- } else if (left.key < right.key) {
405
- diff.recordDelete(left.key, left.value)
406
- await leftWalker.advance()
407
- } else {
408
- diff.recordAdd(right.key, right.value)
409
- await rightWalker.advance()
410
- }
411
- continue
412
- }
413
-
414
- // next, ensure that we're on the same layer
415
- // if one walker is at a higher layer than the other, we need to do one of two things
416
- // if the higher walker is pointed at a tree, step into that tree to try to catch up with the lower
417
- // if the higher walker is pointed at a leaf, then advance the lower walker to try to catch up the higher
418
- if (leftWalker.layer() > rightWalker.layer()) {
419
- if (left.isLeaf()) {
420
- if (right.isLeaf()) {
421
- diff.recordAdd(right.key, right.value)
422
- } else {
423
- diff.recordNewCid(right.pointer)
424
- }
425
- await rightWalker.advance()
426
- } else {
427
- await leftWalker.stepInto()
428
- }
429
- continue
430
- } else if (leftWalker.layer() < rightWalker.layer()) {
431
- if (right.isLeaf()) {
432
- if (left.isLeaf()) {
433
- diff.recordDelete(left.key, left.value)
434
- }
435
- await leftWalker.advance()
436
- } else {
437
- diff.recordNewCid(right.pointer)
438
- await rightWalker.stepInto()
439
- }
440
- continue
441
- }
442
-
443
- // if we're on the same level, and both pointers are trees, do a comparison
444
- // if they're the same, step over. if they're different, step in to find the subdiff
445
- if (left.isTree() && right.isTree()) {
446
- if (left.pointer.equals(right.pointer)) {
447
- await leftWalker.stepOver()
448
- await rightWalker.stepOver()
449
- } else {
450
- diff.recordNewCid(right.pointer)
451
- await leftWalker.stepInto()
452
- await rightWalker.stepInto()
453
- }
454
- continue
455
- }
456
-
457
- // finally, if one pointer is a tree and the other is a leaf, simply step into the tree
458
- if (left.isLeaf() && right.isTree()) {
459
- await diff.recordNewCid(right.pointer)
460
- await rightWalker.stepInto()
461
- continue
462
- } else if (left.isTree() && right.isLeaf()) {
463
- await leftWalker.stepInto()
464
- continue
465
- }
466
-
467
- throw new Error('Unidentifiable case in diff walk')
468
- }
469
- return diff
470
- }
471
-
472
361
  // Simple Operations
473
362
  // -------------------
474
363
 
@@ -543,6 +432,16 @@ export class MST implements DataStore {
543
432
  return this.newTree(update)
544
433
  }
545
434
 
435
+ // if the topmost node in the tree only points to another tree, trim the top and return the subtree
436
+ async trimTop(): Promise<MST> {
437
+ const entries = await this.getEntries()
438
+ if (entries.length === 1 && entries[0].isTree()) {
439
+ return entries[0].trimTop()
440
+ } else {
441
+ return this
442
+ }
443
+ }
444
+
546
445
  // Subtree & Splits
547
446
  // -------------------
548
447
 
@@ -604,17 +503,15 @@ export class MST implements DataStore {
604
503
 
605
504
  async createChild(): Promise<MST> {
606
505
  const layer = await this.getLayer()
607
- return MST.create(this.blockstore, [], {
506
+ return MST.create(this.storage, [], {
608
507
  layer: layer - 1,
609
- fanout: this.fanout,
610
508
  })
611
509
  }
612
510
 
613
511
  async createParent(): Promise<MST> {
614
512
  const layer = await this.getLayer()
615
- const parent = await MST.create(this.blockstore, [this], {
513
+ const parent = await MST.create(this.storage, [this], {
616
514
  layer: layer + 1,
617
- fanout: this.fanout,
618
515
  })
619
516
  parent.outdatedPointer = true
620
517
  return parent
@@ -660,7 +557,11 @@ export class MST implements DataStore {
660
557
  }
661
558
  }
662
559
 
663
- async list(count: number, after?: string, before?: string): Promise<Leaf[]> {
560
+ async list(
561
+ count = Number.MAX_SAFE_INTEGER,
562
+ after?: string,
563
+ before?: string,
564
+ ): Promise<Leaf[]> {
664
565
  const vals: Leaf[] = []
665
566
  for await (const leaf of this.walkLeavesFrom(after || '')) {
666
567
  if (leaf.key === after) continue
@@ -726,6 +627,22 @@ export class MST implements DataStore {
726
627
  return nodes
727
628
  }
728
629
 
630
+ // Walks tree & returns all cids
631
+ async allCids(): Promise<CidSet> {
632
+ const cids = new CidSet()
633
+ const entries = await this.getEntries()
634
+ for (const entry of entries) {
635
+ if (entry.isLeaf()) {
636
+ cids.add(entry.value)
637
+ } else {
638
+ const subtreeCids = await entry.allCids()
639
+ cids.addSet(subtreeCids)
640
+ }
641
+ }
642
+ cids.add(await this.getPointer())
643
+ return cids
644
+ }
645
+
729
646
  // Walks tree & returns all leaves
730
647
  async leaves() {
731
648
  const leaves: Leaf[] = []
@@ -741,17 +658,101 @@ export class MST implements DataStore {
741
658
  return leaves.length
742
659
  }
743
660
 
661
+ // Reachable tree traversal
662
+ // -------------------
663
+
664
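+ // Walk reachable branches of tree & emit nodes; a MissingBlockError raised while walking a subtree is caught and the walk moves on to the next entry. Consumer can bail at any point by breaking out of the iteration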
665
+ async *walkReachable(): AsyncIterable<NodeEntry> {
666
+ yield this
667
+ const entries = await this.getEntries()
668
+ for (const entry of entries) {
669
+ if (entry.isTree()) {
670
+ try {
671
+ for await (const e of entry.walkReachable()) {
672
+ yield e
673
+ }
674
+ } catch (err) {
675
+ if (err instanceof MissingBlockError) {
676
+ continue
677
+ } else {
678
+ throw err
679
+ }
680
+ }
681
+ } else {
682
+ yield entry
683
+ }
684
+ }
685
+ }
686
+
687
+ async reachableLeaves(): Promise<Leaf[]> {
688
+ const leaves: Leaf[] = []
689
+ for await (const entry of this.walkReachable()) {
690
+ if (entry.isLeaf()) leaves.push(entry)
691
+ }
692
+ return leaves
693
+ }
694
+
744
695
  // Sync Protocol
745
696
 
746
697
  async writeToCarStream(car: BlockWriter): Promise<void> {
747
- for await (const entry of this.walk()) {
748
- if (entry.isTree()) {
749
- const pointer = await entry.getPointer()
750
- await this.blockstore.addToCar(car, pointer)
698
+ const entries = await this.getEntries()
699
+ const leaves = new CidSet()
700
+ let toFetch = new CidSet()
701
+ toFetch.add(await this.getPointer())
702
+ for (const entry of entries) {
703
+ if (entry.isLeaf()) {
704
+ leaves.add(entry.value)
751
705
  } else {
752
- await this.blockstore.addToCar(car, entry.value)
706
+ toFetch.add(await entry.getPointer())
707
+ }
708
+ }
709
+ while (toFetch.size() > 0) {
710
+ const nextLayer = new CidSet()
711
+ const fetched = await this.storage.getBlocks(toFetch.toList())
712
+ if (fetched.missing.length > 0) {
713
+ throw new MissingBlocksError('mst node', fetched.missing)
714
+ }
715
+ for (const cid of toFetch.toList()) {
716
+ const found = await parse.getAndParseByDef(
717
+ fetched.blocks,
718
+ cid,
719
+ nodeDataDef,
720
+ )
721
+ await car.put({ cid, bytes: found.bytes })
722
+ const entries = await util.deserializeNodeData(this.storage, found.obj)
723
+
724
+ for (const entry of entries) {
725
+ if (entry.isLeaf()) {
726
+ leaves.add(entry.value)
727
+ } else {
728
+ nextLayer.add(await entry.getPointer())
729
+ }
730
+ }
753
731
  }
732
+ toFetch = nextLayer
733
+ }
734
+ const leafData = await this.storage.getBlocks(leaves.toList())
735
+ if (leafData.missing.length > 0) {
736
+ throw new MissingBlocksError('mst leaf', leafData.missing)
737
+ }
738
+
739
+ for (const leaf of leafData.blocks.entries()) {
740
+ await car.put(leaf)
741
+ }
742
+ }
743
+
744
+ async cidsForPath(key: string): Promise<CID[]> {
745
+ const cids: CID[] = [await this.getPointer()]
746
+ const index = await this.findGtOrEqualLeafIndex(key)
747
+ const found = await this.atIndex(index)
748
+ if (found && found.isLeaf() && found.key === key) {
749
+ return [...cids, found.value]
750
+ }
751
+ const prev = await this.atIndex(index - 1)
752
+ if (prev && prev.isTree()) {
753
+ return [...cids, ...(await prev.cidsForPath(key))]
754
754
  }
755
+ return cids
755
756
  }
756
757
 
757
758
  // Matching Leaf interface
package/src/mst/util.ts CHANGED
@@ -1,68 +1,60 @@
1
1
  import { CID } from 'multiformats'
2
2
  import * as uint8arrays from 'uint8arrays'
3
- import IpldStore from '../blockstore/ipld-store'
3
+ import { ReadableBlockstore } from '../storage'
4
4
  import { sha256 } from '@atproto/crypto'
5
- import { MST, Leaf, NodeEntry, NodeData, MstOpts, Fanout } from './mst'
6
- import { cidForData } from '@atproto/common'
5
+ import { MST, Leaf, NodeEntry, NodeData, MstOpts } from './mst'
6
+ import { cidForCbor } from '@atproto/common'
7
7
 
8
- type SupportedBases = 'base2' | 'base8' | 'base16' | 'base32' | 'base64'
9
-
10
- export const leadingZerosOnHash = async (
11
- key: string,
12
- fanout: Fanout,
13
- ): Promise<number> => {
14
- if ([2, 8, 16, 32, 64].indexOf(fanout) < 0) {
15
- throw new Error(`Not a valid fanout: ${fanout}`)
16
- }
17
- const base: SupportedBases = `base${fanout}`
18
- const zeroChar = uint8arrays.toString(new Uint8Array(1), base)[0]
8
+ export const leadingZerosOnHash = async (key: string | Uint8Array) => {
19
9
  const hash = await sha256(key)
20
- const encoded = uint8arrays.toString(hash, base)
21
- let count = 0
22
- for (const char of encoded) {
23
- if (char === zeroChar) {
24
- count++
10
+ let leadingZeros = 0
11
+ for (let i = 0; i < hash.length; i++) {
12
+ const byte = hash[i]
13
+ if (byte < 64) leadingZeros++
14
+ if (byte < 16) leadingZeros++
15
+ if (byte < 4) leadingZeros++
16
+ if (byte === 0) {
17
+ leadingZeros++
25
18
  } else {
26
19
  break
27
20
  }
28
21
  }
29
- return count
22
+ return leadingZeros
30
23
  }
31
24
 
32
25
  export const layerForEntries = async (
33
26
  entries: NodeEntry[],
34
- fanout: Fanout,
35
27
  ): Promise<number | null> => {
36
28
  const firstLeaf = entries.find((entry) => entry.isLeaf())
37
29
  if (!firstLeaf || firstLeaf.isTree()) return null
38
- return await leadingZerosOnHash(firstLeaf.key, fanout)
30
+ return await leadingZerosOnHash(firstLeaf.key)
39
31
  }
40
32
 
41
33
  export const deserializeNodeData = async (
42
- blockstore: IpldStore,
34
+ storage: ReadableBlockstore,
43
35
  data: NodeData,
44
36
  opts?: Partial<MstOpts>,
45
37
  ): Promise<NodeEntry[]> => {
46
- const { layer, fanout } = opts || {}
38
+ const { layer } = opts || {}
47
39
  const entries: NodeEntry[] = []
48
40
  if (data.l !== null) {
49
41
  entries.push(
50
- await MST.load(blockstore, data.l, {
42
+ await MST.load(storage, data.l, {
51
43
  layer: layer ? layer - 1 : undefined,
52
- fanout,
53
44
  }),
54
45
  )
55
46
  }
56
47
  let lastKey = ''
57
48
  for (const entry of data.e) {
58
- const key = lastKey.slice(0, entry.p) + entry.k
49
+ const keyStr = uint8arrays.toString(entry.k, 'ascii')
50
+ const key = lastKey.slice(0, entry.p) + keyStr
51
+ ensureValidMstKey(key)
59
52
  entries.push(new Leaf(key, entry.v))
60
53
  lastKey = key
61
54
  if (entry.t !== null) {
62
55
  entries.push(
63
- await MST.load(blockstore, entry.t, {
56
+ await MST.load(storage, entry.t, {
64
57
  layer: layer ? layer - 1 : undefined,
65
- fanout,
66
58
  }),
67
59
  )
68
60
  }
@@ -93,10 +85,11 @@ export const serializeNodeData = (entries: NodeEntry[]): NodeData => {
93
85
  subtree = next.pointer
94
86
  i++
95
87
  }
88
+ ensureValidMstKey(leaf.key)
96
89
  const prefixLen = countPrefixLen(lastKey, leaf.key)
97
90
  data.e.push({
98
91
  p: prefixLen,
99
- k: leaf.key.slice(prefixLen),
92
+ k: uint8arrays.fromString(leaf.key.slice(prefixLen), 'ascii'),
100
93
  v: leaf.value,
101
94
  t: subtree,
102
95
  })
@@ -118,5 +111,35 @@ export const countPrefixLen = (a: string, b: string): number => {
118
111
 
119
112
  export const cidForEntries = async (entries: NodeEntry[]): Promise<CID> => {
120
113
  const data = serializeNodeData(entries)
121
- return cidForData(data)
114
+ return cidForCbor(data)
115
+ }
116
+
117
+ export const isValidMstKey = (str: string): boolean => {
118
+ const split = str.split('/')
119
+ return (
120
+ str.length <= 256 &&
121
+ split.length === 2 &&
122
+ split[0].length > 0 &&
123
+ split[1].length > 0 &&
124
+ isValidChars(split[0]) &&
125
+ isValidChars(split[1])
126
+ )
127
+ }
128
+
129
+ export const validCharsRegex = /^[a-zA-Z0-9_\-:.]*$/
130
+
131
+ export const isValidChars = (str: string): boolean => {
132
+ return str.match(validCharsRegex) !== null
133
+ }
134
+
135
+ export const ensureValidMstKey = (str: string) => {
136
+ if (!isValidMstKey(str)) {
137
+ throw new InvalidMstKeyError(str)
138
+ }
139
+ }
140
+
141
+ export class InvalidMstKeyError extends Error {
142
+ constructor(public key: string) {
143
+ super(`Not a valid MST key: ${key}`)
144
+ }
122
145
  }