@helia/unixfs 1.0.3 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,32 +2,31 @@ import * as dagPB from '@ipld/dag-pb'
2
2
  import { CID, Version } from 'multiformats/cid'
3
3
  import { logger } from '@libp2p/logger'
4
4
  import { UnixFS } from 'ipfs-unixfs'
5
- import { DirSharded } from './dir-sharded.js'
6
5
  import {
7
- updateHamtDirectory,
8
- recreateHamtLevel,
9
- recreateInitialHamtLevel,
10
6
  createShard,
7
+ recreateShardedDirectory,
11
8
  toPrefix,
12
- addLinksToHamtBucket
9
+ updateShardedDirectory
13
10
  } from './hamt-utils.js'
14
- import last from 'it-last'
15
11
  import type { PBNode, PBLink } from '@ipld/dag-pb/interface'
16
12
  import { sha256 } from 'multiformats/hashes/sha2'
17
- import type { Bucket } from 'hamt-sharding'
18
13
  import { AlreadyExistsError, InvalidParametersError, InvalidPBNodeError } from './errors.js'
19
14
  import type { ImportResult } from 'ipfs-unixfs-importer'
20
15
  import type { AbortOptions } from '@libp2p/interfaces'
21
16
  import type { Directory } from './cid-to-directory.js'
22
17
  import type { Blockstore } from 'interface-blockstore'
23
18
  import { isOverShardThreshold } from './is-over-shard-threshold.js'
19
+ import { hamtBucketBits, hamtHashFn } from './hamt-constants.js'
20
+ // @ts-expect-error no types
21
+ import SparseArray from 'sparse-array'
22
+ import { wrapHash } from './consumable-hash.js'
23
+ import { fromString as uint8ArrayFromString } from 'uint8arrays/from-string'
24
24
 
25
25
  const log = logger('helia:unixfs:components:utils:add-link')
26
26
 
27
27
  export interface AddLinkResult {
28
28
  node: PBNode
29
29
  cid: CID
30
- size: number
31
30
  }
32
31
 
33
32
  export interface AddLinkOptions extends AbortOptions {
@@ -81,7 +80,7 @@ const convertToShardedDirectory = async (parent: Directory, blockstore: Blocksto
81
80
  cidVersion: parent.cid.version
82
81
  })
83
82
 
84
- log(`Converted directory to sharded directory ${result.cid}`)
83
+ log(`converted directory to sharded directory ${result.cid}`)
85
84
 
86
85
  return result
87
86
  }
@@ -134,187 +133,125 @@ const addToDirectory = async (parent: Directory, child: PBLink, blockstore: Bloc
134
133
 
135
134
  return {
136
135
  node: parent.node,
137
- cid,
138
- size: buf.length
136
+ cid
139
137
  }
140
138
  }
141
139
 
142
140
  const addToShardedDirectory = async (parent: Directory, child: Required<PBLink>, blockstore: Blockstore, options: AddLinkOptions): Promise<AddLinkResult> => {
143
- const {
144
- shard, path
145
- } = await addFileToShardedDirectory(parent, child, blockstore, options)
146
- const result = await last(shard.flush(blockstore))
141
+ const { path, hash } = await recreateShardedDirectory(parent.cid, child.Name, blockstore, options)
142
+ const finalSegment = path[path.length - 1]
147
143
 
148
- if (result == null) {
149
- throw new Error('No result from flushing shard')
144
+ if (finalSegment == null) {
145
+ throw new Error('Invalid HAMT, could not generate path')
150
146
  }
151
147
 
152
- const block = await blockstore.get(result.cid)
153
- const node = dagPB.decode(block)
148
+ // find the next prefix
149
+ // const index = await hash.take(hamtBucketBits)
150
+ const prefix = finalSegment.prefix
151
+ const index = parseInt(prefix, 16)
154
152
 
155
- // we have written out the shard, but only one sub-shard will have been written so replace it in the original shard
156
- const parentLinks = parent.node.Links.filter((link) => {
157
- return (link.Name ?? '').substring(0, 2) !== path[0].prefix
158
- })
159
-
160
- const newLink = node.Links
161
- .find(link => (link.Name ?? '').substring(0, 2) === path[0].prefix)
162
-
163
- if (newLink == null) {
164
- throw new Error(`No link found with prefix ${path[0].prefix}`)
165
- }
166
-
167
- parentLinks.push(newLink)
168
-
169
- return await updateHamtDirectory({
170
- Data: parent.node.Data,
171
- Links: parentLinks
172
- }, blockstore, path[0].bucket, options)
173
- }
153
+ log('next prefix for %s is %s', child.Name, prefix)
174
154
 
175
- const addFileToShardedDirectory = async (parent: Directory, child: Required<PBLink>, blockstore: Blockstore, options: AddLinkOptions): Promise<{ shard: DirSharded, path: BucketPath[] }> => {
176
- if (parent.node.Data == null) {
177
- throw new InvalidPBNodeError('Parent node with no data passed to addFileToShardedDirectory')
178
- }
155
+ const linkName = `${prefix}${child.Name}`
156
+ const existingLink = finalSegment.node.Links.find(l => (l.Name ?? '').startsWith(prefix))
179
157
 
180
- // start at the root bucket and descend, loading nodes as we go
181
- const rootBucket = await recreateInitialHamtLevel(parent.node.Links)
182
- const node = UnixFS.unmarshal(parent.node.Data)
183
-
184
- const shard = new DirSharded({
185
- root: true,
186
- dir: true,
187
- parent: undefined,
188
- parentKey: undefined,
189
- path: '',
190
- dirty: true,
191
- flat: false,
192
- mode: node.mode
193
- }, {
194
- ...options,
195
- cidVersion: parent.cid.version
196
- })
197
- shard._bucket = rootBucket
198
-
199
- if (node.mtime != null) {
200
- // update mtime if previously set
201
- shard.mtime = {
202
- secs: BigInt(Math.round(Date.now() / 1000))
203
- }
204
- }
205
-
206
- // load subshards until the bucket & position no longer changes
207
- const position = await rootBucket._findNewBucketAndPos(child.Name)
208
- const path = toBucketPath(position)
209
- path[0].node = parent.node
210
- let index = 0
211
-
212
- while (index < path.length) {
213
- const segment = path[index]
214
- index++
215
- const node = segment.node
216
-
217
- if (node == null) {
218
- throw new Error('Segment had no node')
219
- }
220
-
221
- const link = node.Links
222
- .find(link => (link.Name ?? '').substring(0, 2) === segment.prefix)
223
-
224
- if (link == null) {
225
- // prefix is new, file will be added to the current bucket
226
- log(`Link ${segment.prefix}${child.Name} will be added`)
227
- index = path.length
228
-
229
- break
230
- }
158
+ if (existingLink != null) {
159
+ log('link %s was present in shard', linkName)
160
+ // link is already present in shard
231
161
 
232
- if (link.Name === `${segment.prefix}${child.Name}`) {
162
+ if (existingLink.Name === linkName) {
163
+ // file with same name is already present in shard
233
164
  if (!options.allowOverwriting) {
234
165
  throw new AlreadyExistsError()
235
166
  }
236
167
 
237
- // file already existed, file will be added to the current bucket
238
- log(`Link ${segment.prefix}${child.Name} will be replaced`)
239
- index = path.length
240
-
241
- break
242
- }
243
-
244
- if ((link.Name ?? '').length > 2) {
245
- // another file had the same prefix, will be replaced with a subshard
246
- log(`Link ${link.Name} ${link.Hash} will be replaced with a subshard`)
247
- index = path.length
248
-
249
- break
250
- }
251
-
252
- // load sub-shard
253
- log(`Found subshard ${segment.prefix}`)
254
- const block = await blockstore.get(link.Hash)
255
- const subShard = dagPB.decode(block)
256
-
257
- // subshard hasn't been loaded, descend to the next level of the HAMT
258
- if (path[index] == null) {
259
- log(`Loaded new subshard ${segment.prefix}`)
260
- await recreateHamtLevel(blockstore, subShard.Links, rootBucket, segment.bucket, parseInt(segment.prefix, 16), options)
261
-
262
- const position = await rootBucket._findNewBucketAndPos(child.Name)
263
-
264
- path.push({
265
- bucket: position.bucket,
266
- prefix: toPrefix(position.pos),
267
- node: subShard
168
+ log('overwriting %s in subshard', child.Name)
169
+ finalSegment.node.Links = finalSegment.node.Links.filter(l => l.Name !== linkName)
170
+ finalSegment.node.Links.push({
171
+ Name: linkName,
172
+ Hash: child.Hash,
173
+ Tsize: child.Tsize
268
174
  })
175
+ } else if (existingLink.Name?.length === 2) {
176
+ throw new Error('Existing link was subshard?!')
177
+ } else {
178
+ // conflict, add a new HAMT segment
179
+ log('prefix %s already exists, creating new subshard', prefix)
180
+ // find the sibling we are going to replace
181
+ const index = finalSegment.node.Links.findIndex(l => l.Name?.startsWith(prefix))
182
+ const sibling = finalSegment.node.Links.splice(index, 1)[0]
183
+
184
+ // give the sibling a new HAMT prefix
185
+ const siblingName = (sibling.Name ?? '').substring(2)
186
+ const wrapped = wrapHash(hamtHashFn)
187
+ const siblingHash = wrapped(uint8ArrayFromString(siblingName))
188
+
189
+ // discard hash bits until we reach the subshard depth
190
+ for (let i = 0; i < path.length; i++) {
191
+ await siblingHash.take(hamtBucketBits)
192
+ }
269
193
 
270
- break
194
+ while (true) {
195
+ const siblingIndex = await siblingHash.take(hamtBucketBits)
196
+ const siblingPrefix = toPrefix(siblingIndex)
197
+ sibling.Name = `${siblingPrefix}${siblingName}`
198
+
199
+ // calculate the target file's HAMT prefix in the new sub-shard
200
+ const newIndex = await hash.take(hamtBucketBits)
201
+ const newPrefix = toPrefix(newIndex)
202
+
203
+ if (siblingPrefix === newPrefix) {
204
+ // the two sibling names have caused another conflict - add an intermediate node to
205
+ // the HAMT and try again
206
+
207
+ // create the child locations
208
+ const children = new SparseArray()
209
+ children.set(newIndex, true)
210
+
211
+ path.push({
212
+ prefix: newPrefix,
213
+ children,
214
+ node: {
215
+ Links: []
216
+ }
217
+ })
218
+
219
+ continue
220
+ }
221
+
222
+ // create the child locations
223
+ const children = new SparseArray()
224
+ children.set(newIndex, true)
225
+ children.set(siblingIndex, true)
226
+
227
+ // add our new segment
228
+ path.push({
229
+ prefix,
230
+ children,
231
+ node: {
232
+ Links: [
233
+ sibling, {
234
+ Name: `${newPrefix}${child.Name}`,
235
+ Hash: child.Hash,
236
+ Tsize: child.Tsize
237
+ }
238
+ ]
239
+ }
240
+ })
241
+
242
+ break
243
+ }
271
244
  }
245
+ } else {
246
+ log('link %s was not present in sub-shard', linkName)
272
247
 
273
- const nextSegment = path[index]
274
-
275
- // add next levels worth of links to bucket
276
- await addLinksToHamtBucket(blockstore, subShard.Links, nextSegment.bucket, rootBucket, options)
277
-
278
- nextSegment.node = subShard
279
- }
280
-
281
- // finally add the new file into the shard
282
- await shard._bucket.put(child.Name, {
283
- size: BigInt(child.Tsize),
284
- cid: child.Hash
285
- })
286
-
287
- return {
288
- shard, path
289
- }
290
- }
291
-
292
- export interface BucketPath {
293
- bucket: Bucket<any>
294
- prefix: string
295
- node?: PBNode
296
- }
297
-
298
- const toBucketPath = (position: { pos: number, bucket: Bucket<any> }): BucketPath[] => {
299
- const path = [{
300
- bucket: position.bucket,
301
- prefix: toPrefix(position.pos)
302
- }]
303
-
304
- let bucket = position.bucket._parent
305
- let positionInBucket = position.bucket._posAtParent
306
-
307
- while (bucket != null) {
308
- path.push({
309
- bucket,
310
- prefix: toPrefix(positionInBucket)
311
- })
248
+ // add new link to shard
249
+ child.Name = linkName
250
+ finalSegment.node.Links.push(child)
251
+ finalSegment.children.set(index, true)
312
252
 
313
- positionInBucket = bucket._posAtParent
314
- bucket = bucket._parent
253
+ log('adding %s to existing sub-shard', linkName)
315
254
  }
316
255
 
317
- path.reverse()
318
-
319
- return path
256
+ return await updateShardedDirectory(path, blockstore, options)
320
257
  }
@@ -0,0 +1,174 @@
1
+ import { concat as uint8ArrayConcat } from 'uint8arrays/concat'
2
+
3
// Builds a converter bound to `hashFn`. The returned function accepts either
// an existing InfiniteHash (returned unchanged) or a Uint8Array (wrapped in a
// new InfiniteHash that will use `hashFn` to produce its bits).
+ export function wrapHash (hashFn: (value: Uint8Array) => Promise<Uint8Array>): (value: InfiniteHash | Uint8Array) => InfiniteHash {
4
+ function hashing (value: InfiniteHash | Uint8Array): InfiniteHash {
5
+ if (value instanceof InfiniteHash) {
6
+ // already a hash. return it
7
+ return value
8
+ } else {
9
// raw bytes: defer hashing to the InfiniteHash, which calls hashFn lazily
+ return new InfiniteHash(value, hashFn)
10
+ }
11
+ }
12
+
13
+ return hashing
14
+ }
15
+
16
// A bit source over a byte value: bits are handed out via take() and more
// digests are computed on demand (see _produceMoreBits) whenever the bits
// produced so far are not enough to satisfy a request.
+ export class InfiniteHash {
17
// the bytes being hashed
+ _value: Uint8Array
18
// async hash function used to derive each digest
+ _hashFn: (value: Uint8Array) => Promise<Uint8Array>
19
// starts at -1 and is incremented before each digest is produced
+ _depth: number
20
// running count of bits produced but not yet taken, across all buffers
+ _availableBits: number
21
// index into _buffers of the buffer currently being consumed
+ _currentBufferIndex: number
22
// one ConsumableBuffer per digest produced so far
+ _buffers: ConsumableBuffer[]
23
+
24
// Throws if `value` is not a Uint8Array; no hashing happens at construction.
+ constructor (value: Uint8Array, hashFn: (value: Uint8Array) => Promise<Uint8Array>) {
25
+ if (!(value instanceof Uint8Array)) {
26
+ throw new Error('can only hash Uint8Arrays')
27
+ }
28
+
29
+ this._value = value
30
+ this._hashFn = hashFn
31
+ this._depth = -1
32
+ this._availableBits = 0
33
+ this._currentBufferIndex = 0
34
+ this._buffers = []
35
+ }
36
+
37
// Consumes `bits` bits and returns them as a number, with earlier-taken
// bits in the higher positions of the result.
+ async take (bits: number): Promise<number> {
38
+ let pendingBits = bits
39
+
40
// produce digests until the request can be satisfied in full
+ while (this._availableBits < pendingBits) {
41
+ await this._produceMoreBits()
42
+ }
43
+
44
+ let result = 0
45
+
46
// a request may span several buffers; drain the current one first
+ while (pendingBits > 0) {
47
+ const hash = this._buffers[this._currentBufferIndex]
48
+ const available = Math.min(hash.availableBits(), pendingBits)
49
+ const took = hash.take(available)
50
// NOTE(review): `<<` works on 32-bit integers, so this presumably expects
// small `bits` values - confirm against callers
+ result = (result << available) + took
51
+ pendingBits -= available
52
+ this._availableBits -= available
53
+
54
// current buffer exhausted: move on to the next one
+ if (hash.availableBits() === 0) {
55
+ this._currentBufferIndex++
56
+ }
57
+ }
58
+
59
+ return result
60
+ }
61
+
62
// Gives back `bits` previously taken bits, stepping backwards through the
// buffers as each one becomes completely unconsumed again.
+ untake (bits: number): void {
63
+ let pendingBits = bits
64
+
65
+ while (pendingBits > 0) {
66
// NOTE(review): take() advances _currentBufferIndex as soon as a buffer is
// drained, so after an exact drain this index may point one past the last
// buffer and `hash` would be undefined here - confirm callers avoid this
+ const hash = this._buffers[this._currentBufferIndex]
67
// only bits already consumed from this buffer can be returned to it
+ const availableForUntake = Math.min(hash.totalBits() - hash.availableBits(), pendingBits)
68
+ hash.untake(availableForUntake)
69
+ pendingBits -= availableForUntake
70
+ this._availableBits += availableForUntake
71
+
72
// this buffer is fully restored: continue with the previous buffer
// (the restored buffer is not removed from _buffers)
+ if (this._currentBufferIndex > 0 && hash.totalBits() === hash.availableBits()) {
73
+ this._depth--
74
+ this._currentBufferIndex--
75
+ }
76
+ }
77
+ }
78
+
79
// Computes one more digest and appends it to _buffers as consumable bits.
+ async _produceMoreBits (): Promise<void> {
80
+ this._depth++
81
+
82
// depth 0 hashes the raw value; deeper levels hash the value with the depth
// appended as one extra byte.
// NOTE(review): a single byte, so a depth above 255 would wrap - confirm
// this is unreachable in practice
+ const value = this._depth > 0 ? uint8ArrayConcat([this._value, Uint8Array.from([this._depth])]) : this._value
83
+ const hashValue = await this._hashFn(value)
84
+ const buffer = new ConsumableBuffer(hashValue)
85
+
86
+ this._buffers.push(buffer)
87
+ this._availableBits += buffer.availableBits()
88
+ }
89
+ }
90
+
91
// Indexed by start bit: entry i keeps bits i..7 of a byte (bit 0 is the
// least significant) and clears everything below bit i.
+ const START_MASKS = [
92
+ 0b11111111,
93
+ 0b11111110,
94
+ 0b11111100,
95
+ 0b11111000,
96
+ 0b11110000,
97
+ 0b11100000,
98
+ 0b11000000,
99
+ 0b10000000
100
+ ]
101
+
102
// Indexed by stop bit: entry i keeps bits 0..i of a byte (bit 0 is the
// least significant) and clears everything above bit i.
+ const STOP_MASKS = [
103
+ 0b00000001,
104
+ 0b00000011,
105
+ 0b00000111,
106
+ 0b00001111,
107
+ 0b00011111,
108
+ 0b00111111,
109
+ 0b01111111,
110
+ 0b11111111
111
+ ]
112
+
113
// A cursor over a byte array that hands out bits. Consumption starts at the
// most significant bit of the LAST byte and moves towards byte 0.
+ export class ConsumableBuffer {
114
// the underlying bytes; never modified by this class
+ _value: Uint8Array
115
// index of the byte currently being consumed; -1 once everything is taken
+ _currentBytePos: number
116
// highest unconsumed bit (7..0) within the current byte
+ _currentBitPos: number
117
+
118
+ constructor (value: Uint8Array) {
119
+ this._value = value
120
// begin at the top bit of the final byte
+ this._currentBytePos = value.length - 1
121
+ this._currentBitPos = 7
122
+ }
123
+
124
// Bits not yet consumed: the remainder of the current byte plus 8 for every
// byte at a lower index. Evaluates to 0 when _currentBytePos is -1.
+ availableBits (): number {
125
+ return this._currentBitPos + 1 + this._currentBytePos * 8
126
+ }
127
+
128
// Total capacity of the buffer in bits, regardless of how many were taken.
+ totalBits (): number {
129
+ return this._value.length * 8
130
+ }
131
+
132
// Consumes up to `bits` bits and returns them as a number, earlier bits in
// the higher positions. Stops early (returning fewer bits) if the buffer
// runs out.
+ take (bits: number): number {
133
+ let pendingBits = bits
134
+ let result = 0
135
+ while (pendingBits > 0 && this._haveBits()) {
136
+ const byte = this._value[this._currentBytePos]
137
+ const availableBits = this._currentBitPos + 1
138
+ const taking = Math.min(availableBits, pendingBits)
139
// read the top `taking` bits of what is left of the current byte
+ const value = byteBitsToInt(byte, availableBits - taking, taking)
140
+ result = (result << taking) + value
141
+
142
+ pendingBits -= taking
143
+
144
+ this._currentBitPos -= taking
145
// current byte used up: continue at the top bit of the preceding byte
+ if (this._currentBitPos < 0) {
146
+ this._currentBitPos = 7
147
+ this._currentBytePos--
148
+ }
149
+ }
150
+
151
+ return result
152
+ }
153
+
154
// Moves the cursor back by `bits` bits so they can be taken again. There is
// no bounds check here; the caller must not return more than was taken.
+ untake (bits: number): void {
155
+ this._currentBitPos += bits
156
// carry whole bytes out of the bit position
+ while (this._currentBitPos > 7) {
157
+ this._currentBitPos -= 8
158
+ this._currentBytePos += 1
159
+ }
160
+ }
161
+
162
// True while the cursor still points at a byte inside the buffer.
+ _haveBits (): boolean {
163
+ return this._currentBytePos >= 0
164
+ }
165
+ }
166
+
167
// Extracts `length` bits of `byte` beginning at bit `start` (bit 0 is the
// least significant) and returns them shifted down to start at bit 0.
+ function byteBitsToInt (byte: number, start: number, length: number): number {
168
+ const mask = maskFor(start, length)
169
+ return (byte & mask) >>> start
170
+ }
171
+
172
// Builds a byte mask with bits `start` through `start + length - 1` set,
// with the upper end clamped to bit 7.
+ function maskFor (start: number, length: number): number {
173
+ return START_MASKS[start] & STOP_MASKS[Math.min(length + start - 1, 7)]
174
+ }