@helia/unixfs 4.0.3-c0bf36e → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@helia/unixfs",
3
- "version": "4.0.3-c0bf36e",
3
+ "version": "5.0.0",
4
4
  "description": "A Helia-compatible wrapper for UnixFS",
5
5
  "license": "Apache-2.0 OR MIT",
6
6
  "homepage": "https://github.com/ipfs/helia/tree/main/packages/unixfs#readme",
@@ -74,10 +74,11 @@
74
74
  "test:electron-main": "aegir test -t electron-main"
75
75
  },
76
76
  "dependencies": {
77
- "@helia/interface": "5.2.1-c0bf36e",
77
+ "@helia/interface": "^5.2.1",
78
78
  "@ipld/dag-pb": "^4.1.3",
79
79
  "@libp2p/interface": "^2.2.1",
80
80
  "@libp2p/logger": "^5.1.4",
81
+ "@libp2p/utils": "^6.6.0",
81
82
  "@multiformats/murmur3": "^2.1.8",
82
83
  "hamt-sharding": "^3.0.6",
83
84
  "interface-blockstore": "^5.3.1",
@@ -1,17 +1,21 @@
1
1
  import * as dagPb from '@ipld/dag-pb'
2
2
  import { logger } from '@libp2p/logger'
3
+ import { ScalableCuckooFilter } from '@libp2p/utils/filters'
3
4
  import { UnixFS } from 'ipfs-unixfs'
4
- import { exporter } from 'ipfs-unixfs-exporter'
5
+ import { exporter, type RawNode, type UnixFSDirectory, type UnixFSFile } from 'ipfs-unixfs-exporter'
5
6
  import mergeOpts from 'merge-options'
6
7
  import * as raw from 'multiformats/codecs/raw'
7
8
  import { InvalidPBNodeError, NotUnixFSError, UnknownError } from '../errors.js'
8
9
  import { resolve } from './utils/resolve.js'
9
- import type { StatOptions, UnixFSStats } from '../index.js'
10
+ import type { ExtendedStatOptions, ExtendedDirectoryStats, ExtendedFileStats, StatOptions, DirectoryStats, FileStats, RawStats, ExtendedRawStats } from '../index.js'
10
11
  import type { GetStore, HasStore } from '../unixfs.js'
11
- import type { AbortOptions } from '@libp2p/interface'
12
- import type { Mtime } from 'ipfs-unixfs'
12
+ import type { Filter } from '@libp2p/utils/filters'
13
13
  import type { CID } from 'multiformats/cid'
14
14
 
15
+ // Default POSIX permission bits, written as octal literals (rwxr-xr-x / rw-r--r--) - see https://github.com/ipfs/specs/blob/main/UNIXFS.md#metadata
16
+ const DEFAULT_DIR_MODE = 0o755
17
+ const DEFAULT_FILE_MODE = 0o644
18
+
15
19
  const mergeOptions = mergeOpts.bind({ ignoreUndefined: true })
16
20
  const log = logger('helia:unixfs:stat')
17
21
 
@@ -19,7 +23,9 @@ const defaultOptions: StatOptions = {
19
23
 
20
24
  }
21
25
 
22
- export async function stat (cid: CID, blockstore: GetStore & HasStore, options: Partial<StatOptions> = {}): Promise<UnixFSStats> {
26
+ export async function stat (cid: CID, blockstore: GetStore & HasStore, options?: StatOptions): Promise<FileStats | DirectoryStats | RawStats>
27
+ export async function stat (cid: CID, blockstore: GetStore & HasStore, options?: ExtendedStatOptions): Promise<ExtendedFileStats | ExtendedDirectoryStats | ExtendedRawStats>
28
+ export async function stat (cid: CID, blockstore: GetStore & HasStore, options: Partial<ExtendedStatOptions> = {}): Promise<any> {
23
29
  const opts: StatOptions = mergeOptions(defaultOptions, options)
24
30
  const resolved = await resolve(cid, options.path, blockstore, opts)
25
31
 
@@ -27,116 +33,186 @@ export async function stat (cid: CID, blockstore: GetStore & HasStore, options:
27
33
 
28
34
  const result = await exporter(resolved.cid, blockstore, opts)
29
35
 
30
- if (result.type !== 'file' && result.type !== 'directory' && result.type !== 'raw') {
31
- throw new NotUnixFSError()
36
+ if (result.type === 'raw') {
37
+ if (options.extended === true) {
38
+ return createExtendedRawStats(result)
39
+ }
40
+
41
+ return createRawStats(result)
42
+ } else if (result.type === 'file' || result.type === 'directory') {
43
+ if (options.extended === true) {
44
+ return createExtendedStats(result, blockstore, options.filter ?? new ScalableCuckooFilter({ filterSize: 1024 }), options)
45
+ }
46
+
47
+ return createStats(result)
32
48
  }
33
49
 
34
- let fileSize: bigint = 0n
35
- let dagSize: bigint = 0n
36
- let localFileSize: bigint = 0n
37
- let localDagSize: bigint = 0n
38
- let blocks: number = 0
39
- let mode: number | undefined
40
- let mtime: Mtime | undefined
41
- const type = result.type
42
- let unixfs: UnixFS | undefined
50
+ throw new NotUnixFSError()
51
+ }
43
52
 
44
- if (result.type === 'raw') {
45
- fileSize = BigInt(result.node.byteLength)
46
- dagSize = BigInt(result.node.byteLength)
47
- localFileSize = BigInt(result.node.byteLength)
48
- localDagSize = BigInt(result.node.byteLength)
49
- blocks = 1
53
+ function createStats (entry: UnixFSFile | UnixFSDirectory): FileStats | DirectoryStats {
54
+ return {
55
+ type: entry.type,
56
+ cid: entry.cid,
57
+ unixfs: entry.unixfs,
58
+ mode: entry.unixfs.mode ?? (entry.unixfs.isDirectory() ? DEFAULT_DIR_MODE : DEFAULT_FILE_MODE),
59
+ mtime: entry.unixfs.mtime,
60
+ size: entry.unixfs.fileSize()
50
61
  }
62
+ }
51
63
 
52
- if (result.type === 'directory') {
53
- fileSize = 0n
54
- dagSize = BigInt(result.unixfs.marshal().byteLength)
55
- localFileSize = 0n
56
- localDagSize = dagSize
57
- blocks = 1
58
- mode = result.unixfs.mode
59
- mtime = result.unixfs.mtime
60
- unixfs = result.unixfs
64
+ async function createExtendedStats (entry: UnixFSFile | UnixFSDirectory, blockstore: GetStore & HasStore, filter: Filter, options: StatOptions): Promise<ExtendedFileStats | ExtendedDirectoryStats> {
65
+ const stats = await inspectDag(entry.cid, blockstore, false, filter, options)
66
+
67
+ return {
68
+ type: entry.type,
69
+ cid: entry.cid,
70
+ unixfs: entry.unixfs,
71
+ size: entry.unixfs.isDirectory() ? stats.dirSize : entry.unixfs.fileSize(),
72
+ mode: entry.unixfs.mode ?? (entry.unixfs.isDirectory() ? DEFAULT_DIR_MODE : DEFAULT_FILE_MODE),
73
+ mtime: entry.unixfs.mtime,
74
+ localSize: stats.localSize,
75
+ dagSize: stats.dagSize,
76
+ deduplicatedDagSize: stats.deduplicatedDagSize,
77
+ blocks: stats.blocks,
78
+ uniqueBlocks: stats.uniqueBlocks
61
79
  }
80
+ }
62
81
 
63
- if (result.type === 'file') {
64
- const results = await inspectDag(resolved.cid, blockstore, opts)
65
-
66
- fileSize = result.unixfs.fileSize()
67
- dagSize = BigInt((result.node.Data?.byteLength ?? 0) + result.node.Links.reduce((acc, curr) => acc + (curr.Tsize ?? 0), 0))
68
- localFileSize = BigInt(results.localFileSize)
69
- localDagSize = BigInt(results.localDagSize)
70
- blocks = results.blocks
71
- mode = result.unixfs.mode
72
- mtime = result.unixfs.mtime
73
- unixfs = result.unixfs
82
+ function createRawStats (entry: RawNode): RawStats {
83
+ return {
84
+ type: entry.type,
85
+ cid: entry.cid,
86
+ unixfs: undefined,
87
+ mode: DEFAULT_FILE_MODE,
88
+ mtime: undefined,
89
+ size: BigInt(entry.node.byteLength)
74
90
  }
91
+ }
75
92
 
93
+ function createExtendedRawStats (entry: RawNode): ExtendedRawStats {
76
94
  return {
77
- cid: resolved.cid,
78
- mode,
79
- mtime,
80
- fileSize,
81
- dagSize,
82
- localFileSize,
83
- localDagSize,
84
- blocks,
85
- type,
86
- unixfs
95
+ type: entry.type,
96
+ cid: entry.cid,
97
+ unixfs: undefined,
98
+ mode: DEFAULT_FILE_MODE,
99
+ mtime: undefined,
100
+ size: BigInt(entry.node.byteLength),
101
+ localSize: BigInt(entry.node.byteLength),
102
+ dagSize: BigInt(entry.node.byteLength),
103
+ deduplicatedDagSize: BigInt(entry.node.byteLength),
104
+ blocks: 1n,
105
+ uniqueBlocks: 1n
87
106
  }
88
107
  }
89
108
 
90
109
  interface InspectDagResults {
91
- localFileSize: number
92
- localDagSize: number
93
- blocks: number
110
+ dirSize: bigint
111
+ localSize: bigint
112
+ dagSize: bigint
113
+ deduplicatedDagSize: bigint
114
+ blocks: bigint
115
+ uniqueBlocks: bigint
94
116
  }
95
117
 
96
- async function inspectDag (cid: CID, blockstore: GetStore & HasStore, options: AbortOptions): Promise<InspectDagResults> {
97
- const results = {
98
- localFileSize: 0,
99
- localDagSize: 0,
100
- blocks: 0
118
+ async function inspectDag (cid: CID, blockstore: GetStore & HasStore, isFile: boolean, filter: Filter, options: StatOptions): Promise<InspectDagResults> {
119
+ const results: InspectDagResults = {
120
+ dirSize: 0n,
121
+ localSize: 0n,
122
+ dagSize: 0n,
123
+ deduplicatedDagSize: 0n,
124
+ blocks: 0n,
125
+ uniqueBlocks: 0n
101
126
  }
102
127
 
103
- if (await blockstore.has(cid, options)) {
128
+ try {
129
+ const alreadyTraversed = filter.has(cid.bytes)
130
+ filter.add(cid.bytes)
131
+
104
132
  const block = await blockstore.get(cid, options)
105
133
  results.blocks++
106
- results.localDagSize += block.byteLength
134
+ results.dagSize += BigInt(block.byteLength)
135
+
136
+ if (!alreadyTraversed) {
137
+ results.uniqueBlocks++
138
+ results.deduplicatedDagSize += BigInt(block.byteLength)
139
+ }
107
140
 
108
141
  if (cid.code === raw.code) {
109
- results.localFileSize += block.byteLength
142
+ results.localSize += BigInt(block.byteLength)
143
+
144
+ if (isFile) {
145
+ results.dirSize += BigInt(block.byteLength)
146
+ }
110
147
  } else if (cid.code === dagPb.code) {
111
148
  const pbNode = dagPb.decode(block)
112
149
 
150
+ let unixfs: UnixFS | undefined
151
+
152
+ if (pbNode.Data != null) {
153
+ unixfs = UnixFS.unmarshal(pbNode.Data)
154
+ }
155
+
113
156
  if (pbNode.Links.length > 0) {
114
157
  // intermediate node
115
158
  for (const link of pbNode.Links) {
116
- const linkResult = await inspectDag(link.Hash, blockstore, options)
159
+ const linkResult = await inspectDag(link.Hash, blockstore, linkIsFile(link, unixfs), filter, options)
117
160
 
118
- results.localFileSize += linkResult.localFileSize
119
- results.localDagSize += linkResult.localDagSize
161
+ results.localSize += linkResult.localSize
162
+ results.dagSize += linkResult.dagSize
163
+ results.deduplicatedDagSize += linkResult.deduplicatedDagSize
120
164
  results.blocks += linkResult.blocks
165
+ results.uniqueBlocks += linkResult.uniqueBlocks
166
+ results.dirSize += linkResult.dirSize
167
+ }
168
+
169
+ // multi-block file node
170
+ if (isFile && unixfs != null) {
171
+ results.dirSize += unixfs.fileSize()
121
172
  }
122
173
  } else {
123
- // leaf node
124
- if (pbNode.Data == null) {
174
+ if (unixfs == null) {
125
175
  throw new InvalidPBNodeError(`PBNode ${cid.toString()} had no data`)
126
176
  }
127
177
 
128
- const unixfs = UnixFS.unmarshal(pbNode.Data)
129
-
130
- if (unixfs.data == null) {
131
- throw new InvalidPBNodeError(`UnixFS node ${cid.toString()} had no data`)
178
+ // multi-block file leaf node
179
+ if (unixfs.data != null) {
180
+ results.localSize += BigInt(unixfs.data.byteLength ?? 0)
132
181
  }
133
182
 
134
- results.localFileSize += unixfs.data.byteLength ?? 0
183
+ // single-block file node
184
+ if (isFile) {
185
+ results.dirSize += unixfs.fileSize()
186
+ }
135
187
  }
136
188
  } else {
137
189
  throw new UnknownError(`${cid.toString()} was neither DAG_PB nor RAW`)
138
190
  }
191
+ } catch (err: any) {
192
+ if (err.name !== 'NotFoundError' || options.offline !== true) {
193
+ throw err
194
+ }
139
195
  }
140
196
 
141
197
  return results
142
198
  }
199
+
200
+ function linkIsFile (link: dagPb.PBLink, parent?: UnixFS): boolean {
201
+ if (parent == null) {
202
+ return false
203
+ }
204
+
205
+ const name = link.Name
206
+
207
+ if (name == null) {
208
+ return false
209
+ }
210
+
211
+ if (parent.type === 'directory') {
212
+ return true
213
+ } else if (parent.type === 'hamt-sharded-directory' && name.length > 2) {
214
+ return true
215
+ }
216
+
217
+ return false
218
+ }
package/src/index.ts CHANGED
@@ -49,6 +49,7 @@
49
49
  import { UnixFS as UnixFSClass } from './unixfs.js'
50
50
  import type { GetBlockProgressEvents, PutBlockProgressEvents } from '@helia/interface/blocks'
51
51
  import type { AbortOptions } from '@libp2p/interface'
52
+ import type { Filter } from '@libp2p/utils/filters'
52
53
  import type { Blockstore } from 'interface-blockstore'
53
54
  import type { Mtime, UnixFS as IPFSUnixFS } from 'ipfs-unixfs'
54
55
  import type { ExporterProgressEvents, UnixFSEntry } from 'ipfs-unixfs-exporter'
@@ -248,15 +249,32 @@ export interface StatOptions extends AbortOptions, ProgressOptions<GetEvents> {
248
249
 
249
250
  /**
250
251
  * If true, do not perform any network operations and throw if blocks are
251
- * missing from the local store. (default: false)
252
+ * missing from the local store.
253
+ *
254
+ * @default false
252
255
  */
253
256
  offline?: boolean
254
257
  }
255
258
 
259
+ export interface ExtendedStatOptions extends StatOptions {
260
+ /**
261
+ * If true, traverse the whole DAG to return additional stats. If not all of
262
+ * the data is in the local blockstore, this may involve fetching blocks from
263
+ * the network.
264
+ */
265
+ extended: true
266
+
267
+ /**
268
+ * By default CIDs are deduplicated using a `ScalableCuckooFilter` - if you
269
+ * wish to use a different filter, pass it here.
270
+ */
271
+ filter?: Filter
272
+ }
273
+
256
274
  /**
257
275
  * Statistics relating to a UnixFS DAG
258
276
  */
259
- export interface UnixFSStats {
277
+ export interface Stats {
260
278
  /**
261
279
  * The file or directory CID
262
280
  */
@@ -265,7 +283,7 @@ export interface UnixFSStats {
265
283
  /**
266
284
  * The file or directory mode
267
285
  */
268
- mode?: number
286
+ mode: number
269
287
 
270
288
  /**
271
289
  * The file or directory mtime
@@ -273,41 +291,112 @@ export interface UnixFSStats {
273
291
  mtime?: Mtime
274
292
 
275
293
  /**
276
- * The size of the file in bytes
294
+ * The type of UnixFS node - 'file', 'directory' or 'raw'
277
295
  */
278
- fileSize: bigint
296
+ type: 'file' | 'directory' | 'raw'
279
297
 
280
298
  /**
281
- * The size of the DAG that holds the file in bytes
299
+ * UnixFS metadata about this file or directory
282
300
  */
283
- dagSize: bigint
301
+ unixfs?: IPFSUnixFS
302
+
303
+ /**
304
+ * The size in bytes of the file as reported by the UnixFS metadata stored in
305
+ * the root DAG node, or if the CID resolves to a raw node, the size of the
306
+ * block that holds it.
307
+ *
308
+ * For directories this will return `0` as no size information is available in
309
+ * the root block - instead please stat with the `extended` option to traverse
310
+ * the DAG and calculate the size.
311
+ */
312
+ size: bigint
313
+ }
314
+
315
+ export interface FileStats extends Stats {
316
+ type: 'file'
317
+ unixfs: IPFSUnixFS
318
+ }
319
+
320
+ export interface DirectoryStats extends Stats {
321
+ type: 'directory'
322
+ unixfs: IPFSUnixFS
323
+ }
324
+
325
+ export interface RawStats extends Stats {
326
+ type: 'raw'
327
+ unixfs: undefined
328
+ }
284
329
 
330
+ /**
331
+ * More detailed statistics relating to a UnixFS DAG. These can involve
332
+ * traversing the DAG behind the CID so can involve network operations and/or
333
+ * more disk activity.
334
+ */
335
+ export interface ExtendedStats extends Stats {
285
336
  /**
286
- * How much of the file is in the local block store
337
+ * How many blocks make up the DAG.
338
+ *
339
+ * nb. this will only be accurate if either all blocks are present in the
340
+ * local blockstore or the `offline` option was not `true`
287
341
  */
288
- localFileSize: bigint
342
+ blocks: bigint
289
343
 
290
344
  /**
291
- * How much of the DAG that holds the file is in the local blockstore
345
+ * How many unique blocks make up the DAG - a block that appears in the DAG
346
+ * more than once is only counted once.
347
+ *
348
+ * nb. this will only be accurate if either all blocks are present in the
349
+ * local blockstore or the `offline` option was not `true`
292
350
  */
293
- localDagSize: bigint
351
+ uniqueBlocks: bigint
294
352
 
295
353
  /**
296
- * How many blocks make up the DAG - nb. this will only be accurate
297
- * if all blocks are present in the local blockstore
354
+ * The size of the DAG that holds the file or directory in bytes - this is
355
+ * the sum of all block sizes so includes any protobuf overhead, etc.
356
+ *
357
+ * Duplicate blocks are included in this measurement.
358
+ *
359
+ * nb. this will only be accurate if either all blocks are present in the
360
+ * local blockstore or the `offline` option was not `true`
298
361
  */
299
- blocks: number
362
+ dagSize: bigint
300
363
 
301
364
  /**
302
- * The type of file
365
+ * Similar to `dagSize` except duplicate blocks are not included in the
366
+ * reported amount.
367
+ *
368
+ * nb. this will only be accurate if either all blocks are present in the
369
+ * local blockstore or the `offline` option was not `true`
303
370
  */
304
- type: 'file' | 'directory' | 'raw'
371
+ deduplicatedDagSize: bigint
305
372
 
306
373
  /**
307
- * UnixFS metadata about this file or directory. Will not be present
308
- * if the node is a `raw` type.
374
+ * How much of the file or directory is in the local block store. If this is a
375
+ * directory it will include the `localSize` of all child files and
376
+ * directories.
377
+ *
378
+ * It does not include protobuf overhead, for that see `dagSize`.
379
+ *
380
+ * nb. if the `offline` option is `true`, and not all blocks for the
381
+ * file/directory are in the blockstore, this number may be smaller than
382
+ * `size`.
309
383
  */
310
- unixfs?: IPFSUnixFS
384
+ localSize: bigint
385
+ }
386
+
387
+ export interface ExtendedFileStats extends ExtendedStats {
388
+ type: 'file'
389
+ unixfs: IPFSUnixFS
390
+ }
391
+
392
+ export interface ExtendedDirectoryStats extends ExtendedStats {
393
+ type: 'directory'
394
+ unixfs: IPFSUnixFS
395
+ }
396
+
397
+ export interface ExtendedRawStats extends ExtendedStats {
398
+ type: 'raw'
399
+ unixfs: undefined
311
400
  }
312
401
 
313
402
  /**
@@ -571,7 +660,8 @@ export interface UnixFS {
571
660
  * console.info(stats)
572
661
  * ```
573
662
  */
574
- stat(cid: CID, options?: Partial<StatOptions>): Promise<UnixFSStats>
663
+ stat(cid: CID, options?: StatOptions): Promise<FileStats | DirectoryStats | RawStats>
664
+ stat(cid: CID, options?: ExtendedStatOptions): Promise<ExtendedFileStats | ExtendedDirectoryStats | ExtendedRawStats>
575
665
 
576
666
  /**
577
667
  * Update the mtime of a UnixFS DAG
package/src/unixfs.ts CHANGED
@@ -7,7 +7,7 @@ import { mkdir } from './commands/mkdir.js'
7
7
  import { rm } from './commands/rm.js'
8
8
  import { stat } from './commands/stat.js'
9
9
  import { touch } from './commands/touch.js'
10
- import type { AddOptions, CatOptions, ChmodOptions, CpOptions, FileCandidate, LsOptions, MkdirOptions, RmOptions, StatOptions, TouchOptions, UnixFSComponents, UnixFS as UnixFSInterface, UnixFSStats } from './index.js'
10
+ import type { AddOptions, CatOptions, ChmodOptions, CpOptions, ExtendedStatOptions, ExtendedDirectoryStats, ExtendedFileStats, FileCandidate, LsOptions, MkdirOptions, RmOptions, StatOptions, TouchOptions, UnixFSComponents, DirectoryStats, FileStats, UnixFS as UnixFSInterface, RawStats, ExtendedRawStats } from './index.js'
11
11
  import type { Blockstore } from 'interface-blockstore'
12
12
  import type { UnixFSEntry } from 'ipfs-unixfs-exporter'
13
13
  import type { ByteStream, DirectoryCandidate, ImportCandidateStream, ImportResult } from 'ipfs-unixfs-importer'
@@ -68,7 +68,9 @@ export class UnixFS implements UnixFSInterface {
68
68
  return rm(cid, path, this.components.blockstore, options)
69
69
  }
70
70
 
71
- async stat (cid: CID, options: Partial<StatOptions> = {}): Promise<UnixFSStats> {
71
+ async stat (cid: CID, options?: StatOptions): Promise<FileStats | DirectoryStats | RawStats>
72
+ async stat (cid: CID, options?: ExtendedStatOptions): Promise<ExtendedFileStats | ExtendedDirectoryStats | ExtendedRawStats>
73
+ async stat (cid: CID, options: Partial<StatOptions> = {}): Promise<FileStats | DirectoryStats | RawStats> {
72
74
  return stat(cid, this.components.blockstore, options)
73
75
  }
74
76