@helia/verified-fetch 0.0.0-6c88ee1 → 0.0.0-754c7af

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. package/README.md +226 -3
  2. package/dist/index.min.js +4 -29
  3. package/dist/src/index.d.ts +242 -10
  4. package/dist/src/index.d.ts.map +1 -1
  5. package/dist/src/index.js +215 -6
  6. package/dist/src/index.js.map +1 -1
  7. package/dist/src/utils/dag-cbor-to-safe-json.d.ts +7 -0
  8. package/dist/src/utils/dag-cbor-to-safe-json.d.ts.map +1 -0
  9. package/dist/src/utils/dag-cbor-to-safe-json.js +37 -0
  10. package/dist/src/utils/dag-cbor-to-safe-json.js.map +1 -0
  11. package/dist/src/utils/get-stream-from-async-iterable.d.ts +10 -0
  12. package/dist/src/utils/get-stream-from-async-iterable.d.ts.map +1 -0
  13. package/dist/src/utils/{get-stream-and-content-type.js → get-stream-from-async-iterable.js} +10 -9
  14. package/dist/src/utils/get-stream-from-async-iterable.js.map +1 -0
  15. package/dist/src/verified-fetch.d.ts +5 -12
  16. package/dist/src/verified-fetch.d.ts.map +1 -1
  17. package/dist/src/verified-fetch.js +88 -60
  18. package/dist/src/verified-fetch.js.map +1 -1
  19. package/package.json +25 -18
  20. package/src/index.ts +247 -11
  21. package/src/utils/dag-cbor-to-safe-json.ts +44 -0
  22. package/src/utils/{get-stream-and-content-type.ts → get-stream-from-async-iterable.ts} +9 -8
  23. package/src/verified-fetch.ts +99 -67
  24. package/dist/src/utils/get-content-type.d.ts +0 -11
  25. package/dist/src/utils/get-content-type.d.ts.map +0 -1
  26. package/dist/src/utils/get-content-type.js +0 -43
  27. package/dist/src/utils/get-content-type.js.map +0 -1
  28. package/dist/src/utils/get-stream-and-content-type.d.ts +0 -10
  29. package/dist/src/utils/get-stream-and-content-type.d.ts.map +0 -1
  30. package/dist/src/utils/get-stream-and-content-type.js.map +0 -1
  31. package/src/utils/get-content-type.ts +0 -55
package/src/index.ts CHANGED
@@ -3,7 +3,7 @@
3
3
  *
4
4
  * `@helia/verified-fetch` provides a [fetch](https://developer.mozilla.org/en-US/docs/Web/API/Fetch_API)-like API for retrieving content from the [IPFS](https://ipfs.tech/) network.
5
5
  *
6
- * All content is retrieved in a [trustless manner](https://www.techopedia.com/definition/trustless), and the integrity of all bytes are verified by comparing hashes of the data.
6
+ * All content is retrieved in a [trustless manner](https://www.techopedia.com/definition/trustless), and the integrity of all bytes is verified by comparing hashes of the data. By default, CIDs are retrieved over HTTP from [trustless gateways](https://specs.ipfs.tech/http-gateways/trustless-gateway/).
7
7
  *
8
8
  * This is a marked improvement over `fetch` which offers no such protections and is vulnerable to all sorts of attacks like [Content Spoofing](https://owasp.org/www-community/attacks/Content_Spoofing), [DNS Hijacking](https://en.wikipedia.org/wiki/DNS_hijacking), etc.
9
9
  *
@@ -33,7 +33,7 @@
33
33
  * import { verifiedFetch } from '@helia/verified-fetch'
34
34
  * import { CID } from 'multiformats/cid'
35
35
  *
36
- * const cid = CID.parse('bafyFoo') // some image file
36
+ * const cid = CID.parse('bafyFoo') // some json file
37
37
  * const response = await verifiedFetch(cid)
38
38
  * const json = await response.json()
39
39
  * ```
@@ -75,7 +75,7 @@
75
75
  * const fetch = await createVerifiedFetch({
76
76
  * gateways: ['https://trustless-gateway.link'],
77
77
  * routers: ['http://delegated-ipfs.dev']
78
- *})
78
+ * })
79
79
  *
80
80
  * const resp = await fetch('ipfs://bafy...')
81
81
  *
@@ -112,6 +112,214 @@
112
112
  * const json = await resp.json()
113
113
  * ```
114
114
  *
115
+ * ### Custom content-type parsing
116
+ *
117
+ * By default, if the response can be parsed as JSON, `@helia/verified-fetch` sets the `Content-Type` header as `application/json`, otherwise it sets it as `application/octet-stream` - this is because the `.json()`, `.text()`, `.blob()`, and `.arrayBuffer()` methods will usually work as expected without a detailed content type.
118
+ *
119
+ * If you require an accurate content-type you can provide a `contentTypeParser` function as an option to `createVerifiedFetch` to handle parsing the content type.
120
+ *
121
+ * The function you provide will be called with the first chunk of bytes from the file and should return a string or a promise of a string.
122
+ *
123
+ * @example Customizing content-type parsing
124
+ *
125
+ * ```typescript
126
+ * import { createVerifiedFetch } from '@helia/verified-fetch'
127
+ * import { fileTypeFromBuffer } from '@sgtpooki/file-type'
128
+ *
129
+ * const fetch = await createVerifiedFetch({
130
+ * gateways: ['https://trustless-gateway.link'],
131
+ * routers: ['http://delegated-ipfs.dev']
132
+ * }, {
133
+ * contentTypeParser: async (bytes) => {
134
+ * // call to some magic-byte recognition library like magic-bytes, file-type, or your own custom byte recognition
135
+ * const result = await fileTypeFromBuffer(bytes)
136
+ * return result?.mime
137
+ * }
138
+ * })
139
+ * ```
140
+ *
141
+ * ### IPLD codec handling
142
+ *
143
+ * IPFS supports several data formats (typically referred to as codecs) which are included in the CID. `@helia/verified-fetch` attempts to abstract away some of the details for easier consumption.
144
+ *
145
+ * #### DAG-PB
146
+ *
147
+ * [DAG-PB](https://ipld.io/docs/codecs/known/dag-pb/) is the codec we are most likely to encounter, it is what [UnixFS](https://github.com/ipfs/specs/blob/main/UNIXFS.md) uses under the hood.
148
+ *
149
+ * ##### Using the DAG-PB codec as a Blob
150
+ *
151
+ * ```typescript
152
+ * import { verifiedFetch } from '@helia/verified-fetch'
153
+ *
154
+ * const res = await verifiedFetch('ipfs://Qmfoo')
155
+ * const blob = await res.blob()
156
+ *
157
+ * console.info(blob) // Blob { size: x, type: 'application/octet-stream' }
158
+ * ```
159
+ *
160
+ * ##### Using the DAG-PB codec as an ArrayBuffer
161
+ *
162
+ * ```typescript
163
+ * import { verifiedFetch } from '@helia/verified-fetch'
164
+ *
165
+ * const res = await verifiedFetch('ipfs://Qmfoo')
166
+ * const buf = await res.arrayBuffer()
167
+ *
168
+ * console.info(buf) // ArrayBuffer { [Uint8Contents]: < ... >, byteLength: x }
169
+ * ```
170
+ *
171
+ * ##### Using the DAG-PB codec as a stream
172
+ *
173
+ * ```typescript
174
+ * import { verifiedFetch } from '@helia/verified-fetch'
175
+ *
176
+ * const res = await verifiedFetch('ipfs://Qmfoo')
177
+ * const reader = res.body?.getReader()
178
+ *
179
+ * while (true) {
180
+ * const next = await reader.read()
181
+ *
182
+ * if (next?.done === true) {
183
+ * break
184
+ * }
185
+ *
186
+ * if (next?.value != null) {
187
+ * console.info(next.value) // Uint8Array(x) [ ... ]
188
+ * }
189
+ * }
190
+ * ```
191
+ *
192
+ * ##### Content-Type
193
+ *
194
+ * When fetching `DAG-PB` data, the content type will be set to `application/octet-stream` unless a custom content-type parser is configured.
195
+ *
196
+ * #### JSON
197
+ *
198
+ * The JSON codec is a very simple codec, a block parseable with this codec is a JSON string encoded into a `Uint8Array`.
199
+ *
200
+ * ##### Using the JSON codec
201
+ *
202
+ * ```typescript
203
+ * import * as json from 'multiformats/codecs/json'
204
+ *
205
+ * const block = new TextEncoder().encode('{ "hello": "world" }')
206
+ * const obj = json.decode(block)
207
+ *
208
+ * console.info(obj) // { hello: 'world' }
209
+ * ```
210
+ *
211
+ * ##### Content-Type
212
+ *
213
+ * When the `JSON` codec is encountered, the `Content-Type` header of the response will be set to `application/json`.
214
+ *
215
+ * #### DAG-JSON
216
+ *
217
+ * [DAG-JSON](https://ipld.io/docs/codecs/known/dag-json/) expands on the `JSON` codec, adding the ability to contain [CID](https://docs.ipfs.tech/concepts/content-addressing/)s which act as links to other blocks, and byte arrays.
218
+ *
219
+ * `CID`s and byte arrays are represented using special object structures with a single `"/"` property.
220
+ *
221
+ * Using `DAG-JSON` has two important caveats:
222
+ *
223
+ * 1. Your `JSON` structure cannot contain an object with only a `"/"` property, as it will be interpreted as a special type.
224
+ * 2. Since `JSON` has no technical limit on number sizes, `DAG-JSON` also allows numbers larger than `Number.MAX_SAFE_INTEGER`. JavaScript requires use of `BigInt`s to represent numbers larger than this, and `JSON.parse` does not support them, so precision will be lost.
225
+ *
226
+ * Otherwise this codec follows the same rules as the `JSON` codec.
227
+ *
228
+ * ##### Using the DAG-JSON codec
229
+ *
230
+ * ```typescript
231
+ * import * as dagJson from '@ipld/dag-json'
232
+ *
233
+ * const block = new TextEncoder().encode(`{
234
+ * "hello": "world",
235
+ * "cid": {
236
+ * "/": "baeaaac3imvwgy3zao5xxe3de"
237
+ * },
238
+ * "buf": {
239
+ * "/": {
240
+ * "bytes": "AAECAwQ"
241
+ * }
242
+ * }
243
+ * }`)
244
+ *
245
+ * const obj = dagJson.decode(block)
246
+ *
247
+ * console.info(obj)
248
+ * // {
249
+ * // hello: 'world',
250
+ * // cid: CID(baeaaac3imvwgy3zao5xxe3de),
251
+ * // buf: Uint8Array(5) [ 0, 1, 2, 3, 4 ]
252
+ * // }
253
+ * ```
254
+ *
255
+ * ##### Content-Type
256
+ *
257
+ * When the `DAG-JSON` codec is encountered in the requested CID, the `Content-Type` header of the response will be set to `application/json`.
258
+ *
259
+ * `DAG-JSON` data can be parsed from the response by using the `.json()` function, which will return `CID`s/byte arrays as plain `{ "/": ... }` objects:
260
+ *
261
+ * ```typescript
262
+ * import { verifiedFetch } from '@helia/verified-fetch'
263
+ * import * as dagJson from '@ipld/dag-json'
264
+ *
265
+ * const res = await verifiedFetch('ipfs://bafyDAGJSON')
266
+ *
267
+ * // either:
268
+ * const obj = await res.json()
269
+ * console.info(obj.cid) // { "/": "baeaaac3imvwgy3zao5xxe3de" }
270
+ * console.info(obj.buf) // { "/": { "bytes": "AAECAwQ" } }
271
+ * ```
272
+ *
273
+ * Alternatively, it can be decoded using the `@ipld/dag-json` module and the `.arrayBuffer()` method, in which case you will get `CID` objects and `Uint8Array`s:
274
+ *
275
+ * ```typescript
276
+ * import { verifiedFetch } from '@helia/verified-fetch'
277
+ * import * as dagJson from '@ipld/dag-json'
278
+ *
279
+ * const res = await verifiedFetch('ipfs://bafyDAGJSON')
280
+ *
281
+ * // or:
282
+ * const obj = dagJson.decode(await res.arrayBuffer())
283
+ * console.info(obj.cid) // CID(baeaaac3imvwgy3zao5xxe3de)
284
+ * console.info(obj.buf) // Uint8Array(5) [ 0, 1, 2, 3, 4 ]
285
+ * ```
286
+ *
287
+ * #### DAG-CBOR
288
+ *
289
+ * [DAG-CBOR](https://ipld.io/docs/codecs/known/dag-cbor/) uses the [Concise Binary Object Representation](https://cbor.io/) format for serialization instead of JSON.
290
+ *
291
+ * This supports more datatypes in a safer way than JSON and is smaller on the wire to boot so is usually preferable to JSON or DAG-JSON.
292
+ *
293
+ * ##### Content-Type
294
+ *
295
+ * Not all data types supported by `DAG-CBOR` can be successfully turned into JSON and back into the same binary form.
296
+ *
297
+ * When a decoded block can be round-tripped to JSON, the `Content-Type` will be set to `application/json`. In this case the `.json()` method on the `Response` object can be used to obtain an object representation of the response.
298
+ *
299
+ * When it cannot, the `Content-Type` will be `application/octet-stream` - in this case the `@ipld/dag-cbor` module must be used to deserialize the return value from `.arrayBuffer()`.
300
+ *
301
+ * ##### Detecting JSON-safe DAG-CBOR
302
+ *
303
+ * If the `Content-Type` header of the response is `application/json`, the `.json()` method may be used to access the response body in object form, otherwise the `.arrayBuffer()` method must be used to decode the raw bytes using the `@ipld/dag-cbor` module.
304
+ *
305
+ * ```typescript
306
+ * import { verifiedFetch } from '@helia/verified-fetch'
307
+ * import * as dagCbor from '@ipld/dag-cbor'
308
+ *
309
+ * const res = await verifiedFetch('ipfs://bafyDagCborCID')
310
+ * let obj
311
+ *
312
+ * if (res.headers.get('Content-Type') === 'application/json') {
313
+ * // DAG-CBOR data can be safely decoded as JSON
314
+ * obj = await res.json()
315
+ * } else {
316
+ * // response contains non-JSON friendly data types
317
+ * obj = dagCbor.decode(await res.arrayBuffer())
318
+ * }
319
+ *
320
+ * console.info(obj) // ...
321
+ * ```
322
+ *
115
323
  * ## Comparison to fetch
116
324
  *
117
325
  * This module attempts to act as similarly to the `fetch()` API as possible.
@@ -129,7 +337,7 @@
129
337
  * 2. IPNS protocol: `ipns://<peerId>` & `ipns://<publicKey>` & `ipns://<hostUri_Supporting_DnsLink_TxtRecords>`
130
338
  * 3. CID instances: An actual CID instance `CID.parse('bafy...')`
131
339
  *
132
- * As well as support for pathing & params for item 1 & 2 above according to [IPFS - Path Gateway Specification](https://specs.ipfs.tech/http-gateways/path-gateway) & [IPFS - Trustless Gateway Specification](https://specs.ipfs.tech/http-gateways/trustless-gateway/). Further refinement of those specifications specifically for web-based scenarios can be found in the [Web Pathing Specification IPIP](https://github.com/ipfs/specs/pull/453).
340
+ * As well as support for pathing & params for items 1 & 2 above according to [IPFS - Path Gateway Specification](https://specs.ipfs.tech/http-gateways/path-gateway) & [IPFS - Trustless Gateway Specification](https://specs.ipfs.tech/http-gateways/trustless-gateway/). Further refinement of those specifications specifically for web-based scenarios can be found in the [Web Pathing Specification IPIP](https://github.com/ipfs/specs/pull/453).
133
341
  *
134
342
  * If you pass a CID instance, it assumes you want the content for that specific CID only, and does not support pathing or params for that CID.
135
343
  *
@@ -242,7 +450,7 @@ import type { ProgressEvent, ProgressOptions } from 'progress-events'
242
450
  export type Resource = string | CID
243
451
 
244
452
  export interface CIDDetail {
245
- cid: string
453
+ cid: CID
246
454
  path: string
247
455
  }
248
456
 
@@ -257,13 +465,38 @@ export interface VerifiedFetch {
257
465
  }
258
466
 
259
467
  /**
260
- * Instead of passing a Helia instance, you can pass a list of gateways and routers, and a HeliaHTTP instance will be created for you.
468
+ * Instead of passing a Helia instance, you can pass a list of gateways and
469
+ * routers, and a HeliaHTTP instance will be created for you.
261
470
  */
262
- export interface CreateVerifiedFetchWithOptions {
471
+ export interface CreateVerifiedFetchInit {
263
472
  gateways: string[]
264
473
  routers?: string[]
265
474
  }
266
475
 
476
+ export interface CreateVerifiedFetchOptions {
477
+ /**
478
+ * A function to handle parsing content type from bytes. The function you
479
+ * provide will be passed the first set of bytes we receive from the network,
480
+ * and should return a string that will be used as the value for the
481
+ * `Content-Type` header in the response.
482
+ */
483
+ contentTypeParser?: ContentTypeParser
484
+ }
485
+
486
+ /**
487
+ * A ContentTypeParser attempts to return the mime type of a given file. It
488
+ * receives the first chunk of the file data and the file name, if it is
489
+ * available. The function can be sync or async and if it returns/resolves to
490
+ * `undefined`, `application/octet-stream` will be used.
491
+ */
492
+ export interface ContentTypeParser {
493
+ /**
494
+ * Attempt to determine a mime type, either via the passed bytes or the
495
+ * filename if it is available.
496
+ */
497
+ (bytes: Uint8Array, fileName?: string): Promise<string | undefined> | string | undefined
498
+ }
499
+
267
500
  export type BubbledProgressEvents =
268
501
  // unixfs
269
502
  GetEvents |
@@ -280,8 +513,9 @@ export type VerifiedFetchProgressEvents =
280
513
  /**
281
514
  * Options for the `fetch` function returned by `createVerifiedFetch`.
282
515
  *
283
- * This method accepts all the same options as the `fetch` function in the browser, plus an `onProgress` option to
284
- * listen for progress events.
516
+ * This interface contains all the same fields as the [options object](https://developer.mozilla.org/en-US/docs/Web/API/fetch#options)
517
+ * passed to `fetch` in browsers, plus an `onProgress` option to listen for
518
+ * progress events.
285
519
  */
286
520
  export interface VerifiedFetchInit extends RequestInit, ProgressOptions<BubbledProgressEvents | VerifiedFetchProgressEvents> {
287
521
  }
@@ -289,7 +523,9 @@ export interface VerifiedFetchInit extends RequestInit, ProgressOptions<BubbledP
289
523
  /**
290
524
  * Create and return a `VerifiedFetch` function
291
525
  */
292
- export async function createVerifiedFetch (init?: Helia | CreateVerifiedFetchWithOptions): Promise<VerifiedFetch> {
526
+ export async function createVerifiedFetch (init?: Helia | CreateVerifiedFetchInit, options?: CreateVerifiedFetchOptions): Promise<VerifiedFetch> {
527
+ const contentTypeParser: ContentTypeParser | undefined = options?.contentTypeParser
528
+
293
529
  if (!isHelia(init)) {
294
530
  init = await createHeliaHTTP({
295
531
  blockBrokers: [
@@ -301,7 +537,7 @@ export async function createVerifiedFetch (init?: Helia | CreateVerifiedFetchWit
301
537
  })
302
538
  }
303
539
 
304
- const verifiedFetchInstance = new VerifiedFetchClass({ helia: init })
540
+ const verifiedFetchInstance = new VerifiedFetchClass({ helia: init }, { contentTypeParser })
305
541
  async function verifiedFetch (resource: Resource, options?: VerifiedFetchInit): Promise<Response> {
306
542
  return verifiedFetchInstance.fetch(resource, options)
307
543
  }
@@ -0,0 +1,44 @@
1
+ import { decode } from 'cborg'
2
+ import { encode } from 'cborg/json'
3
+ import { CID } from 'multiformats/cid'
4
+ import type { TagDecoder } from 'cborg'
5
+
6
+ // https://github.com/ipfs/go-ipfs/issues/3570#issuecomment-273931692
7
+ const CID_CBOR_TAG = 0x2A
8
+
9
+ function cidDecoder (bytes: Uint8Array): CID {
10
+ if (bytes[0] !== 0) {
11
+ throw new Error('Invalid CID for CBOR tag 42; expected leading 0x00')
12
+ }
13
+
14
+ return CID.decode(bytes.subarray(1)) // ignore leading 0x00
15
+ }
16
+
17
+ /**
18
+ * Take a `DAG-CBOR` encoded `Uint8Array`, deserialize it as an object and
19
+ * re-serialize it in a form that can be passed to `JSON.stringify` and then
20
+ * `JSON.parse` without losing any data.
21
+ */
22
+ export function dagCborToSafeJSON (buf: Uint8Array): string {
23
+ const tags: TagDecoder[] = []
24
+ tags[CID_CBOR_TAG] = cidDecoder
25
+
26
+ const obj = decode(buf, {
27
+ allowIndefinite: false,
28
+ coerceUndefinedToNull: true,
29
+ allowNaN: false,
30
+ allowInfinity: false,
31
+ strict: true,
32
+ useMaps: false,
33
+ rejectDuplicateMapKeys: true,
34
+ tags,
35
+
36
+ // this is different to `DAG-CBOR` - the reason we disallow BigInts is
37
+ // because we are about to re-encode to `JSON` which does not support
38
+ // BigInts. Blocks containing large numbers should be deserialized using a
39
+ // cbor decoder instead
40
+ allowBigInt: false
41
+ })
42
+
43
+ return new TextDecoder().decode(encode(obj))
44
+ }
@@ -1,27 +1,25 @@
1
1
  import { CustomProgressEvent } from 'progress-events'
2
- import { getContentType } from './get-content-type.js'
3
2
  import type { VerifiedFetchInit } from '../index.js'
4
3
  import type { ComponentLogger } from '@libp2p/interface'
5
4
 
6
5
  /**
7
- * Converts an async iterator of Uint8Array bytes to a stream and attempts to determine the content type of those bytes.
6
+ * Converts an async iterator of Uint8Array bytes to a stream and returns the first chunk of bytes
8
7
  */
9
- export async function getStreamAndContentType (iterator: AsyncIterable<Uint8Array>, path: string, logger: ComponentLogger, options?: Pick<VerifiedFetchInit, 'onProgress'>): Promise<{ contentType: string, stream: ReadableStream<Uint8Array> }> {
10
- const log = logger.forComponent('helia:verified-fetch:get-stream-and-content-type')
8
+ export async function getStreamFromAsyncIterable (iterator: AsyncIterable<Uint8Array>, path: string, logger: ComponentLogger, options?: Pick<VerifiedFetchInit, 'onProgress'>): Promise<{ stream: ReadableStream<Uint8Array>, firstChunk: Uint8Array }> {
9
+ const log = logger.forComponent('helia:verified-fetch:get-stream-from-async-iterable')
11
10
  const reader = iterator[Symbol.asyncIterator]()
12
- const { value, done } = await reader.next()
11
+ const { value: firstChunk, done } = await reader.next()
13
12
 
14
13
  if (done === true) {
15
14
  log.error('No content found for path', path)
16
15
  throw new Error('No content found')
17
16
  }
18
17
 
19
- const contentType = await getContentType({ bytes: value, path })
20
18
  const stream = new ReadableStream({
21
19
  async start (controller) {
22
20
  // the initial value is already available
23
21
  options?.onProgress?.(new CustomProgressEvent<void>('verified-fetch:request:progress:chunk'))
24
- controller.enqueue(value)
22
+ controller.enqueue(firstChunk)
25
23
  },
26
24
  async pull (controller) {
27
25
  const { value, done } = await reader.next()
@@ -40,5 +38,8 @@ export async function getStreamAndContentType (iterator: AsyncIterable<Uint8Arra
40
38
  }
41
39
  })
42
40
 
43
- return { contentType, stream }
41
+ return {
42
+ stream,
43
+ firstChunk
44
+ }
44
45
  }