@helia/trustless-gateway-client 0.0.0-1361bfa5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,279 @@
1
+ import { uriToMultiaddr } from '@multiformats/uri-to-multiaddr'
2
+ import { base64 } from 'multiformats/bases/base64'
3
+ import { CID } from 'multiformats/cid'
4
+ import { identity } from 'multiformats/hashes/identity'
5
+ import { CustomProgressEvent } from 'progress-events'
6
+ import { fromString as uint8ArrayFromString } from 'uint8arrays/from-string'
7
+ import { DEFAULT_MAX_SIZE } from './index.ts'
8
+ import { limitedResponse } from './utils.ts'
9
+ import type { BlockBrokerConnectedProgressEvent, BlockBrokerConnectProgressEvent, BlockBrokerGetBlockProgressEvents, BlockBrokerReceiveBlockProgressEvent, BlockBrokerRequestBlockProgressEvent } from '@helia/interface'
10
+ import type { ComponentLogger, Logger } from 'birnam'
11
+ import type { ProgressOptions } from 'progress-events'
12
+
13
/**
 * Codec code passed to `CID.createV1` when a gateway derives the synthetic
 * provider CID that identifies it in progress events.
 *
 * NOTE(review): presumably the multicodec table entry for
 * `transport-ipfs-gateway-http` - confirm against the multicodec table.
 */
const TRANSPORT_IPFS_GATEWAY_HTTP_CODE = 0x0920
14
+
15
/**
 * A snapshot of the counters a `TrustlessGateway` keeps about its own
 * request history, as returned by `TrustlessGateway.getStats()`.
 */
export interface TrustlessGatewayStats {
  /**
   * How many block requests have been started against the gateway
   */
  attempts: number

  /**
   * How many block requests ended in a successfully read response body
   */
  successes: number

  /**
   * How many block requests failed for a reason other than being aborted
   */
  errors: number

  /**
   * How many blocks returned by the gateway were reported as invalid
   */
  invalidBlocks: number

  /**
   * How many requests are currently in flight
   */
  pendingResponses?: number
}
22
+
23
/**
 * A hook that receives the `RequestInit` the gateway would use by default and
 * returns (synchronously or asynchronously) the `RequestInit` that is actually
 * passed to `fetch`.
 */
export interface TransformRequestInit {
  (defaultReqInit: RequestInit): RequestInit | Promise<RequestInit>
}
26
+
27
/**
 * The collaborators and settings handed to the `TrustlessGateway` constructor.
 */
export interface TrustlessGatewayComponents {
  /**
   * Used to create a logger scoped to the gateway's host
   */
  logger: ComponentLogger

  /**
   * Exposed unchanged as the gateway's public `routing` property.
   *
   * NOTE(review): presumably identifies the routing implementation that
   * discovered this gateway - confirm against callers.
   */
  routing: string

  /**
   * Optional hook for customising the `RequestInit` passed to `fetch`
   */
  transformRequestInit?: TransformRequestInit
}
32
+
33
/**
 * Options accepted by `TrustlessGateway.getRawBlock`.
 */
export interface GetRawBlockOptions extends ProgressOptions<BlockBrokerGetBlockProgressEvents> {
  /**
   * The maximum number of bytes to allow when fetching a raw block.
   *
   * @default 2_097_152 (2MiB)
   */
  maxSize?: number

  /**
   * When this signal fires, the underlying request is aborted
   */
  signal?: AbortSignal
}
43
+
44
/**
 * A `TrustlessGateway` keeps track of the number of attempts, errors, and
 * successes for a given gateway url so that we can prioritize gateways that
 * have been more reliable in the past, and ensure that requests are distributed
 * across all gateways within a given `TrustlessGatewayBlockBroker` instance.
 */
export class TrustlessGateway {
  /**
   * The base URL of the gateway
   */
  public readonly url: URL

  /**
   * A synthetic CID derived from the gateway URL, reported as the `provider`
   * in progress events
   */
  private readonly peer: CID

  /**
   * The number of times this gateway has been attempted to be used to fetch a
   * block. This includes successful, errored, and aborted attempts. By counting
   * even aborted attempts, slow gateways that are out-raced by others will be
   * considered less reliable.
   */
  #attempts = 0

  /**
   * The number of times this gateway has errored while attempting to fetch a
   * block. This includes `response.ok === false` and any other errors that
   * throw while attempting to fetch a block. This does not include aborted
   * attempts.
   */
  #errors = 0

  /**
   * The number of times this gateway has returned an invalid block. A gateway
   * that returns the wrong blocks for a CID should be considered for removal
   * from the list of gateways to fetch blocks from.
   */
  #invalidBlocks = 0

  /**
   * The number of times this gateway has successfully fetched a block.
   */
  #successes = 0

  /**
   * A map of pending responses for this gateway. This is used to ensure that
   * only one request per CID is made to a given gateway at a time, and that we
   * don't make multiple in-flight requests for the same CID to the same gateway.
   */
  readonly #pendingResponses = new Map<string, Promise<Uint8Array>>()

  private readonly log: Logger
  private readonly transformRequestInit?: TransformRequestInit

  public readonly routing: string

  /**
   * @param url - the gateway base URL, parsed with `new URL` when passed as a string
   * @param components - logger, optional request transform and routing label
   */
  constructor (url: URL | string, { logger, transformRequestInit, routing }: TrustlessGatewayComponents) {
    this.url = url instanceof URL ? url : new URL(url)
    this.transformRequestInit = transformRequestInit
    this.log = logger.forComponent(`helia:trustless-gateway-block-broker:${this.url.host}`)
    this.routing = routing
    // the gateway has no peer id, so an identity-hashed CID of its URL stands
    // in as the provider identifier
    this.peer = CID.createV1(TRANSPORT_IPFS_GATEWAY_HTTP_CODE, identity.digest(uint8ArrayFromString(this.url.toString())))
  }

  /**
   * This function returns a unique string for the multihash.bytes of the CID.
   *
   * Some useful resources for why this is needed can be found using the links below:
   *
   * - https://github.com/ipfs/helia/pull/503#discussion_r1572451331
   * - https://github.com/ipfs/kubo/issues/6815
   * - https://www.notion.so/pl-strflt/Handling-ambiguity-around-CIDs-9d5e14f6516f438980b01ef188efe15d#d9d45cd1ed8b4d349b96285de4aed5ab
   */
  #uniqueBlockId (cid: CID): string {
    const multihashBytes = cid.multihash.bytes
    return base64.encode(multihashBytes)
  }

  /**
   * Fetch a raw block from `this.url` following the specification defined at
   * https://specs.ipfs.tech/http-gateways/trustless-gateway/
   *
   * Concurrent calls for the same multihash share a single in-flight request.
   *
   * @param cid - the CID of the block to fetch
   * @param options - abort signal, progress handler and maximum body size
   * @returns the bytes of the response body
   * @throws if the signal is aborted, the gateway responds with a non-OK
   * status, the body exceeds `maxSize`, or the request otherwise fails
   */
  async getRawBlock (cid: CID, options: GetRawBlockOptions = {}): Promise<Uint8Array> {
    const gwUrl = new URL(this.url.toString())
    gwUrl.pathname = `/ipfs/${cid.toString()}`
    const maxSize = options.maxSize ?? DEFAULT_MAX_SIZE

    // necessary as not every gateway supports dag-cbor, but every should support
    // sending raw block as-is
    gwUrl.search = '?format=raw'

    if (options.signal?.aborted === true) {
      throw new Error(`Signal to fetch raw block for CID ${cid} from gateway ${this.url} was aborted prior to fetch`)
    }

    const blockId = this.#uniqueBlockId(cid)

    // workaround for https://github.com/nodejs/node/issues/52635
    const innerController = new AbortController()
    const abortInnerSignal = (): void => {
      innerController.abort()
    }
    options.signal?.addEventListener('abort', abortInnerSignal)

    // declared outside the `try` so that `finally` can tell which map entry
    // (if any) this call is responsible for
    let pendingResponse: Promise<Uint8Array> | undefined

    try {
      pendingResponse = this.#pendingResponses.get(blockId)
      if (pendingResponse == null) {
        this.#attempts++
        const defaultReqInit: RequestInit = {
          signal: innerController.signal,
          headers: {
            Accept: 'application/vnd.ipld.raw'
          },
          cache: 'force-cache'
        }

        const reqInit: RequestInit = this.transformRequestInit != null ? await this.transformRequestInit(defaultReqInit) : defaultReqInit

        const headers = new Headers(reqInit.headers)
        this.log(`sending request
%s %s HTTP/1.1
%s
`, reqInit.method ?? 'GET', gwUrl, [...headers.entries()].map(([key, value]) => `${key}: ${value}`).join('\n'))

        options.onProgress?.(new CustomProgressEvent<BlockBrokerConnectProgressEvent>('helia:block-broker:connect', {
          broker: 'trustless-gateway',
          type: 'connect',
          provider: this.peer,
          cid
        }))

        pendingResponse = fetch(gwUrl.toString(), reqInit).then(async (res) => {
          this.log(`received response
HTTP/1.1 %d %s
%s
`, res.status, res.statusText, [...res.headers.entries()].map(([key, value]) => `${key}: ${value}`).join('\n'))

          if (!res.ok) {
            // the error is counted once, by the `catch` block below - counting
            // it here as well would record two errors for a single response
            throw new Error(`Unable to fetch raw block for CID ${cid} from gateway ${this.url}, received ${res.status} ${res.statusText}`)
          }

          options.onProgress?.(new CustomProgressEvent<BlockBrokerConnectedProgressEvent>('helia:block-broker:connected', {
            broker: 'trustless-gateway',
            type: 'connected',
            provider: this.peer,
            address: uriToMultiaddr(gwUrl.toString()),
            cid
          }))

          options.onProgress?.(new CustomProgressEvent<BlockBrokerRequestBlockProgressEvent>('helia:block-broker:request-block', {
            broker: 'trustless-gateway',
            type: 'request-block',
            provider: this.peer,
            cid
          }))

          // limited Response ensures the body is less than 2MiB (or configurable maxSize)
          // see https://github.com/ipfs/helia/issues/790
          const body = await limitedResponse(res, maxSize, { signal: innerController.signal, log: this.log })

          options.onProgress?.(new CustomProgressEvent<BlockBrokerReceiveBlockProgressEvent>('helia:block-broker:receive-block', {
            broker: 'trustless-gateway',
            type: 'receive-block',
            provider: this.peer,
            cid
          }))

          this.#successes++
          return body
        })
        this.#pendingResponses.set(blockId, pendingResponse)
      }
      return await pendingResponse
    } catch (cause: any) {
      // @ts-expect-error - TS thinks signal?.aborted can only be false now
      // because it was checked for true above.
      if (options.signal?.aborted === true) {
        throw new Error(`Fetching raw block for CID ${cid} from gateway ${this.url} was aborted`)
      }
      this.#errors++
      throw new Error(`Unable to fetch raw block for CID ${cid} - ${cause.message}`)
    } finally {
      options.signal?.removeEventListener('abort', abortInnerSignal)

      // only remove the entry this call awaited - a call that failed before
      // registering a request, or whose entry was replaced by a concurrent
      // call, must not evict somebody else's in-flight request
      if (pendingResponse != null && this.#pendingResponses.get(blockId) === pendingResponse) {
        this.#pendingResponses.delete(blockId)
      }
    }
  }

  /**
   * Encapsulate the logic for determining whether a gateway is considered
   * reliable, for prioritization. This is based on the number of successful attempts made
   * and the number of errors encountered.
   *
   * Unused gateways have 100% reliability; They will be prioritized over
   * gateways with a 100% success rate to ensure that we attempt all gateways.
   *
   * @returns `1` for an unused gateway, `-Infinity` for a gateway that has
   * returned an invalid block, otherwise `successes / (attempts + errors * 3)`
   */
  reliability (): number {
    /**
     * if we have never tried to use this gateway, it is considered the most
     * reliable until we determine otherwise (prioritize unused gateways)
     */
    if (this.#attempts === 0) {
      return 1
    }

    if (this.#invalidBlocks > 0) {
      // this gateway may not be trustworthy..
      return -Infinity
    }

    /**
     * We have attempted the gateway, so we need to calculate the reliability
     * based on the number of attempts, errors, and successes. Gateways that
     * return a single error should drop their reliability score more than a
     * single success increases it.
     *
     * Play around with the below reliability function at https://www.desmos.com/calculator/d6hfhf5ukm
     */
    return this.#successes / (this.#attempts + (this.#errors * 3))
  }

  /**
   * Increment the number of invalid blocks returned by this gateway.
   */
  incrementInvalidBlocks (): void {
    this.#invalidBlocks++
  }

  /**
   * Returns a snapshot of the gateway's counters and the number of requests
   * currently in flight.
   */
  getStats (): TrustlessGatewayStats {
    return {
      attempts: this.#attempts,
      errors: this.#errors,
      invalidBlocks: this.#invalidBlocks,
      successes: this.#successes,
      pendingResponses: this.#pendingResponses.size
    }
  }

  /**
   * Returns a human-readable label containing the gateway URL.
   */
  toString (): string {
    return `TrustlessGateway(${this.url})`
  }
}
package/src/utils.ts ADDED
@@ -0,0 +1,141 @@
1
+ import { getNetConfig, isPrivate } from '@libp2p/utils'
2
+ import { DNS, HTTP, HTTPS } from '@multiformats/multiaddr-matcher'
3
+ import { multiaddrToUri } from '@multiformats/multiaddr-to-uri'
4
+ import { Uint8ArrayList } from 'uint8arraylist'
5
+ import { TrustlessGateway } from './trustless-gateway.ts'
6
+ import type { TrustlessGatewayGetBlockProgressEvents } from './index.ts'
7
+ import type { TransformRequestInit } from './trustless-gateway.ts'
8
+ import type { Routing } from '@helia/interface'
9
+ import type { Multiaddr } from '@multiformats/multiaddr'
10
+ import type { AbortOptions } from 'abort-error'
11
+ import type { ComponentLogger, Logger } from 'birnam'
12
+ import type { CID } from 'multiformats/cid'
13
+ import type { ProgressOptions } from 'progress-events'
14
+
15
/**
 * Keeps only the multiaddrs that may be used as HTTP(S) gateway addresses.
 *
 * - Addresses that are neither HTTP nor HTTPS are always dropped.
 * - HTTPS addresses (and HTTP addresses when `allowInsecure` is true) are kept
 * when `allowLocal` is true, when they are DNS addresses, or when they are
 * not private addresses.
 * - Plain HTTP addresses with `allowInsecure` false are kept only when
 * `allowLocal` is true and the host is `127.0.0.1`, `localhost` or a
 * `.localhost` subdomain.
 *
 * @param multiaddrs - the candidate addresses
 * @param allowInsecure - whether plain HTTP addresses are acceptable
 * @param allowLocal - whether private/loopback addresses are acceptable
 * @returns a new array containing the accepted addresses in their original order
 */
export function filterNonHTTPMultiaddrs (multiaddrs: Multiaddr[], allowInsecure: boolean, allowLocal: boolean): Multiaddr[] {
  const isAcceptable = (addr: Multiaddr): boolean => {
    const secure = HTTPS.exactMatch(addr)
    const plain = HTTP.exactMatch(addr)

    if (!(secure || plain)) {
      return false
    }

    if (secure || (allowInsecure && plain)) {
      // the scheme is acceptable - the only remaining question is whether
      // the address is local and, if so, whether that is permitted
      return allowLocal || DNS.matches(addr) || isPrivate(addr) === false
    }

    // When allowInsecure is false and allowLocal is true, allow multiaddrs with
    // "127.0.0.1", "localhost", or any subdomain ending with ".localhost"
    if (allowInsecure || !allowLocal) {
      return false
    }

    const { host } = getNetConfig(addr)

    return host === '127.0.0.1' || host === 'localhost' || host.endsWith('.localhost')
  }

  return multiaddrs.filter(addr => isAcceptable(addr))
}
49
+
50
/**
 * Options accepted by `findHttpGatewayProviders`. They are forwarded to the
 * routing lookup, and `transformRequestInit` is handed to every gateway that
 * is created.
 */
export interface FindHttpGatewayProvidersOptions extends AbortOptions, ProgressOptions<TrustlessGatewayGetBlockProgressEvents> {
  /**
   * Optional hook for customising the `RequestInit` each gateway passes to `fetch`
   */
  transformRequestInit?: TransformRequestInit
}
53
+
54
/**
 * Looks up providers for `cid` via `routing` and yields a `TrustlessGateway`
 * for every provider that advertises at least one usable HTTP(S) address.
 *
 * @param cid - the CID to find providers for
 * @param routing - the routing used to find providers
 * @param logger - passed to every created gateway
 * @param allowInsecure - whether plain HTTP addresses are acceptable
 * @param allowLocal - whether private/loopback addresses are acceptable
 * @param options - forwarded to `routing.findProviders`; `transformRequestInit` is passed to each gateway
 */
export async function * findHttpGatewayProviders (cid: CID, routing: Routing, logger: ComponentLogger, allowInsecure: boolean, allowLocal: boolean, options: FindHttpGatewayProvidersOptions = {}): AsyncGenerator<TrustlessGateway> {
  for await (const provider of routing.findProviders(cid, options)) {
    // only the first http(s) address of a provider is used, e.g.
    // /ip4/x.x.x.x/tcp/31337/http
    // /ip4/x.x.x.x/tcp/31337/https
    const [firstAddress] = filterNonHTTPMultiaddrs(provider.multiaddrs, allowInsecure, allowLocal)

    if (firstAddress == null) {
      // this provider has no http(s) addresses
      continue
    }

    const gatewayUrl = new URL(multiaddrToUri(firstAddress))
    const components = {
      logger,
      transformRequestInit: options.transformRequestInit,
      routing: provider.routing
    }

    yield new TrustlessGateway(gatewayUrl, components)
  }
}
76
+
77
/**
 * Optional collaborators for `limitedResponse`.
 */
interface LimitedResponseOptions {
  /**
   * Receives error messages about oversized responses and failed cancellations
   */
  log?: Logger

  /**
   * Checked before every read of the body; reading stops once it is aborted
   */
  signal?: AbortSignal
}
81
+
82
/**
 * Reads the body of `response` while enforcing an upper bound on its size.
 *
 * The `content-length` header, when present, is checked first so that an
 * oversized response can be rejected without reading it. The body is then
 * streamed and the running total is checked after every chunk.
 *
 * @param response - the response whose body should be read
 * @param byteLimit - the maximum number of body bytes to accept
 * @param options - optional abort signal and logger
 * @returns the complete response body
 * @throws if the declared or actual body size exceeds `byteLimit`, if the
 * response has no readable body, or if the signal is aborted between reads
 */
export async function limitedResponse (response: Response, byteLimit: number, options?: LimitedResponseOptions): Promise<Uint8Array> {
  const { signal, log } = options ?? {}
  const declaredLength = response.headers.get('content-length')

  if (declaredLength != null) {
    const declaredBytes = parseInt(declaredLength, 10)

    if (declaredBytes > byteLimit) {
      log?.error('content-length header (%d) is greater than the limit (%d)', declaredBytes, byteLimit)

      // release the connection without downloading the oversized body
      if (response.body != null) {
        await response.body.cancel().catch(err => {
          log?.error('error cancelling response body after content-length check - %e', err)
        })
      }

      throw new Error(`Content-Length header (${declaredBytes}) is greater than the limit (${byteLimit}).`)
    }
  }

  const reader = response.body?.getReader()

  if (reader == null) {
    // no body to consume if reader is null
    throw new Error('Response body is not readable')
  }

  const received = new Uint8ArrayList()

  try {
    for (;;) {
      if (signal?.aborted === true) {
        throw new Error('Response body read was aborted.')
      }

      const chunk = await reader.read()

      if (chunk.done) {
        break
      }

      received.append(chunk.value)

      if (received.byteLength > byteLimit) {
        // No need to consume body here, as we were streaming and hit the limit
        throw new Error(`Response body is greater than the limit (${byteLimit}), received ${received.byteLength} bytes.`)
      }
    }
  } finally {
    // runs on success and on failure, and is deliberately not awaited
    reader.cancel()
      .catch(err => {
        log?.error('error cancelling reader - %e', err)
      })
      .finally(() => {
        reader.releaseLock()
      })
  }

  return received.subarray()
}