@helia/utils 0.1.0-ecf5394 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,287 @@
1
+ import { DEFAULT_SESSION_MIN_PROVIDERS, DEFAULT_SESSION_MAX_PROVIDERS } from '@helia/interface'
2
+ import { CodeError, TypedEventEmitter, setMaxListeners } from '@libp2p/interface'
3
+ import { Queue } from '@libp2p/utils/queue'
4
+ import { base64 } from 'multiformats/bases/base64'
5
+ import pDefer from 'p-defer'
6
+ import { BloomFilter } from './bloom-filter.js'
7
+ import type { BlockBroker, BlockRetrievalOptions, CreateSessionOptions } from '@helia/interface'
8
+ import type { AbortOptions, ComponentLogger, Logger } from '@libp2p/interface'
9
+ import type { CID } from 'multiformats/cid'
10
+ import type { DeferredPromise } from 'p-defer'
11
+ import type { ProgressEvent } from 'progress-events'
12
+
13
/**
 * Components handed to the `AbstractSession` constructor.
 */
+ export interface AbstractSessionComponents {
14
// used to create the session's loggers via `forComponent(...)`
+ logger: ComponentLogger
15
+ }
16
+
17
/**
 * Init options accepted by the `AbstractSession` constructor - the
 * `CreateSessionOptions` fields plus a session name.
 */
+ export interface AbstractCreateSessionOptions extends CreateSessionOptions {
18
// used as the logger component name and included in the message of the
// insufficient-providers error
+ name: string
19
+ }
20
+
21
/**
 * Events emitted by an `AbstractSession`.
 */
+ export interface BlockstoreSessionEvents<Provider> {
22
// dispatched each time a new provider is added to the session, the provider
// is carried in `event.detail`
+ provider: CustomEvent<Provider>
23
+ }
24
+
25
/**
 * Base class for block retrieval sessions.
 *
 * A session keeps a list of providers, asks every one of them for each
 * requested block and evicts the ones that fail. When no remaining provider
 * can supply a block, some providers are evicted at random and new ones are
 * searched for, until the block is found or the caller's abort signal fires.
 *
 * Subclasses supply the transport-specific parts: how to discover providers
 * (`findNewProviders`), how to fetch a block from one (`queryProvider`), and
 * how to identify/compare providers (`toEvictionKey`, `equals`).
 *
 * Emits a `provider` event whenever a provider joins the session.
 */
export abstract class AbstractSession<Provider, RetrieveBlockProgressEvents extends ProgressEvent> extends TypedEventEmitter<BlockstoreSessionEvents<Provider>> implements BlockBroker<RetrieveBlockProgressEvents> {
  // set by the first `retrieve` call that finds the session without
  // providers - later calls await the same search instead of starting another
  private initialPeerSearchComplete?: Promise<void>
  // in-flight retrievals keyed by the base64 encoded multihash of the CID
  private readonly requests: Map<string, Promise<Uint8Array>>
  private readonly name: string
  protected log: Logger
  protected logger: ComponentLogger
  private readonly minProviders: number
  private readonly maxProviders: number
  public readonly providers: Provider[]
  // remembers evicted providers so they are not added to the session again
  private readonly evictionFilter: BloomFilter

  /**
   * @param components - supplies the logger factory
   * @param init - session name plus optional `minProviders`/`maxProviders`,
   * which fall back to the `@helia/interface` defaults
   */
  constructor (components: AbstractSessionComponents, init: AbstractCreateSessionOptions) {
    super()

    setMaxListeners(Infinity, this)
    this.name = init.name
    this.logger = components.logger
    this.log = components.logger.forComponent(this.name)
    this.requests = new Map()
    this.minProviders = init.minProviders ?? DEFAULT_SESSION_MIN_PROVIDERS
    this.maxProviders = init.maxProviders ?? DEFAULT_SESSION_MAX_PROVIDERS
    this.providers = []
    this.evictionFilter = BloomFilter.create(this.maxProviders)
  }

  /**
   * Retrieve the block for the passed CID from the session providers.
   *
   * Concurrent calls for the same multihash share a single request. If the
   * session has no providers yet, the initial provider search is awaited
   * first.
   *
   * @param cid - the CID of the block to retrieve
   * @param options - retrieval options, `options.signal` stops the retries
   * @returns the block bytes from the first provider that supplies them
   * @throws if the provider search fails, e.g. with
   * `ERR_INSUFFICIENT_PROVIDERS_FOUND` when fewer than `minProviders` were
   * found
   */
  async retrieve (cid: CID, options: BlockRetrievalOptions<RetrieveBlockProgressEvents> = {}): Promise<Uint8Array> {
    // see if we are already requesting this CID in this session
    const cidStr = base64.encode(cid.multihash.bytes)
    const existingJob = this.requests.get(cidStr)

    if (existingJob != null) {
      this.log('join existing request for %c', cid)
      return existingJob
    }

    const deferred: DeferredPromise<Uint8Array> = pDefer()
    this.requests.set(cidStr, deferred.promise)

    if (this.providers.length === 0) {
      let first = false

      if (this.initialPeerSearchComplete == null) {
        first = true
        this.log = this.logger.forComponent(`${this.name}:${cid}`)
        this.initialPeerSearchComplete = this.findProviders(cid, this.minProviders, options)
      }

      try {
        await this.initialPeerSearchComplete
      } catch (err) {
        // the cleanup at the end of this method is never reached on this
        // path, so remove the request here and fail any callers that joined
        // it - otherwise they would wait on a promise that never settles
        this.requests.delete(cidStr)
        // mark the rejection as handled in case nobody joined the request
        deferred.promise.catch(() => {})
        deferred.reject(err)

        throw err
      }

      if (first) {
        this.log('found initial session peers for %c', cid)
      }
    }

    let foundBlock = false

    // this queue manages outgoing requests - as new peers are added to the
    // session they will be added to the queue so we can request the current
    // block from multiple peers as they are discovered
    const queue = new Queue<Uint8Array, { provider: Provider, priority?: number }>({
      concurrency: this.maxProviders
    })
    // job errors are handled by the 'failure' listener and by the `.catch`
    // attached to every `queue.add` call below
    queue.addEventListener('error', () => {})
    queue.addEventListener('failure', (evt) => {
      this.log.error('error querying provider %o, evicting from session', evt.detail.job.options.provider, evt.detail.error)
      this.evict(evt.detail.job.options.provider)
    })
    queue.addEventListener('success', (evt) => {
      // peer has sent block, return it to the caller
      foundBlock = true
      deferred.resolve(evt.detail.result)
    })
    queue.addEventListener('idle', () => {
      if (foundBlock || options.signal?.aborted === true) {
        // we either found the block or the user gave up
        return
      }

      // find more session peers and retry
      Promise.resolve()
        .then(async () => {
          this.log('no session peers had block for for %c, finding new providers', cid)

          // evict this.minProviders random providers to make room for more
          for (let i = 0; i < this.minProviders; i++) {
            if (this.providers.length === 0) {
              break
            }

            const provider = this.providers[Math.floor(Math.random() * this.providers.length)]
            this.evict(provider)
          }

          // find new providers for the CID
          await this.findProviders(cid, this.minProviders, options)

          // keep trying until the abort signal fires
          this.log('found new providers re-retrieving %c', cid)
          // remove our own entry first, otherwise the nested call would join
          // this request and wait on itself
          this.requests.delete(cidStr)
          deferred.resolve(await this.retrieve(cid, options))
        })
        .catch(err => {
          this.log.error('could not find new providers for %c', cid, err)
          deferred.reject(err)
        })
    })

    const peerAddedToSessionListener = (event: CustomEvent<Provider>): void => {
      queue.add(async () => {
        return this.queryProvider(cid, event.detail, options)
      }, {
        provider: event.detail
      })
        .catch(err => {
          if (options.signal?.aborted === true) {
            // skip logging error if signal was aborted because abort can happen
            // on success (e.g. another session found the block)
            return
          }

          this.log.error('error retrieving session block for %c', cid, err)
        })
    }

    // add new session peers to query as they are discovered
    this.addEventListener('provider', peerAddedToSessionListener)

    // query each session peer directly - iterate over a copy because failed
    // providers are removed from `this.providers` while the queries run
    Promise.all([...this.providers].map(async (provider) => {
      return queue.add(async () => {
        return this.queryProvider(cid, provider, options)
      }, {
        provider
      })
    }))
      .catch(err => {
        if (options.signal?.aborted === true) {
          // skip logging error if signal was aborted because abort can happen
          // on success (e.g. another session found the block)
          return
        }

        this.log.error('error retrieving session block for %c', cid, err)
      })

    try {
      return await deferred.promise
    } finally {
      this.removeEventListener('provider', peerAddedToSessionListener)
      queue.clear()
      this.requests.delete(cidStr)
    }
  }

  /**
   * Remove a provider from the session and record it in the eviction filter
   * so it is not added again. The provider is recorded even if it is not
   * currently part of the session.
   */
  evict (provider: Provider): void {
    this.evictionFilter.add(this.toEvictionKey(provider))
    const index = this.providers.findIndex(prov => this.equals(prov, provider))

    if (index === -1) {
      return
    }

    this.providers.splice(index, 1)
  }

  /**
   * Returns `true` if the provider was previously evicted from this session.
   *
   * The answer comes from a Bloom filter so `false` is definitive, but `true`
   * may occasionally be reported for a provider that was never evicted.
   */
  isEvicted (provider: Provider): boolean {
    return this.evictionFilter.has(this.toEvictionKey(provider))
  }

  /**
   * Returns `true` if the provider should not be added to the session, either
   * because it is already part of it or because it was evicted earlier.
   */
  hasProvider (provider: Provider): boolean {
    // dedupe existing gateways
    if (this.providers.find(prov => this.equals(prov, provider)) != null) {
      return true
    }

    // dedupe failed session peers
    if (this.isEvicted(provider)) {
      return true
    }

    return false
  }

  /**
   * Search for new providers of the passed CID and add them to the session.
   *
   * The returned promise resolves as soon as `count` new providers have been
   * added; the search itself carries on in the background until the session
   * holds `this.maxProviders` providers or the source of providers runs dry.
   *
   * Rejects with `ERR_INSUFFICIENT_PROVIDERS_FOUND` if the search ends with
   * fewer than `count` new providers, or with any error thrown by the search.
   */
  private async findProviders (cid: CID, count: number, options: AbortOptions): Promise<void> {
    const deferred: DeferredPromise<void> = pDefer()
    let found = 0

    // run async to resolve the deferred promise when `count` providers are
    // found but continue until this.providers reaches this.maxProviders
    void Promise.resolve()
      .then(async () => {
        this.log('finding %d-%d new provider(s) for %c', count, this.maxProviders, cid)

        for await (const provider of this.findNewProviders(cid, options)) {
          if (found === this.maxProviders || options.signal?.aborted === true) {
            break
          }

          if (this.hasProvider(provider)) {
            continue
          }

          this.log('found %d/%d new providers', found, this.maxProviders)
          this.providers.push(provider)

          // let the new peer join current queries
          this.safeDispatchEvent('provider', {
            detail: provider
          })

          found++

          if (found === count) {
            this.log('session is ready')
            deferred.resolve()
            // continue finding peers until we reach this.maxProviders
          }

          if (this.providers.length === this.maxProviders) {
            this.log('found max session peers', found)
            break
          }
        }

        this.log('found %d/%d new session peers', found, this.maxProviders)

        if (found < count) {
          throw new CodeError(`Found ${found} of ${count} ${this.name} providers for ${cid}`, 'ERR_INSUFFICIENT_PROVIDERS_FOUND')
        }
      })
      .catch(err => {
        this.log.error('error searching routing for potential session peers for %c', cid, err.errors ?? err)
        // has no effect if the promise was already resolved above
        deferred.reject(err)
      })

    return deferred.promise
  }

  /**
   * This method should search for new providers and yield them.
   */
  abstract findNewProviders (cid: CID, options: AbortOptions): AsyncGenerator<Provider>

  /**
   * The subclass should contact the provider and request the block from it.
   *
   * If the provider cannot provide the block an error should be thrown.
   *
   * The provider will then be excluded from ongoing queries.
   */
  abstract queryProvider (cid: CID, provider: Provider, options: AbortOptions): Promise<Uint8Array>

  /**
   * Turn a provider into a concise Uint8Array representation for use in a Bloom
   * filter
   */
  abstract toEvictionKey (provider: Provider): Uint8Array | string

  /**
   * Return `true` if we consider one provider to be the same as another
   */
  abstract equals (providerA: Provider, providerB: Provider): boolean
}
@@ -0,0 +1,141 @@
1
+ // ported from xxbloom - https://github.com/ceejbot/xxbloom/blob/master/LICENSE
2
+ import { randomBytes } from '@libp2p/crypto'
3
+ import mur from 'murmurhash3js-revisited'
4
+ import { Uint8ArrayList } from 'uint8arraylist'
5
+ import { alloc } from 'uint8arrays/alloc'
6
+ import { fromString as uint8ArrayFromString } from 'uint8arrays/from-string'
7
+
8
// (ln 2)^2 - the divisor `optimize` uses when working out how many bits a
// filter needs
+ const LN2_SQUARED = Math.LN2 * Math.LN2
9
+
10
/**
 * Options accepted by the `BloomFilter` constructor.
 */
+ export interface BloomFilterOptions {
11
// hash seeds - every item is hashed once per seed. If omitted, random seeds
// are generated
+ seeds?: number[]
12
// how many seeds to generate when `seeds` is not passed, defaults to 8
+ hashes?: number
13
// size of the filter in bits, defaults to 1024
+ bits?: number
14
+ }
15
+
16
/**
 * A Bloom filter backed by a `Uint8Array` bitfield.
 *
 * Every item is hashed once per seed with 32-bit murmur3 and each hash,
 * modulo `bits`, selects one bit of the field.
 */
export class BloomFilter {
  /**
   * Create a `BloomFilter` with the smallest `bits` and `hashes` value for the
   * specified item count and error rate.
   */
  static create (itemcount: number, errorRate: number = 0.005): BloomFilter {
    const opts = optimize(itemcount, errorRate)
    return new BloomFilter(opts)
  }

  public readonly seeds: number[]
  public readonly bits: number
  public buffer: Uint8Array

  /**
   * @param options - `seeds` are used as-is when passed, otherwise
   * `hashes` (default 8) random seeds are generated. `bits` (default 1024)
   * is the size of the filter.
   */
  constructor (options: BloomFilterOptions = {}) {
    if (options.seeds != null) {
      this.seeds = options.seeds
    } else {
      this.seeds = generateSeeds(options.hashes ?? 8)
    }

    this.bits = options.bits ?? 1024
    // one byte holds eight bits, round up so the last bits have a home
    this.buffer = alloc(Math.ceil(this.bits / 8))
  }

  /**
   * Add an item to the filter
   */
  add (item: Uint8Array | string): void {
    if (typeof item === 'string') {
      item = uint8ArrayFromString(item)
    }

    for (let i = 0; i < this.seeds.length; i++) {
      const hash = mur.x86.hash32(item, this.seeds[i])
      const bit = hash % this.bits

      this.setbit(bit)
    }
  }

  /**
   * Test if the filter has an item. If it returns false it definitely does not
   * have the item. If it returns true, it probably has the item but there's
   * an `errorRate` chance it doesn't.
   */
  has (item: Uint8Array | string): boolean {
    if (typeof item === 'string') {
      item = uint8ArrayFromString(item)
    }

    for (let i = 0; i < this.seeds.length; i++) {
      const hash = mur.x86.hash32(item, this.seeds[i])
      const bit = hash % this.bits

      const isSet = this.getbit(bit)

      if (!isSet) {
        return false
      }
    }

    return true
  }

  /**
   * Reset the filter
   */
  clear (): void {
    this.buffer.fill(0)
  }

  /**
   * Set the bit at the passed index, which is expected to be a non-negative
   * integer smaller than `bits`.
   */
  setbit (bit: number): void {
    // byte that holds the bit, and the bit's offset within that byte
    const pos = Math.floor(bit / 8)
    const shift = bit % 8

    const bitfield = this.buffer[pos]
    this.buffer[pos] = bitfield | (0x1 << shift)
  }

  /**
   * Returns `true` if the bit at the passed index is set. The index is
   * expected to be a non-negative integer smaller than `bits`.
   */
  getbit (bit: number): boolean {
    // byte that holds the bit, and the bit's offset within that byte
    const pos = Math.floor(bit / 8)
    const shift = bit % 8

    const bitfield = this.buffer[pos]
    return (bitfield & (0x1 << shift)) !== 0
  }
}
113
+
114
/**
 * Work out the filter size and hash count for the expected number of items
 * and the acceptable false-positive rate.
 *
 * @param itemcount - how many items the filter is expected to hold
 * @param errorRate - acceptable false-positive probability
 * @returns the number of bits and the number of hashes, both rounded to the
 * nearest integer
 */
function optimize (itemcount: number, errorRate: number = 0.005): { bits: number, hashes: number } {
  // bits = -n * ln(p) / (ln 2)^2
  const scaledLogError = -itemcount * Math.log(errorRate)
  const bits = Math.round(scaledLogError / LN2_SQUARED)

  // hashes = (bits / n) * ln 2
  const bitsPerItem = bits / itemcount
  const hashes = Math.round(bitsPerItem * Math.LN2)

  return { bits, hashes }
}
120
+
121
/**
 * Generate `count` distinct random unsigned 32-bit integers to use as hash
 * seeds.
 */
function generateSeeds (count: number): number[] {
  const chosen: number[] = []

  while (chosen.length < count) {
    // read four random bytes as a little-endian unsigned 32-bit integer
    const candidate = new Uint8ArrayList(randomBytes(4)).getUint32(0, true)

    // Drawing the same seed twice is unlikely but possible - when it happens
    // throw the value away and draw again.
    if (!chosen.includes(candidate)) {
      chosen.push(candidate)
    }
  }

  return chosen
}
package/src/index.ts CHANGED
@@ -39,6 +39,9 @@ import type { Datastore } from 'interface-datastore'
39
39
  import type { CID } from 'multiformats/cid'
40
40
  import type { MultihashHasher } from 'multiformats/hashes/interface'
41
41
 
42
+ export { AbstractSession, type AbstractCreateSessionOptions } from './abstract-session.js'
43
+ export { BloomFilter } from './bloom-filter.js'
44
+
42
45
  /**
43
46
  * Options used to create a Helia node.
44
47
  */
@@ -101,6 +104,24 @@ export interface HeliaInit {
101
104
  */
102
105
  routers?: Array<Partial<Routing>>
103
106
 
107
+ /**
108
+ * During provider lookups, peers can be returned from routing implementations
109
+ * with no multiaddrs.
110
+ *
111
+ * This can happen when they've been retrieved from network peers that only
112
+ * store multiaddrs for a limited amount of time.
113
+ *
114
+ * When this happens the peer's info has to be looked up with a further query.
115
+ *
116
+ * To not have this query block the yielding of other providers returned with
117
+ * multiaddrs, a separate queue is used to perform this lookup.
118
+ *
119
+ * This config value controls the concurrency of that queue.
120
+ *
121
+ * @default 5
122
+ */
123
+ providerLookupConcurrency?: number
124
+
104
125
  /**
105
126
  * Components used by subclasses
106
127
  */
@@ -119,6 +140,7 @@ interface Components {
119
140
  dagWalkers: Record<number, DAGWalker>
120
141
  logger: ComponentLogger
121
142
  blockBrokers: BlockBroker[]
143
+ routing: Routing
122
144
  dns: DNS
123
145
  }
124
146
 
@@ -140,6 +162,7 @@ export class Helia implements HeliaInterface {
140
162
  this.dagWalkers = defaultDagWalkers(init.dagWalkers)
141
163
  this.dns = init.dns ?? dns()
142
164
 
165
+ // @ts-expect-error routing is not set
143
166
  const components: Components = {
144
167
  blockstore: init.blockstore,
145
168
  datastore: init.datastore,
@@ -151,19 +174,7 @@ export class Helia implements HeliaInterface {
151
174
  ...(init.components ?? {})
152
175
  }
153
176
 
154
- components.blockBrokers = init.blockBrokers.map((fn) => {
155
- return fn(components)
156
- })
157
-
158
- const networkedStorage = new NetworkedStorage(components)
159
-
160
- this.pins = new PinsImpl(init.datastore, networkedStorage, this.dagWalkers)
161
-
162
- this.blockstore = new BlockStorage(networkedStorage, this.pins, {
163
- holdGcLock: init.holdGcLock ?? true
164
- })
165
- this.datastore = init.datastore
166
- this.routing = new RoutingClass(components, {
177
+ this.routing = components.routing = new RoutingClass(components, {
167
178
  routers: (init.routers ?? []).flatMap((router: any) => {
168
179
  // if the router itself is a router
169
180
  const routers = [
@@ -181,7 +192,19 @@ export class Helia implements HeliaInterface {
181
192
  }
182
193
 
183
194
  return routers
184
- })
195
+ }),
196
+ providerLookupConcurrency: init.providerLookupConcurrency
197
+ })
198
+
199
+ const networkedStorage = new NetworkedStorage(components)
200
+ this.pins = new PinsImpl(init.datastore, networkedStorage, this.dagWalkers)
201
+ this.blockstore = new BlockStorage(networkedStorage, this.pins, {
202
+ holdGcLock: init.holdGcLock ?? true
203
+ })
204
+ this.datastore = init.datastore
205
+
206
+ components.blockBrokers = init.blockBrokers.map((fn) => {
207
+ return fn(components)
185
208
  })
186
209
  }
187
210
 
package/src/routing.ts CHANGED
@@ -1,12 +1,15 @@
1
1
  import { CodeError, start, stop } from '@libp2p/interface'
2
- import { PeerSet } from '@libp2p/peer-collections'
2
+ import { PeerQueue } from '@libp2p/utils/peer-queue'
3
3
  import merge from 'it-merge'
4
4
  import type { Routing as RoutingInterface, Provider, RoutingOptions } from '@helia/interface'
5
5
  import type { AbortOptions, ComponentLogger, Logger, PeerId, PeerInfo, Startable } from '@libp2p/interface'
6
6
  import type { CID } from 'multiformats/cid'
7
7
 
8
+ const DEFAULT_PROVIDER_LOOKUP_CONCURRENCY = 5
9
+
8
10
  export interface RoutingInit {
9
11
  routers: Array<Partial<RoutingInterface>>
12
+ providerLookupConcurrency?: number
10
13
  }
11
14
 
12
15
  export interface RoutingComponents {
@@ -16,10 +19,12 @@ export interface RoutingComponents {
16
19
  export class Routing implements RoutingInterface, Startable {
17
20
  private readonly log: Logger
18
21
  private readonly routers: Array<Partial<RoutingInterface>>
22
+ private readonly providerLookupConcurrency: number
19
23
 
20
24
  constructor (components: RoutingComponents, init: RoutingInit) {
21
25
  this.log = components.logger.forComponent('helia:routing')
22
26
  this.routers = init.routers ?? []
27
+ this.providerLookupConcurrency = init.providerLookupConcurrency ?? DEFAULT_PROVIDER_LOOKUP_CONCURRENCY
23
28
  }
24
29
 
25
30
  async start (): Promise<void> {
@@ -31,16 +36,25 @@ export class Routing implements RoutingInterface, Startable {
31
36
  }
32
37
 
33
38
  /**
34
- * Iterates over all content routers in parallel to find providers of the given key
39
+ * Iterates over all content routers in parallel to find providers of the
40
+ * given key
35
41
  */
36
42
  async * findProviders (key: CID, options: RoutingOptions = {}): AsyncIterable<Provider> {
37
43
  if (this.routers.length === 0) {
38
44
  throw new CodeError('No content routers available', 'ERR_NO_ROUTERS_AVAILABLE')
39
45
  }
40
46
 
41
- const seen = new PeerSet()
47
+ // provider multiaddrs are only cached for a limited time, so they can come
48
+ // back as an empty array - when this happens we have to do a FIND_PEER
49
+ // query to get updated addresses, but we shouldn't block on this so use a
50
+ // separate bounded queue to perform this lookup
51
+ const queue = new PeerQueue<Provider | null>({
52
+ concurrency: this.providerLookupConcurrency
53
+ })
54
+ queue.addEventListener('error', () => {})
42
55
 
43
56
  for await (const peer of merge(
57
+ queue.toGenerator(),
44
58
  ...supports(this.routers, 'findProviders')
45
59
  .map(router => router.findProviders(key, options))
46
60
  )) {
@@ -50,12 +64,42 @@ export class Routing implements RoutingInterface, Startable {
50
64
  continue
51
65
  }
52
66
 
53
- // deduplicate peers
54
- if (seen.has(peer.id)) {
55
- continue
56
- }
67
+ peer.multiaddrs = peer.multiaddrs.map(ma => {
68
+ if (ma.getPeerId() != null) {
69
+ return ma
70
+ }
57
71
 
58
- seen.add(peer.id)
72
+ return ma.encapsulate(`/p2p/${peer.id}`)
73
+ })
74
+
75
+ // have to refresh peer info for this peer to get updated multiaddrs
76
+ if (peer.multiaddrs.length === 0) {
77
+ // already looking this peer up
78
+ if (queue.find(peer.id) != null) {
79
+ continue
80
+ }
81
+
82
+ queue.add(async () => {
83
+ try {
84
+ const provider = await this.findPeer(peer.id, options)
85
+
86
+ if (provider.multiaddrs.length === 0) {
87
+ return null
88
+ }
89
+
90
+ return provider
91
+ } catch (err) {
92
+ this.log.error('could not load multiaddrs for peer', peer.id, err)
93
+ return null
94
+ }
95
+ }, {
96
+ peerId: peer.id,
97
+ signal: options.signal
98
+ })
99
+ .catch(err => {
100
+ this.log.error('could not load multiaddrs for peer', peer.id, err)
101
+ })
102
+ }
59
103
 
60
104
  yield peer
61
105
  }
@@ -142,8 +186,6 @@ export class Routing implements RoutingInterface, Startable {
142
186
  throw new CodeError('No peer routers available', 'ERR_NO_ROUTERS_AVAILABLE')
143
187
  }
144
188
 
145
- const seen = new PeerSet()
146
-
147
189
  for await (const peer of merge(
148
190
  ...supports(this.routers, 'getClosestPeers')
149
191
  .map(router => router.getClosestPeers(key, options))
@@ -152,13 +194,6 @@ export class Routing implements RoutingInterface, Startable {
152
194
  continue
153
195
  }
154
196
 
155
- // deduplicate peers
156
- if (seen.has(peer.id)) {
157
- continue
158
- }
159
-
160
- seen.add(peer.id)
161
-
162
197
  yield peer
163
198
  }
164
199
  }