@hatk/hatk 0.0.1-alpha.60 → 0.0.1-alpha.61

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- {"version":3,"file":"backfill.d.ts","sourceRoot":"","sources":["../src/backfill.ts"],"names":[],"mappings":"AAiBA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAA;AAIjD,6CAA6C;AAC7C,UAAU,YAAY;IACpB,wFAAwF;IACxF,MAAM,EAAE,MAAM,CAAA;IACd,8FAA8F;IAC9F,MAAM,EAAE,MAAM,CAAA;IACd,yEAAyE;IACzE,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,CAAA;IACxB,wDAAwD;IACxD,MAAM,EAAE,cAAc,CAAA;CACvB;AAoGD;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,wBAAsB,YAAY,CAAC,GAAG,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,EAAE,YAAY,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAkK/G;AA8BD;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AACH,wBAAsB,WAAW,CAAC,IAAI,EAAE,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,CAkIrE"}
1
+ {"version":3,"file":"backfill.d.ts","sourceRoot":"","sources":["../src/backfill.ts"],"names":[],"mappings":"AAiBA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAA;AAIjD,6CAA6C;AAC7C,UAAU,YAAY;IACpB,wFAAwF;IACxF,MAAM,EAAE,MAAM,CAAA;IACd,8FAA8F;IAC9F,MAAM,EAAE,MAAM,CAAA;IACd,yEAAyE;IACzE,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,CAAA;IACxB,wDAAwD;IACxD,MAAM,EAAE,cAAc,CAAA;CACvB;AA+FD;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,wBAAsB,YAAY,CAAC,GAAG,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,EAAE,YAAY,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAkK/G;AA8BD;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AACH,wBAAsB,WAAW,CAAC,IAAI,EAAE,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,CAkIrE"}
package/dist/backfill.js CHANGED
@@ -5,14 +5,14 @@ import { setRepoStatus, getRepoStatus, getRepoRev, getRepoRetryInfo, listRetryEl
5
5
  import { emit, timer } from "./logger.js";
6
6
  import { validateRecord } from '@bigmoves/lexicon';
7
7
  import { getLexiconArray } from "./database/schema.js";
8
- /** In-memory cache of DID → PDS resolution results to avoid redundant lookups. */
9
- const pdsCache = new Map();
10
8
  let plcUrl;
11
9
  /**
12
10
  * Resolves a DID to its PDS endpoint and handle by fetching the DID document.
13
11
  *
14
12
  * Supports both `did:web` (fetches `/.well-known/did.json`) and `did:plc`
15
- * (fetches from the PLC directory). Results are cached for the lifetime of the process.
13
+ * (fetches from the PLC directory). Always fetches fresh because DID docs change
14
+ * (handle renames, PDS migrations) and a stale cache would silently rewrite stale
15
+ * handles back into `_repos` on every backfill.
16
16
  *
17
17
  * @example
18
18
  * ```ts
@@ -22,9 +22,6 @@ let plcUrl;
22
22
  * ```
23
23
  */
24
24
  async function resolvePds(did) {
25
- const cached = pdsCache.get(did);
26
- if (cached)
27
- return cached;
28
25
  let didDoc;
29
26
  if (did.startsWith('did:web:')) {
30
27
  const domain = did.slice('did:web:'.length);
@@ -42,12 +39,10 @@ async function resolvePds(did) {
42
39
  const pds = didDoc.service?.find((s) => s.id === '#atproto_pds')?.serviceEndpoint;
43
40
  if (!pds)
44
41
  throw new Error(`No PDS endpoint in DID document for ${did}`);
45
- // Extract handle from alsoKnownAs (format: "at://handle")
42
+ // First at:// entry in alsoKnownAs is the canonical handle (per @atproto/identity convention)
46
43
  const aka = didDoc.alsoKnownAs?.find((u) => u.startsWith('at://'));
47
44
  const handle = aka ? aka.slice('at://'.length) : null;
48
- const result = { pds, handle };
49
- pdsCache.set(did, result);
50
- return result;
45
+ return { pds, handle };
51
46
  }
52
47
  /**
53
48
  * Paginates through all active repos on a relay/PDS using `com.atproto.sync.listRepos`.
@@ -1 +1 @@
1
- {"version":3,"file":"dev-entry.d.ts","sourceRoot":"","sources":["../src/dev-entry.ts"],"names":[],"mappings":"AA6GA,eAAO,MAAM,OAAO,yCAKlB,CAAA;AAEF,yEAAyE;AACzE,wBAAsB,YAAY,kBAEjC;AAED,OAAO,EAAE,UAAU,EAAE,MAAM,eAAe,CAAA;AAC1C,OAAO,EAAE,WAAW,EAAE,MAAM,eAAe,CAAA;AAC3C,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAA;AACpC,OAAO,EAAE,kBAAkB,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAA"}
1
+ {"version":3,"file":"dev-entry.d.ts","sourceRoot":"","sources":["../src/dev-entry.ts"],"names":[],"mappings":"AA8GA,eAAO,MAAM,OAAO,yCAKlB,CAAA;AAEF,yEAAyE;AACzE,wBAAsB,YAAY,kBAEjC;AAED,OAAO,EAAE,UAAU,EAAE,MAAM,eAAe,CAAA;AAC1C,OAAO,EAAE,WAAW,EAAE,MAAM,eAAe,CAAA;AAC3C,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAA;AACpC,OAAO,EAAE,kBAAkB,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAA"}
package/dist/dev-entry.js CHANGED
@@ -73,6 +73,7 @@ const collectionSet = new Set(collections);
73
73
  const cursor = await getCursor('relay');
74
74
  startIndexer({
75
75
  relayUrl: config.relay,
76
+ plcUrl: config.plc,
76
77
  collections: collectionSet,
77
78
  signalCollections: config.backfill.signalCollections ? new Set(config.backfill.signalCollections) : undefined,
78
79
  pinnedRepos: config.backfill.repos ? new Set(config.backfill.repos) : undefined,
package/dist/indexer.d.ts CHANGED
@@ -12,6 +12,7 @@ export declare function triggerAutoBackfill(did: string, attempt?: number): Prom
12
12
  /** Configuration for the firehose indexer. */
13
13
  interface IndexerOpts {
14
14
  relayUrl: string;
15
+ plcUrl: string;
15
16
  collections: Set<string>;
16
17
  signalCollections?: Set<string>;
17
18
  pinnedRepos?: Set<string>;
@@ -1 +1 @@
1
- {"version":3,"file":"indexer.d.ts","sourceRoot":"","sources":["../src/indexer.ts"],"names":[],"mappings":"AAmKA;;;;;;;GAOG;AACH,iEAAiE;AACjE,wBAAgB,aAAa,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAGxD;AAED,wBAAsB,mBAAmB,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,SAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CA4EjF;AAED,8CAA8C;AAC9C,UAAU,WAAW;IACnB,QAAQ,EAAE,MAAM,CAAA;IAChB,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,CAAA;IACxB,iBAAiB,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,CAAA;IAC/B,WAAW,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,CAAA;IACzB,MAAM,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;IACtB,YAAY,EAAE,MAAM,CAAA;IACpB,UAAU,EAAE,MAAM,CAAA;IAClB,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,kBAAkB,CAAC,EAAE,MAAM,CAAA;CAC5B;AAyBD;;;;;;;;;GASG;AACH,wBAAsB,YAAY,CAAC,IAAI,EAAE,WAAW,GAAG,OAAO,CAAC,SAAS,CAAC,CAmDxE"}
1
+ {"version":3,"file":"indexer.d.ts","sourceRoot":"","sources":["../src/indexer.ts"],"names":[],"mappings":"AAoKA;;;;;;;GAOG;AACH,iEAAiE;AACjE,wBAAgB,aAAa,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAGxD;AAED,wBAAsB,mBAAmB,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,SAAI,GAAG,OAAO,CAAC,IAAI,CAAC,CA4EjF;AAED,8CAA8C;AAC9C,UAAU,WAAW;IACnB,QAAQ,EAAE,MAAM,CAAA;IAChB,MAAM,EAAE,MAAM,CAAA;IACd,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,CAAA;IACxB,iBAAiB,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,CAAA;IAC/B,WAAW,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,CAAA;IACzB,MAAM,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;IACtB,YAAY,EAAE,MAAM,CAAA;IACpB,UAAU,EAAE,MAAM,CAAA;IAClB,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,kBAAkB,CAAC,EAAE,MAAM,CAAA;CAC5B;AAyBD;;;;;;;;;GASG;AACH,wBAAsB,YAAY,CAAC,IAAI,EAAE,WAAW,GAAG,OAAO,CAAC,SAAS,CAAC,CAoDxE"}
package/dist/indexer.js CHANGED
@@ -29,6 +29,7 @@ let indexerSignalCollections;
29
29
  let indexerPinnedRepos = null;
30
30
  let indexerFetchTimeout;
31
31
  let indexerMaxRetries;
32
+ let indexerPlcUrl;
32
33
  let maxConcurrentBackfills = 3;
33
34
  /**
34
35
  * Flush the write buffer — insert all buffered records, update the relay cursor,
@@ -259,6 +260,7 @@ export async function startIndexer(opts) {
259
260
  indexerPinnedRepos = opts.pinnedRepos || null;
260
261
  indexerFetchTimeout = fetchTimeout;
261
262
  indexerMaxRetries = opts.maxRetries;
263
+ indexerPlcUrl = opts.plcUrl;
262
264
  maxConcurrentBackfills = opts.parallelism ?? 3;
263
265
  // Pre-populate repo status cache from DB so non-signal updates
264
266
  // (e.g. profile changes) are processed for already-tracked DIDs
@@ -287,8 +289,8 @@ export async function startIndexer(opts) {
287
289
  const bytes = new Uint8Array(event.data);
288
290
  processMessage(bytes, collections);
289
291
  }
290
- catch {
291
- // Skip unparseable firehose messages silently
292
+ catch (err) {
293
+ emit('indexer', 'decode_error', { error: err instanceof Error ? err.message : String(err) });
292
294
  }
293
295
  });
294
296
  ws.addEventListener('open', () => log('[indexer] Connected to relay'));
@@ -298,6 +300,60 @@ export async function startIndexer(opts) {
298
300
  });
299
301
  return ws;
300
302
  }
303
+ /**
304
+ * Handle a `#identity` firehose event for a DID. The `handle` field on the
305
+ * event is optional per the lexicon, and some emitters omit it (signalling
306
+ * "re-resolve"). When absent, we re-resolve from the PLC directory so handle
307
+ * renames propagate even when the relay payload is sparse.
308
+ *
309
+ * Only updates DIDs we already track (present in repoStatusCache) to avoid
310
+ * writing rows for the entire network.
311
+ */
312
+ async function handleIdentityEvent(did, payloadHandle) {
313
+ if (!repoStatusCache.has(did))
314
+ return;
315
+ let handle = payloadHandle;
316
+ const payloadHadHandle = handle !== undefined;
317
+ if (!handle) {
318
+ try {
319
+ // Bound the PLC fetch so a slow plc.directory can't pile up unbounded
320
+ // promises during an identity-event burst (fire-and-forget caller).
321
+ const res = await fetch(`${indexerPlcUrl}/${did}`, {
322
+ signal: AbortSignal.timeout(indexerFetchTimeout * 1000),
323
+ });
324
+ if (res.ok) {
325
+ const doc = (await res.json());
326
+ // First at:// entry is the canonical handle (per @atproto/identity convention)
327
+ const aka = doc.alsoKnownAs?.find((u) => u.startsWith('at://'));
328
+ handle = aka ? aka.slice('at://'.length) : undefined;
329
+ }
330
+ else {
331
+ emit('indexer', 'identity_resolve_error', { did, status: res.status });
332
+ }
333
+ }
334
+ catch (err) {
335
+ emit('indexer', 'identity_resolve_error', {
336
+ did,
337
+ error: err instanceof Error ? err.message : String(err),
338
+ });
339
+ }
340
+ }
341
+ if (!handle) {
342
+ emit('indexer', 'identity_no_handle', { did, payload_had_handle: payloadHadHandle });
343
+ return;
344
+ }
345
+ try {
346
+ await updateRepoHandle(did, handle);
347
+ emit('indexer', 'identity_handle_update', { did, handle, payload_had_handle: payloadHadHandle });
348
+ }
349
+ catch (err) {
350
+ emit('indexer', 'identity_update_error', {
351
+ did,
352
+ handle,
353
+ error: err instanceof Error ? err.message : String(err),
354
+ });
355
+ }
356
+ }
301
357
  /**
302
358
  * Process a single firehose message. Decodes the CBOR header/body, filters
303
359
  * for relevant collections, validates records against lexicons, and routes
@@ -306,13 +362,13 @@ export async function startIndexer(opts) {
306
362
  function processMessage(bytes, collections) {
307
363
  const header = cborDecode(bytes, 0);
308
364
  const body = cborDecode(bytes, header.offset);
309
- // Handle identity events (handle changes)
365
+ // Handle identity events (handle changes). Fire-and-forget — keeps
366
+ // processMessage synchronous so the WS event loop drains without backpressure.
310
367
  if (header.value.t === '#identity') {
311
- const did = body.value.did;
312
- const handle = body.value.handle;
313
- if (did && handle && repoStatusCache.has(did)) {
314
- updateRepoHandle(did, handle).catch(() => { });
315
- }
368
+ const did = typeof body.value.did === 'string' ? body.value.did : undefined;
369
+ const handle = typeof body.value.handle === 'string' ? body.value.handle : undefined;
370
+ if (did)
371
+ handleIdentityEvent(did, handle);
316
372
  return;
317
373
  }
318
374
  if (header.value.op !== 1 || header.value.t !== '#commit')
package/dist/main.js CHANGED
@@ -200,6 +200,7 @@ log(` Feeds: ${listFeeds()
200
200
  const cursor = await getCursor('relay');
201
201
  startIndexer({
202
202
  relayUrl: config.relay,
203
+ plcUrl: config.plc,
203
204
  collections: collectionSet,
204
205
  signalCollections: config.backfill.signalCollections ? new Set(config.backfill.signalCollections) : undefined,
205
206
  pinnedRepos: config.backfill.repos ? new Set(config.backfill.repos) : undefined,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hatk/hatk",
3
- "version": "0.0.1-alpha.60",
3
+ "version": "0.0.1-alpha.61",
4
4
  "license": "MIT",
5
5
  "bin": {
6
6
  "hatk": "dist/cli.js"