@hatk/hatk 0.0.1-alpha.5 → 0.0.1-alpha.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,11 +1,70 @@
1
1
  import type { BackfillConfig } from './config.ts';
2
+ /** Options passed to {@link runBackfill}. */
2
3
  interface BackfillOpts {
4
+ /** Base URL of the relay or PDS to enumerate repos from (e.g. `wss://bsky.network`). */
3
5
  pdsUrl: string;
6
+ /** PLC directory URL used to resolve `did:plc` identifiers (e.g. `https://plc.directory`). */
4
7
  plcUrl: string;
8
+ /** AT Protocol collection NSIDs to index (e.g. `app.bsky.feed.post`). */
5
9
  collections: Set<string>;
10
+ /** Backfill behavior settings from `config.yaml`. */
6
11
  config: BackfillConfig;
7
12
  }
13
+ /**
14
+ * Downloads and indexes a single user's repo via `com.atproto.sync.getRepo`.
15
+ *
16
+ * The full flow:
17
+ * 1. Resolve the DID to find the user's PDS endpoint
18
+ * 2. Fetch the repo as a CAR file from the PDS
19
+ * 3. Parse the CAR, decode the commit, and walk the MST (Merkle Search Tree)
20
+ * 4. Delete any existing records for this DID (so deletions are reflected)
21
+ * 5. Bulk-insert all records matching the target collections
22
+ *
23
+ * On failure, applies exponential backoff retry logic. HTTP 4xx errors are
24
+ * treated as permanent failures (repo doesn't exist or is deactivated) and
25
+ * are not retried.
26
+ *
27
+ * @param did - The DID of the repo to backfill (e.g. `did:plc:abc123`)
28
+ * @param collections - Collection NSIDs to index; records in other collections are skipped
29
+ * @param fetchTimeout - Maximum seconds to wait for the CAR download before aborting
30
+ * @returns The number of records successfully indexed
31
+ *
32
+ * @example
33
+ * ```ts
34
+ * const count = await backfillRepo('did:plc:abc123', new Set(['app.bsky.feed.post']), 30)
35
+ * console.log(`Indexed ${count} records`)
36
+ * ```
37
+ */
8
38
  export declare function backfillRepo(did: string, collections: Set<string>, fetchTimeout: number): Promise<number>;
39
+ /**
40
+ * Orchestrates a full backfill run: enumerate repos, filter to pending, download, and index.
41
+ *
42
+ * Operates in one of three modes based on config:
43
+ * - **Pinned repos** — backfill only the DIDs listed in `config.repos`
44
+ * - **Full network** — enumerate every active repo on the relay via `listRepos`
45
+ * - **Collection signal** (default) — use `listReposByCollection` to discover repos that
46
+ * contain records in the configured signal collections, falling back to `listRepos`
47
+ * if the relay doesn't support collection-scoped enumeration
48
+ *
49
+ * After the initial pass, failed repos are retried with exponential backoff
50
+ * (up to `config.maxRetries` attempts). The run emits structured log events for
51
+ * monitoring via the `backfill.run` and `backfill.retry_round` event types.
52
+ *
53
+ * @example
54
+ * ```ts
55
+ * await runBackfill({
56
+ * pdsUrl: 'wss://bsky.network',
57
+ * plcUrl: 'https://plc.directory',
58
+ * collections: new Set(['xyz.statusphere.status']),
59
+ * config: {
60
+ * fullNetwork: false,
61
+ * parallelism: 10,
62
+ * fetchTimeout: 30,
63
+ * maxRetries: 5,
64
+ * },
65
+ * })
66
+ * ```
67
+ */
9
68
  export declare function runBackfill(opts: BackfillOpts): Promise<void>;
10
69
  export {};
11
70
  //# sourceMappingURL=backfill.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"backfill.d.ts","sourceRoot":"","sources":["../src/backfill.ts"],"names":[],"mappings":"AAgBA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAA;AAEjD,UAAU,YAAY;IACpB,MAAM,EAAE,MAAM,CAAA;IACd,MAAM,EAAE,MAAM,CAAA;IACd,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,CAAA;IACxB,MAAM,EAAE,cAAc,CAAA;CACvB;AA+ED,wBAAsB,YAAY,CAAC,GAAG,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,EAAE,YAAY,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAiH/G;AAwBD,wBAAsB,WAAW,CAAC,IAAI,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC,CAiInE"}
1
+ {"version":3,"file":"backfill.d.ts","sourceRoot":"","sources":["../src/backfill.ts"],"names":[],"mappings":"AAgBA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAA;AAEjD,6CAA6C;AAC7C,UAAU,YAAY;IACpB,wFAAwF;IACxF,MAAM,EAAE,MAAM,CAAA;IACd,8FAA8F;IAC9F,MAAM,EAAE,MAAM,CAAA;IACd,yEAAyE;IACzE,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,CAAA;IACxB,qDAAqD;IACrD,MAAM,EAAE,cAAc,CAAA;CACvB;AAuGD;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,wBAAsB,YAAY,CAAC,GAAG,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,EAAE,YAAY,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAmH/G;AAgCD;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AACH,wBAAsB,WAAW,CAAC,IAAI,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC,CAiInE"}
package/dist/backfill.js CHANGED
@@ -3,8 +3,22 @@ import { cborDecode } from "./cbor.js";
3
3
  import { walkMst } from "./mst.js";
4
4
  import { setRepoStatus, getRepoStatus, getRepoRetryInfo, listRetryEligibleRepos, listPendingRepos, querySQL, runSQL, getSchema, bulkInsertRecords, } from "./db.js";
5
5
  import { emit, timer } from "./logger.js";
6
+ /** In-memory cache of DID → PDS resolution results to avoid redundant lookups. */
6
7
  const pdsCache = new Map();
7
8
  let plcUrl;
9
+ /**
10
+ * Resolves a DID to its PDS endpoint and handle by fetching the DID document.
11
+ *
12
+ * Supports both `did:web` (fetches `/.well-known/did.json`) and `did:plc`
13
+ * (fetches from the PLC directory). Results are cached for the lifetime of the process.
14
+ *
15
+ * @example
16
+ * ```ts
17
+ * const { pds, handle } = await resolvePds('did:plc:abc123')
18
+ * // pds = "https://puffball.us-east.host.bsky.network"
19
+ * // handle = "alice.bsky.social"
20
+ * ```
21
+ */
8
22
  async function resolvePds(did) {
9
23
  const cached = pdsCache.get(did);
10
24
  if (cached)
@@ -33,7 +47,10 @@ async function resolvePds(did) {
33
47
  pdsCache.set(did, result);
34
48
  return result;
35
49
  }
36
- // --- Repo Enumeration ---
50
+ /**
51
+ * Paginates through all active repos on a relay/PDS using `com.atproto.sync.listRepos`.
52
+ * Yields `{ did, rev }` for each active repo. Skips deactivated repos.
53
+ */
37
54
  async function* listRepos(pdsUrl) {
38
55
  let cursor;
39
56
  while (true) {
@@ -53,6 +70,13 @@ async function* listRepos(pdsUrl) {
53
70
  cursor = data.cursor;
54
71
  }
55
72
  }
73
+ /**
74
+ * Paginates through repos that contain records in a specific collection using
75
+ * `com.atproto.sync.listReposByCollection`. More efficient than {@link listRepos}
76
+ * when only a few collections are needed, since the relay can filter server-side.
77
+ *
78
+ * Not all relays support this endpoint — callers should fall back to {@link listRepos}.
79
+ */
56
80
  async function* listReposByCollection(pdsUrl, collection) {
57
81
  let cursor;
58
82
  while (true) {
@@ -71,7 +95,31 @@ async function* listReposByCollection(pdsUrl, collection) {
71
95
  cursor = data.cursor;
72
96
  }
73
97
  }
74
- // --- Single Repo Backfill ---
98
+ /**
99
+ * Downloads and indexes a single user's repo via `com.atproto.sync.getRepo`.
100
+ *
101
+ * The full flow:
102
+ * 1. Resolve the DID to find the user's PDS endpoint
103
+ * 2. Fetch the repo as a CAR file from the PDS
104
+ * 3. Parse the CAR, decode the commit, and walk the MST (Merkle Search Tree)
105
+ * 4. Delete any existing records for this DID (so deletions are reflected)
106
+ * 5. Bulk-insert all records matching the target collections
107
+ *
108
+ * On failure, applies exponential backoff retry logic. HTTP 4xx errors are
109
+ * treated as permanent failures (repo doesn't exist or is deactivated) and
110
+ * are not retried.
111
+ *
112
+ * @param did - The DID of the repo to backfill (e.g. `did:plc:abc123`)
113
+ * @param collections - Collection NSIDs to index; records in other collections are skipped
114
+ * @param fetchTimeout - Maximum seconds to wait for the CAR download before aborting
115
+ * @returns The number of records successfully indexed
116
+ *
117
+ * @example
118
+ * ```ts
119
+ * const count = await backfillRepo('did:plc:abc123', new Set(['app.bsky.feed.post']), 30)
120
+ * console.log(`Indexed ${count} records`)
121
+ * ```
122
+ */
75
123
  export async function backfillRepo(did, collections, fetchTimeout) {
76
124
  const elapsed = timer();
77
125
  let count = 0;
@@ -97,9 +145,10 @@ export async function backfillRepo(did, collections, fetchTimeout) {
97
145
  httpErr.httpStatus = res.status;
98
146
  throw httpErr;
99
147
  }
100
- const carBytes = new Uint8Array(await res.arrayBuffer());
148
+ let carBytes = new Uint8Array(await res.arrayBuffer());
101
149
  carSizeBytes = carBytes.length;
102
- const { roots, blocks } = parseCarFrame(carBytes);
150
+ let { roots, blocks } = parseCarFrame(carBytes);
151
+ carBytes = null; // free CAR bytes before bulk insert
103
152
  // Decode commit to get MST root
104
153
  const rootData = blocks.get(roots[0]);
105
154
  if (!rootData)
@@ -132,6 +181,7 @@ export async function backfillRepo(did, collections, fetchTimeout) {
132
181
  });
133
182
  }
134
183
  }
184
+ blocks = null; // free block map before bulk insert
135
185
  // Delete existing records for this DID before re-importing so deletions are reflected
136
186
  for (const col of collections) {
137
187
  const schema = getSchema(col);
@@ -185,7 +235,16 @@ export async function backfillRepo(did, collections, fetchTimeout) {
185
235
  });
186
236
  }
187
237
  }
188
- // --- Worker Pool ---
238
+ /**
239
+ * Processes items concurrently with a fixed number of workers.
240
+ * Workers pull from a shared index so the pool stays saturated even when
241
+ * individual items complete at different speeds. Errors from `fn` are
242
+ * swallowed (they're expected to be captured via structured logging).
243
+ *
244
+ * @param items - The work items to process
245
+ * @param parallelism - Maximum number of concurrent workers
246
+ * @param fn - Async function to run for each item
247
+ */
189
248
  async function runWorkerPool(items, parallelism, fn) {
190
249
  let index = 0;
191
250
  async function worker() {
@@ -202,7 +261,35 @@ async function runWorkerPool(items, parallelism, fn) {
202
261
  const workers = Array.from({ length: Math.min(parallelism, items.length) }, () => worker());
203
262
  await Promise.all(workers);
204
263
  }
205
- // --- Main Backfill Entry Point ---
264
+ /**
265
+ * Orchestrates a full backfill run: enumerate repos, filter to pending, download, and index.
266
+ *
267
+ * Operates in one of three modes based on config:
268
+ * - **Pinned repos** — backfill only the DIDs listed in `config.repos`
269
+ * - **Full network** — enumerate every active repo on the relay via `listRepos`
270
+ * - **Collection signal** (default) — use `listReposByCollection` to discover repos that
271
+ * contain records in the configured signal collections, falling back to `listRepos`
272
+ * if the relay doesn't support collection-scoped enumeration
273
+ *
274
+ * After the initial pass, failed repos are retried with exponential backoff
275
+ * (up to `config.maxRetries` attempts). The run emits structured log events for
276
+ * monitoring via the `backfill.run` and `backfill.retry_round` event types.
277
+ *
278
+ * @example
279
+ * ```ts
280
+ * await runBackfill({
281
+ * pdsUrl: 'wss://bsky.network',
282
+ * plcUrl: 'https://plc.directory',
283
+ * collections: new Set(['xyz.statusphere.status']),
284
+ * config: {
285
+ * fullNetwork: false,
286
+ * parallelism: 10,
287
+ * fetchTimeout: 30,
288
+ * maxRetries: 5,
289
+ * },
290
+ * })
291
+ * ```
292
+ */
206
293
  export async function runBackfill(opts) {
207
294
  const { pdsUrl, collections, config } = opts;
208
295
  plcUrl = opts.plcUrl;
package/dist/car.d.ts CHANGED
@@ -1,3 +1,29 @@
1
+ /**
2
+ * CAR (Content Addressable aRchive) parser.
3
+ *
4
+ * CAR files bundle content-addressed blocks into a single binary container.
5
+ * They're used by the AT Protocol repo sync API (`com.atproto.sync.getRepo`) to
6
+ * deliver entire repos and by firehose commit events to deliver individual changes.
7
+ *
8
+ * Format: `varint(headerLen) | CBOR(header) | block*`
9
+ * Each block: `varint(blockLen) | CID | data`
10
+ *
11
+ * @see https://ipld.io/specs/transport/car/carv1/
12
+ * @module
13
+ */
14
+ /**
15
+ * Parses a CARv1 binary frame into its root CIDs and block map.
16
+ *
17
+ * @param carBytes - Raw CAR file bytes (e.g. from `getRepo` or a firehose commit)
18
+ * @returns `roots` — ordered list of root CID strings; `blocks` — map of CID string → raw block data
19
+ *
20
+ * @example
21
+ * ```ts
22
+ * const car = new Uint8Array(await res.arrayBuffer())
23
+ * const { roots, blocks } = parseCarFrame(car)
24
+ * const commitData = blocks.get(roots[0])
25
+ * ```
26
+ */
1
27
  export declare function parseCarFrame(carBytes: Uint8Array): {
2
28
  roots: string[];
3
29
  blocks: Map<string, Uint8Array>;
package/dist/car.d.ts.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"car.d.ts","sourceRoot":"","sources":["../src/car.ts"],"names":[],"mappings":"AAgCA,wBAAgB,aAAa,CAAC,QAAQ,EAAE,UAAU,GAAG;IACnD,KAAK,EAAE,MAAM,EAAE,CAAA;IACf,MAAM,EAAE,GAAG,CAAC,MAAM,EAAE,UAAU,CAAC,CAAA;CAChC,CAmCA"}
1
+ {"version":3,"file":"car.d.ts","sourceRoot":"","sources":["../src/car.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAuCH;;;;;;;;;;;;GAYG;AACH,wBAAgB,aAAa,CAAC,QAAQ,EAAE,UAAU,GAAG;IACnD,KAAK,EAAE,MAAM,EAAE,CAAA;IACf,MAAM,EAAE,GAAG,CAAC,MAAM,EAAE,UAAU,CAAC,CAAA;CAChC,CAmCA"}
package/dist/car.js CHANGED
@@ -1,7 +1,26 @@
1
- // CAR (Content Addressable aRchive) parser from scratch
2
- // CAR files bundle content-addressed blocks — used in firehose events
1
+ /**
2
+ * CAR (Content Addressable aRchive) parser.
3
+ *
4
+ * CAR files bundle content-addressed blocks into a single binary container.
5
+ * They're used by the AT Protocol repo sync API (`com.atproto.sync.getRepo`) to
6
+ * deliver entire repos and by firehose commit events to deliver individual changes.
7
+ *
8
+ * Format: `varint(headerLen) | CBOR(header) | block*`
9
+ * Each block: `varint(blockLen) | CID | data`
10
+ *
11
+ * @see https://ipld.io/specs/transport/car/carv1/
12
+ * @module
13
+ */
3
14
  import { cborDecode } from "./cbor.js";
4
15
  import { cidToString, readVarint } from "./cid.js";
16
+ /**
17
+ * Parses a CID (Content Identifier) from raw bytes at the given offset.
18
+ *
19
+ * Handles both CIDv0 (bare SHA-256 multihash, starts with `0x12`) and
20
+ * CIDv1 (version + codec + multihash with varint-encoded lengths).
21
+ *
22
+ * @returns A tuple of `[cidBytes, nextOffset]`
23
+ */
5
24
  function parseCidFromBytes(bytes, offset) {
6
25
  const firstByte = bytes[offset];
7
26
  if (firstByte === 0x12) {
@@ -22,6 +41,19 @@ function parseCidFromBytes(bytes, offset) {
22
41
  pos = afterDigestLen + digestLen;
23
42
  return [bytes.slice(offset, pos), pos];
24
43
  }
44
+ /**
45
+ * Parses a CARv1 binary frame into its root CIDs and block map.
46
+ *
47
+ * @param carBytes - Raw CAR file bytes (e.g. from `getRepo` or a firehose commit)
48
+ * @returns `roots` — ordered list of root CID strings; `blocks` — map of CID string → raw block data
49
+ *
50
+ * @example
51
+ * ```ts
52
+ * const car = new Uint8Array(await res.arrayBuffer())
53
+ * const { roots, blocks } = parseCarFrame(car)
54
+ * const commitData = blocks.get(roots[0])
55
+ * ```
56
+ */
25
57
  export function parseCarFrame(carBytes) {
26
58
  let offset = 0;
27
59
  // Read header length (varint-prefixed CBOR)
package/dist/cbor.d.ts CHANGED
@@ -1,7 +1,44 @@
1
+ /**
2
+ * Minimal CBOR (RFC 8949) decoder with DAG-CBOR CID support.
3
+ *
4
+ * Returns `{ value, offset }` so callers can decode concatenated CBOR values —
5
+ * the AT Protocol firehose sends frames as two back-to-back CBOR items
6
+ * (header + body).
7
+ *
8
+ * DAG-CBOR tag 42 values (CID links) are decoded as `{ $link: "bafy..." }` objects,
9
+ * matching the convention used by the AT Protocol.
10
+ *
11
+ * @see https://www.rfc-editor.org/rfc/rfc8949 — CBOR spec
12
+ * @see https://ipld.io/specs/codecs/dag-cbor/spec/ — DAG-CBOR spec
13
+ * @module
14
+ */
1
15
  interface DecodeResult {
16
+ /** The decoded JavaScript value. */
2
17
  value: any;
18
+ /** Byte offset immediately after the decoded value — use as `startOffset` to decode the next item. */
3
19
  offset: number;
4
20
  }
21
+ /**
22
+ * Decodes a single CBOR value from a byte array.
23
+ *
24
+ * Supports all major types: unsigned/negative integers, byte/text strings,
25
+ * arrays, maps, tags (with special handling for CID tag 42), and simple
26
+ * values (true, false, null).
27
+ *
28
+ * @param bytes - Raw CBOR bytes
29
+ * @param startOffset - Byte position to start decoding from (default `0`)
30
+ * @returns The decoded value and the offset of the next byte after it
31
+ *
32
+ * @example
33
+ * ```ts
34
+ * // Decode a single value
35
+ * const { value } = cborDecode(bytes)
36
+ *
37
+ * // Decode two concatenated values (firehose frame)
38
+ * const { value: header, offset } = cborDecode(frameBytes)
39
+ * const { value: body } = cborDecode(frameBytes, offset)
40
+ * ```
41
+ */
5
42
  export declare function cborDecode(bytes: Uint8Array, startOffset?: number): DecodeResult;
6
43
  export {};
7
44
  //# sourceMappingURL=cbor.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"cbor.d.ts","sourceRoot":"","sources":["../src/cbor.ts"],"names":[],"mappings":"AAQA,UAAU,YAAY;IACpB,KAAK,EAAE,GAAG,CAAA;IACV,MAAM,EAAE,MAAM,CAAA;CACf;AAED,wBAAgB,UAAU,CAAC,KAAK,EAAE,UAAU,EAAE,WAAW,SAAI,GAAG,YAAY,CAgF3E"}
1
+ {"version":3,"file":"cbor.d.ts","sourceRoot":"","sources":["../src/cbor.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAOH,UAAU,YAAY;IACpB,oCAAoC;IACpC,KAAK,EAAE,GAAG,CAAA;IACV,sGAAsG;IACtG,MAAM,EAAE,MAAM,CAAA;CACf;AAED;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,wBAAgB,UAAU,CAAC,KAAK,EAAE,UAAU,EAAE,WAAW,SAAI,GAAG,YAAY,CAgF3E"}
package/dist/cbor.js CHANGED
@@ -1,8 +1,41 @@
1
- // CBOR decoder from scratch (RFC 8949)
2
- // Returns { value, offset } so we can split firehose frames
3
- // (two concatenated CBOR values: header + body)
1
+ /**
2
+ * Minimal CBOR (RFC 8949) decoder with DAG-CBOR CID support.
3
+ *
4
+ * Returns `{ value, offset }` so callers can decode concatenated CBOR values —
5
+ * the AT Protocol firehose sends frames as two back-to-back CBOR items
6
+ * (header + body).
7
+ *
8
+ * DAG-CBOR tag 42 values (CID links) are decoded as `{ $link: "bafy..." }` objects,
9
+ * matching the convention used by the AT Protocol.
10
+ *
11
+ * @see https://www.rfc-editor.org/rfc/rfc8949 — CBOR spec
12
+ * @see https://ipld.io/specs/codecs/dag-cbor/spec/ — DAG-CBOR spec
13
+ * @module
14
+ */
4
15
  import { cidToString } from "./cid.js";
16
+ /** CBOR tag number for DAG-CBOR CID links. */
5
17
  const CBOR_TAG_CID = 42;
18
+ /**
19
+ * Decodes a single CBOR value from a byte array.
20
+ *
21
+ * Supports all major types: unsigned/negative integers, byte/text strings,
22
+ * arrays, maps, tags (with special handling for CID tag 42), and simple
23
+ * values (true, false, null).
24
+ *
25
+ * @param bytes - Raw CBOR bytes
26
+ * @param startOffset - Byte position to start decoding from (default `0`)
27
+ * @returns The decoded value and the offset of the next byte after it
28
+ *
29
+ * @example
30
+ * ```ts
31
+ * // Decode a single value
32
+ * const { value } = cborDecode(bytes)
33
+ *
34
+ * // Decode two concatenated values (firehose frame)
35
+ * const { value: header, offset } = cborDecode(frameBytes)
36
+ * const { value: body } = cborDecode(frameBytes, offset)
37
+ * ```
38
+ */
6
39
  export function cborDecode(bytes, startOffset = 0) {
7
40
  let offset = startOffset;
8
41
  function read() {
package/dist/cid.d.ts CHANGED
@@ -1,4 +1,41 @@
1
+ /**
2
+ * CID (Content Identifier), base32, and varint primitives.
3
+ *
4
+ * CIDs are self-describing content hashes used throughout the AT Protocol
5
+ * to reference blocks in repos and CAR files. This module provides the
6
+ * low-level encoding needed to convert raw CID bytes into their string
7
+ * representation (base32lower with `b` multibase prefix).
8
+ *
9
+ * @see https://github.com/multiformats/cid
10
+ * @module
11
+ */
12
+ /**
13
+ * Encodes raw bytes as a base32 lowercase string (RFC 4648, no padding).
14
+ *
15
+ * @example
16
+ * ```ts
17
+ * base32Encode(new Uint8Array([0x01, 0x71])) // "afyq"
18
+ * ```
19
+ */
1
20
  export declare function base32Encode(bytes: Uint8Array): string;
21
+ /**
22
+ * Converts raw CID bytes to their multibase-encoded string form (`b` prefix + base32lower).
23
+ *
24
+ * @example
25
+ * ```ts
26
+ * cidToString(cidBytes) // "bafyreig..."
27
+ * ```
28
+ */
2
29
  export declare function cidToString(cidBytes: Uint8Array): string;
30
+ /**
31
+ * Reads an unsigned LEB128 varint from a byte array.
32
+ *
33
+ * Varints are used extensively in CID encoding and CAR framing to represent
34
+ * variable-length integers in a compact form.
35
+ *
36
+ * @param bytes - Source byte array
37
+ * @param offset - Position to start reading from
38
+ * @returns A tuple of `[value, nextOffset]`
39
+ */
3
40
  export declare function readVarint(bytes: Uint8Array, offset: number): [number, number];
4
41
  //# sourceMappingURL=cid.d.ts.map
package/dist/cid.d.ts.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"cid.d.ts","sourceRoot":"","sources":["../src/cid.ts"],"names":[],"mappings":"AAKA,wBAAgB,YAAY,CAAC,KAAK,EAAE,UAAU,GAAG,MAAM,CAmBtD;AAED,wBAAgB,WAAW,CAAC,QAAQ,EAAE,UAAU,GAAG,MAAM,CAGxD;AAED,wBAAgB,UAAU,CAAC,KAAK,EAAE,UAAU,EAAE,MAAM,EAAE,MAAM,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAc9E"}
1
+ {"version":3,"file":"cid.d.ts","sourceRoot":"","sources":["../src/cid.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAKH;;;;;;;GAOG;AACH,wBAAgB,YAAY,CAAC,KAAK,EAAE,UAAU,GAAG,MAAM,CAmBtD;AAED;;;;;;;GAOG;AACH,wBAAgB,WAAW,CAAC,QAAQ,EAAE,UAAU,GAAG,MAAM,CAExD;AAED;;;;;;;;;GASG;AACH,wBAAgB,UAAU,CAAC,KAAK,EAAE,UAAU,EAAE,MAAM,EAAE,MAAM,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAc9E"}
package/dist/cid.js CHANGED
@@ -1,6 +1,24 @@
1
- // CID (Content Identifier) + base32 + varint — from scratch
2
- // CIDs are self-describing content hashes used throughout AT Protocol
1
+ /**
2
+ * CID (Content Identifier), base32, and varint primitives.
3
+ *
4
+ * CIDs are self-describing content hashes used throughout the AT Protocol
5
+ * to reference blocks in repos and CAR files. This module provides the
6
+ * low-level encoding needed to convert raw CID bytes into their string
7
+ * representation (base32lower with `b` multibase prefix).
8
+ *
9
+ * @see https://github.com/multiformats/cid
10
+ * @module
11
+ */
12
+ /** RFC 4648 base32 lowercase alphabet (no padding). */
3
13
  const BASE32_ALPHABET = 'abcdefghijklmnopqrstuvwxyz234567';
14
+ /**
15
+ * Encodes raw bytes as a base32 lowercase string (RFC 4648, no padding).
16
+ *
17
+ * @example
18
+ * ```ts
19
+ * base32Encode(new Uint8Array([0x01, 0x71])) // "afyq"
20
+ * ```
21
+ */
4
22
  export function base32Encode(bytes) {
5
23
  let result = '';
6
24
  let bits = 0;
@@ -18,10 +36,27 @@ export function base32Encode(bytes) {
18
36
  }
19
37
  return result;
20
38
  }
39
+ /**
40
+ * Converts raw CID bytes to their multibase-encoded string form (`b` prefix + base32lower).
41
+ *
42
+ * @example
43
+ * ```ts
44
+ * cidToString(cidBytes) // "bafyreig..."
45
+ * ```
46
+ */
21
47
  export function cidToString(cidBytes) {
22
- // base32lower with 'b' multibase prefix
23
48
  return `b${base32Encode(cidBytes)}`;
24
49
  }
50
+ /**
51
+ * Reads an unsigned LEB128 varint from a byte array.
52
+ *
53
+ * Varints are used extensively in CID encoding and CAR framing to represent
54
+ * variable-length integers in a compact form.
55
+ *
56
+ * @param bytes - Source byte array
57
+ * @param offset - Position to start reading from
58
+ * @returns A tuple of `[value, nextOffset]`
59
+ */
25
60
  export function readVarint(bytes, offset) {
26
61
  let value = 0;
27
62
  let shift = 0;
package/dist/cli.js CHANGED
@@ -591,6 +591,53 @@ backfill:
591
591
  },
592
592
  },
593
593
  }, null, 2) + '\n');
594
+ writeFileSync(join(coreLexDir, 'getPreferences.json'), JSON.stringify({
595
+ lexicon: 1,
596
+ id: 'dev.hatk.getPreferences',
597
+ defs: {
598
+ main: {
599
+ type: 'query',
600
+ description: 'Get all preferences for the authenticated user.',
601
+ output: {
602
+ encoding: 'application/json',
603
+ schema: {
604
+ type: 'object',
605
+ properties: {
606
+ preferences: { type: 'unknown' },
607
+ },
608
+ },
609
+ },
610
+ },
611
+ },
612
+ }, null, 2) + '\n');
613
+ writeFileSync(join(coreLexDir, 'putPreference.json'), JSON.stringify({
614
+ lexicon: 1,
615
+ id: 'dev.hatk.putPreference',
616
+ defs: {
617
+ main: {
618
+ type: 'procedure',
619
+ description: 'Set a single preference by key.',
620
+ input: {
621
+ encoding: 'application/json',
622
+ schema: {
623
+ type: 'object',
624
+ required: ['key', 'value'],
625
+ properties: {
626
+ key: { type: 'string' },
627
+ value: { type: 'unknown' },
628
+ },
629
+ },
630
+ },
631
+ output: {
632
+ encoding: 'application/json',
633
+ schema: {
634
+ type: 'object',
635
+ properties: {},
636
+ },
637
+ },
638
+ },
639
+ },
640
+ }, null, 2) + '\n');
594
641
  writeFileSync(join(coreLexDir, 'getFeed.json'), JSON.stringify({
595
642
  lexicon: 1,
596
643
  id: 'dev.hatk.getFeed',
@@ -801,7 +848,7 @@ COPY . .
801
848
  RUN node_modules/.bin/hatk build
802
849
  RUN npm prune --omit=dev
803
850
  EXPOSE 3000
804
- CMD ["node", "node_modules/@hatk/hatk/dist/main.js", "config.yaml"]
851
+ CMD ["node", "--max-old-space-size=256", "node_modules/@hatk/hatk/dist/main.js", "config.yaml"]
805
852
  `);
806
853
  const pkgDeps = { '@hatk/oauth-client': '*', hatk: '*' };
807
854
  const pkgDevDeps = {
package/package.json CHANGED
@@ -1,6 +1,7 @@
1
1
  {
2
2
  "name": "@hatk/hatk",
3
- "version": "0.0.1-alpha.5",
3
+ "version": "0.0.1-alpha.6",
4
+ "license": "MIT",
4
5
  "bin": {
5
6
  "hatk": "dist/cli.js"
6
7
  },