@hatk/hatk 0.0.1-alpha.4 → 0.0.1-alpha.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/backfill.d.ts +59 -0
- package/dist/backfill.d.ts.map +1 -1
- package/dist/backfill.js +93 -6
- package/dist/car.d.ts +26 -0
- package/dist/car.d.ts.map +1 -1
- package/dist/car.js +34 -2
- package/dist/cbor.d.ts +37 -0
- package/dist/cbor.d.ts.map +1 -1
- package/dist/cbor.js +36 -3
- package/dist/cid.d.ts +37 -0
- package/dist/cid.d.ts.map +1 -1
- package/dist/cid.js +38 -3
- package/dist/cli.js +48 -1
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +8 -1
- package/package.json +2 -1
package/dist/backfill.d.ts
CHANGED
|
@@ -1,11 +1,70 @@
|
|
|
1
1
|
import type { BackfillConfig } from './config.ts';
|
|
2
|
+
/** Options passed to {@link runBackfill}. */
|
|
2
3
|
interface BackfillOpts {
|
|
4
|
+
/** Base URL of the relay or PDS to enumerate repos from (e.g. `wss://bsky.network`). */
|
|
3
5
|
pdsUrl: string;
|
|
6
|
+
/** PLC directory URL used to resolve `did:plc` identifiers (e.g. `https://plc.directory`). */
|
|
4
7
|
plcUrl: string;
|
|
8
|
+
/** AT Protocol collection NSIDs to index (e.g. `app.bsky.feed.post`). */
|
|
5
9
|
collections: Set<string>;
|
|
10
|
+
/** Backfill behavior settings from `config.yaml`. */
|
|
6
11
|
config: BackfillConfig;
|
|
7
12
|
}
|
|
13
|
+
/**
|
|
14
|
+
* Downloads and indexes a single user's repo via `com.atproto.sync.getRepo`.
|
|
15
|
+
*
|
|
16
|
+
* The full flow:
|
|
17
|
+
* 1. Resolve the DID to find the user's PDS endpoint
|
|
18
|
+
* 2. Fetch the repo as a CAR file from the PDS
|
|
19
|
+
* 3. Parse the CAR, decode the commit, and walk the MST (Merkle Search Tree)
|
|
20
|
+
* 4. Delete any existing records for this DID (so deletions are reflected)
|
|
21
|
+
* 5. Bulk-insert all records matching the target collections
|
|
22
|
+
*
|
|
23
|
+
* On failure, applies exponential backoff retry logic. HTTP 4xx errors are
|
|
24
|
+
* treated as permanent failures (repo doesn't exist or is deactivated) and
|
|
25
|
+
* are not retried.
|
|
26
|
+
*
|
|
27
|
+
* @param did - The DID of the repo to backfill (e.g. `did:plc:abc123`)
|
|
28
|
+
* @param collections - Collection NSIDs to index; records in other collections are skipped
|
|
29
|
+
* @param fetchTimeout - Maximum seconds to wait for the CAR download before aborting
|
|
30
|
+
* @returns The number of records successfully indexed
|
|
31
|
+
*
|
|
32
|
+
* @example
|
|
33
|
+
* ```ts
|
|
34
|
+
* const count = await backfillRepo('did:plc:abc123', new Set(['app.bsky.feed.post']), 30)
|
|
35
|
+
* console.log(`Indexed ${count} records`)
|
|
36
|
+
* ```
|
|
37
|
+
*/
|
|
8
38
|
export declare function backfillRepo(did: string, collections: Set<string>, fetchTimeout: number): Promise<number>;
|
|
39
|
+
/**
|
|
40
|
+
* Orchestrates a full backfill run: enumerate repos, filter to pending, download, and index.
|
|
41
|
+
*
|
|
42
|
+
* Operates in one of three modes based on config:
|
|
43
|
+
* - **Pinned repos** — backfill only the DIDs listed in `config.repos`
|
|
44
|
+
* - **Full network** — enumerate every active repo on the relay via `listRepos`
|
|
45
|
+
* - **Collection signal** (default) — use `listReposByCollection` to discover repos that
|
|
46
|
+
* contain records in the configured signal collections, falling back to `listRepos`
|
|
47
|
+
* if the relay doesn't support collection-scoped enumeration
|
|
48
|
+
*
|
|
49
|
+
* After the initial pass, failed repos are retried with exponential backoff
|
|
50
|
+
* (up to `config.maxRetries` attempts). The run emits structured log events for
|
|
51
|
+
* monitoring via the `backfill.run` and `backfill.retry_round` event types.
|
|
52
|
+
*
|
|
53
|
+
* @example
|
|
54
|
+
* ```ts
|
|
55
|
+
* await runBackfill({
|
|
56
|
+
* pdsUrl: 'wss://bsky.network',
|
|
57
|
+
* plcUrl: 'https://plc.directory',
|
|
58
|
+
* collections: new Set(['xyz.statusphere.status']),
|
|
59
|
+
* config: {
|
|
60
|
+
* fullNetwork: false,
|
|
61
|
+
* parallelism: 10,
|
|
62
|
+
* fetchTimeout: 30,
|
|
63
|
+
* maxRetries: 5,
|
|
64
|
+
* },
|
|
65
|
+
* })
|
|
66
|
+
* ```
|
|
67
|
+
*/
|
|
9
68
|
export declare function runBackfill(opts: BackfillOpts): Promise<void>;
|
|
10
69
|
export {};
|
|
11
70
|
//# sourceMappingURL=backfill.d.ts.map
|
package/dist/backfill.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"backfill.d.ts","sourceRoot":"","sources":["../src/backfill.ts"],"names":[],"mappings":"AAgBA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAA;AAEjD,UAAU,YAAY;IACpB,MAAM,EAAE,MAAM,CAAA;IACd,MAAM,EAAE,MAAM,CAAA;IACd,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,CAAA;IACxB,MAAM,EAAE,cAAc,CAAA;CACvB;
|
|
1
|
+
{"version":3,"file":"backfill.d.ts","sourceRoot":"","sources":["../src/backfill.ts"],"names":[],"mappings":"AAgBA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAA;AAEjD,6CAA6C;AAC7C,UAAU,YAAY;IACpB,wFAAwF;IACxF,MAAM,EAAE,MAAM,CAAA;IACd,8FAA8F;IAC9F,MAAM,EAAE,MAAM,CAAA;IACd,yEAAyE;IACzE,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,CAAA;IACxB,qDAAqD;IACrD,MAAM,EAAE,cAAc,CAAA;CACvB;AAuGD;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,wBAAsB,YAAY,CAAC,GAAG,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,EAAE,YAAY,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAmH/G;AAgCD;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AACH,wBAAsB,WAAW,CAAC,IAAI,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC,CAiInE"}
|
package/dist/backfill.js
CHANGED
|
@@ -3,8 +3,22 @@ import { cborDecode } from "./cbor.js";
|
|
|
3
3
|
import { walkMst } from "./mst.js";
|
|
4
4
|
import { setRepoStatus, getRepoStatus, getRepoRetryInfo, listRetryEligibleRepos, listPendingRepos, querySQL, runSQL, getSchema, bulkInsertRecords, } from "./db.js";
|
|
5
5
|
import { emit, timer } from "./logger.js";
|
|
6
|
+
/** In-memory cache of DID → PDS resolution results to avoid redundant lookups. */
|
|
6
7
|
const pdsCache = new Map();
|
|
7
8
|
let plcUrl;
|
|
9
|
+
/**
|
|
10
|
+
* Resolves a DID to its PDS endpoint and handle by fetching the DID document.
|
|
11
|
+
*
|
|
12
|
+
* Supports both `did:web` (fetches `/.well-known/did.json`) and `did:plc`
|
|
13
|
+
* (fetches from the PLC directory). Results are cached for the lifetime of the process.
|
|
14
|
+
*
|
|
15
|
+
* @example
|
|
16
|
+
* ```ts
|
|
17
|
+
* const { pds, handle } = await resolvePds('did:plc:abc123')
|
|
18
|
+
* // pds = "https://puffball.us-east.host.bsky.network"
|
|
19
|
+
* // handle = "alice.bsky.social"
|
|
20
|
+
* ```
|
|
21
|
+
*/
|
|
8
22
|
async function resolvePds(did) {
|
|
9
23
|
const cached = pdsCache.get(did);
|
|
10
24
|
if (cached)
|
|
@@ -33,7 +47,10 @@ async function resolvePds(did) {
|
|
|
33
47
|
pdsCache.set(did, result);
|
|
34
48
|
return result;
|
|
35
49
|
}
|
|
36
|
-
|
|
50
|
+
/**
|
|
51
|
+
* Paginates through all active repos on a relay/PDS using `com.atproto.sync.listRepos`.
|
|
52
|
+
* Yields `{ did, rev }` for each active repo. Skips deactivated repos.
|
|
53
|
+
*/
|
|
37
54
|
async function* listRepos(pdsUrl) {
|
|
38
55
|
let cursor;
|
|
39
56
|
while (true) {
|
|
@@ -53,6 +70,13 @@ async function* listRepos(pdsUrl) {
|
|
|
53
70
|
cursor = data.cursor;
|
|
54
71
|
}
|
|
55
72
|
}
|
|
73
|
+
/**
|
|
74
|
+
* Paginates through repos that contain records in a specific collection using
|
|
75
|
+
* `com.atproto.sync.listReposByCollection`. More efficient than {@link listRepos}
|
|
76
|
+
* when only a few collections are needed, since the relay can filter server-side.
|
|
77
|
+
*
|
|
78
|
+
* Not all relays support this endpoint — callers should fall back to {@link listRepos}.
|
|
79
|
+
*/
|
|
56
80
|
async function* listReposByCollection(pdsUrl, collection) {
|
|
57
81
|
let cursor;
|
|
58
82
|
while (true) {
|
|
@@ -71,7 +95,31 @@ async function* listReposByCollection(pdsUrl, collection) {
|
|
|
71
95
|
cursor = data.cursor;
|
|
72
96
|
}
|
|
73
97
|
}
|
|
74
|
-
|
|
98
|
+
/**
|
|
99
|
+
* Downloads and indexes a single user's repo via `com.atproto.sync.getRepo`.
|
|
100
|
+
*
|
|
101
|
+
* The full flow:
|
|
102
|
+
* 1. Resolve the DID to find the user's PDS endpoint
|
|
103
|
+
* 2. Fetch the repo as a CAR file from the PDS
|
|
104
|
+
* 3. Parse the CAR, decode the commit, and walk the MST (Merkle Search Tree)
|
|
105
|
+
* 4. Delete any existing records for this DID (so deletions are reflected)
|
|
106
|
+
* 5. Bulk-insert all records matching the target collections
|
|
107
|
+
*
|
|
108
|
+
* On failure, applies exponential backoff retry logic. HTTP 4xx errors are
|
|
109
|
+
* treated as permanent failures (repo doesn't exist or is deactivated) and
|
|
110
|
+
* are not retried.
|
|
111
|
+
*
|
|
112
|
+
* @param did - The DID of the repo to backfill (e.g. `did:plc:abc123`)
|
|
113
|
+
* @param collections - Collection NSIDs to index; records in other collections are skipped
|
|
114
|
+
* @param fetchTimeout - Maximum seconds to wait for the CAR download before aborting
|
|
115
|
+
* @returns The number of records successfully indexed
|
|
116
|
+
*
|
|
117
|
+
* @example
|
|
118
|
+
* ```ts
|
|
119
|
+
* const count = await backfillRepo('did:plc:abc123', new Set(['app.bsky.feed.post']), 30)
|
|
120
|
+
* console.log(`Indexed ${count} records`)
|
|
121
|
+
* ```
|
|
122
|
+
*/
|
|
75
123
|
export async function backfillRepo(did, collections, fetchTimeout) {
|
|
76
124
|
const elapsed = timer();
|
|
77
125
|
let count = 0;
|
|
@@ -97,9 +145,10 @@ export async function backfillRepo(did, collections, fetchTimeout) {
|
|
|
97
145
|
httpErr.httpStatus = res.status;
|
|
98
146
|
throw httpErr;
|
|
99
147
|
}
|
|
100
|
-
|
|
148
|
+
let carBytes = new Uint8Array(await res.arrayBuffer());
|
|
101
149
|
carSizeBytes = carBytes.length;
|
|
102
|
-
|
|
150
|
+
let { roots, blocks } = parseCarFrame(carBytes);
|
|
151
|
+
carBytes = null; // free CAR bytes before bulk insert
|
|
103
152
|
// Decode commit to get MST root
|
|
104
153
|
const rootData = blocks.get(roots[0]);
|
|
105
154
|
if (!rootData)
|
|
@@ -132,6 +181,7 @@ export async function backfillRepo(did, collections, fetchTimeout) {
|
|
|
132
181
|
});
|
|
133
182
|
}
|
|
134
183
|
}
|
|
184
|
+
blocks = null; // free block map before bulk insert
|
|
135
185
|
// Delete existing records for this DID before re-importing so deletions are reflected
|
|
136
186
|
for (const col of collections) {
|
|
137
187
|
const schema = getSchema(col);
|
|
@@ -185,7 +235,16 @@ export async function backfillRepo(did, collections, fetchTimeout) {
|
|
|
185
235
|
});
|
|
186
236
|
}
|
|
187
237
|
}
|
|
188
|
-
|
|
238
|
+
/**
|
|
239
|
+
* Processes items concurrently with a fixed number of workers.
|
|
240
|
+
* Workers pull from a shared index so the pool stays saturated even when
|
|
241
|
+
* individual items complete at different speeds. Errors from `fn` are
|
|
242
|
+
* swallowed (they're expected to be captured via structured logging).
|
|
243
|
+
*
|
|
244
|
+
* @param items - The work items to process
|
|
245
|
+
* @param parallelism - Maximum number of concurrent workers
|
|
246
|
+
* @param fn - Async function to run for each item
|
|
247
|
+
*/
|
|
189
248
|
async function runWorkerPool(items, parallelism, fn) {
|
|
190
249
|
let index = 0;
|
|
191
250
|
async function worker() {
|
|
@@ -202,7 +261,35 @@ async function runWorkerPool(items, parallelism, fn) {
|
|
|
202
261
|
const workers = Array.from({ length: Math.min(parallelism, items.length) }, () => worker());
|
|
203
262
|
await Promise.all(workers);
|
|
204
263
|
}
|
|
205
|
-
|
|
264
|
+
/**
|
|
265
|
+
* Orchestrates a full backfill run: enumerate repos, filter to pending, download, and index.
|
|
266
|
+
*
|
|
267
|
+
* Operates in one of three modes based on config:
|
|
268
|
+
* - **Pinned repos** — backfill only the DIDs listed in `config.repos`
|
|
269
|
+
* - **Full network** — enumerate every active repo on the relay via `listRepos`
|
|
270
|
+
* - **Collection signal** (default) — use `listReposByCollection` to discover repos that
|
|
271
|
+
* contain records in the configured signal collections, falling back to `listRepos`
|
|
272
|
+
* if the relay doesn't support collection-scoped enumeration
|
|
273
|
+
*
|
|
274
|
+
* After the initial pass, failed repos are retried with exponential backoff
|
|
275
|
+
* (up to `config.maxRetries` attempts). The run emits structured log events for
|
|
276
|
+
* monitoring via the `backfill.run` and `backfill.retry_round` event types.
|
|
277
|
+
*
|
|
278
|
+
* @example
|
|
279
|
+
* ```ts
|
|
280
|
+
* await runBackfill({
|
|
281
|
+
* pdsUrl: 'wss://bsky.network',
|
|
282
|
+
* plcUrl: 'https://plc.directory',
|
|
283
|
+
* collections: new Set(['xyz.statusphere.status']),
|
|
284
|
+
* config: {
|
|
285
|
+
* fullNetwork: false,
|
|
286
|
+
* parallelism: 10,
|
|
287
|
+
* fetchTimeout: 30,
|
|
288
|
+
* maxRetries: 5,
|
|
289
|
+
* },
|
|
290
|
+
* })
|
|
291
|
+
* ```
|
|
292
|
+
*/
|
|
206
293
|
export async function runBackfill(opts) {
|
|
207
294
|
const { pdsUrl, collections, config } = opts;
|
|
208
295
|
plcUrl = opts.plcUrl;
|
package/dist/car.d.ts
CHANGED
|
@@ -1,3 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CAR (Content Addressable aRchive) parser.
|
|
3
|
+
*
|
|
4
|
+
* CAR files bundle content-addressed blocks into a single binary container.
|
|
5
|
+
* They're used by AT Protocol repo sync (`com.atproto.sync.getRepo`) to
|
|
6
|
+
* deliver entire repos and by commit events to deliver individual changes.
|
|
7
|
+
*
|
|
8
|
+
* Format: `varint(headerLen) | CBOR(header) | block*`
|
|
9
|
+
* Each block: `varint(blockLen) | CID | data`
|
|
10
|
+
*
|
|
11
|
+
* @see https://ipld.io/specs/transport/car/carv1/
|
|
12
|
+
* @module
|
|
13
|
+
*/
|
|
14
|
+
/**
|
|
15
|
+
* Parses a CARv1 binary frame into its root CIDs and block map.
|
|
16
|
+
*
|
|
17
|
+
* @param carBytes - Raw CAR file bytes (e.g. from `getRepo` or a firehose commit)
|
|
18
|
+
* @returns `roots` — ordered list of root CID strings; `blocks` — map of CID string → raw block data
|
|
19
|
+
*
|
|
20
|
+
* @example
|
|
21
|
+
* ```ts
|
|
22
|
+
* const car = new Uint8Array(await res.arrayBuffer())
|
|
23
|
+
* const { roots, blocks } = parseCarFrame(car)
|
|
24
|
+
* const commitData = blocks.get(roots[0])
|
|
25
|
+
* ```
|
|
26
|
+
*/
|
|
1
27
|
export declare function parseCarFrame(carBytes: Uint8Array): {
|
|
2
28
|
roots: string[];
|
|
3
29
|
blocks: Map<string, Uint8Array>;
|
package/dist/car.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"car.d.ts","sourceRoot":"","sources":["../src/car.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"car.d.ts","sourceRoot":"","sources":["../src/car.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAuCH;;;;;;;;;;;;GAYG;AACH,wBAAgB,aAAa,CAAC,QAAQ,EAAE,UAAU,GAAG;IACnD,KAAK,EAAE,MAAM,EAAE,CAAA;IACf,MAAM,EAAE,GAAG,CAAC,MAAM,EAAE,UAAU,CAAC,CAAA;CAChC,CAmCA"}
|
package/dist/car.js
CHANGED
|
@@ -1,7 +1,26 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
/**
|
|
2
|
+
* CAR (Content Addressable aRchive) parser.
|
|
3
|
+
*
|
|
4
|
+
* CAR files bundle content-addressed blocks into a single binary container.
|
|
5
|
+
* They're used by AT Protocol repo sync (`com.atproto.sync.getRepo`) to
|
|
6
|
+
* deliver entire repos and by commit events to deliver individual changes.
|
|
7
|
+
*
|
|
8
|
+
* Format: `varint(headerLen) | CBOR(header) | block*`
|
|
9
|
+
* Each block: `varint(blockLen) | CID | data`
|
|
10
|
+
*
|
|
11
|
+
* @see https://ipld.io/specs/transport/car/carv1/
|
|
12
|
+
* @module
|
|
13
|
+
*/
|
|
3
14
|
import { cborDecode } from "./cbor.js";
|
|
4
15
|
import { cidToString, readVarint } from "./cid.js";
|
|
16
|
+
/**
|
|
17
|
+
* Parses a CID (Content Identifier) from raw bytes at the given offset.
|
|
18
|
+
*
|
|
19
|
+
* Handles both CIDv0 (bare SHA-256 multihash, starts with `0x12`) and
|
|
20
|
+
* CIDv1 (version + codec + multihash with varint-encoded lengths).
|
|
21
|
+
*
|
|
22
|
+
* @returns A tuple of `[cidBytes, nextOffset]`
|
|
23
|
+
*/
|
|
5
24
|
function parseCidFromBytes(bytes, offset) {
|
|
6
25
|
const firstByte = bytes[offset];
|
|
7
26
|
if (firstByte === 0x12) {
|
|
@@ -22,6 +41,19 @@ function parseCidFromBytes(bytes, offset) {
|
|
|
22
41
|
pos = afterDigestLen + digestLen;
|
|
23
42
|
return [bytes.slice(offset, pos), pos];
|
|
24
43
|
}
|
|
44
|
+
/**
|
|
45
|
+
* Parses a CARv1 binary frame into its root CIDs and block map.
|
|
46
|
+
*
|
|
47
|
+
* @param carBytes - Raw CAR file bytes (e.g. from `getRepo` or a firehose commit)
|
|
48
|
+
* @returns `roots` — ordered list of root CID strings; `blocks` — map of CID string → raw block data
|
|
49
|
+
*
|
|
50
|
+
* @example
|
|
51
|
+
* ```ts
|
|
52
|
+
* const car = new Uint8Array(await res.arrayBuffer())
|
|
53
|
+
* const { roots, blocks } = parseCarFrame(car)
|
|
54
|
+
* const commitData = blocks.get(roots[0])
|
|
55
|
+
* ```
|
|
56
|
+
*/
|
|
25
57
|
export function parseCarFrame(carBytes) {
|
|
26
58
|
let offset = 0;
|
|
27
59
|
// Read header length (varint-prefixed CBOR)
|
package/dist/cbor.d.ts
CHANGED
|
@@ -1,7 +1,44 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Minimal CBOR (RFC 8949) decoder with DAG-CBOR CID support.
|
|
3
|
+
*
|
|
4
|
+
* Returns `{ value, offset }` so callers can decode concatenated CBOR values —
|
|
5
|
+
* the AT Protocol firehose sends frames as two back-to-back CBOR items
|
|
6
|
+
* (header + body).
|
|
7
|
+
*
|
|
8
|
+
* DAG-CBOR tag 42 values (CID links) are decoded as `{ $link: "bafy..." }` objects,
|
|
9
|
+
* matching the convention used by the AT Protocol.
|
|
10
|
+
*
|
|
11
|
+
* @see https://www.rfc-editor.org/rfc/rfc8949 — CBOR spec
|
|
12
|
+
* @see https://ipld.io/specs/codecs/dag-cbor/spec/ — DAG-CBOR spec
|
|
13
|
+
* @module
|
|
14
|
+
*/
|
|
1
15
|
interface DecodeResult {
|
|
16
|
+
/** The decoded JavaScript value. */
|
|
2
17
|
value: any;
|
|
18
|
+
/** Byte offset immediately after the decoded value — use as `startOffset` to decode the next item. */
|
|
3
19
|
offset: number;
|
|
4
20
|
}
|
|
21
|
+
/**
|
|
22
|
+
* Decodes a single CBOR value from a byte array.
|
|
23
|
+
*
|
|
24
|
+
* Supports all major types: unsigned/negative integers, byte/text strings,
|
|
25
|
+
* arrays, maps, tags (with special handling for CID tag 42), and simple
|
|
26
|
+
* values (true, false, null).
|
|
27
|
+
*
|
|
28
|
+
* @param bytes - Raw CBOR bytes
|
|
29
|
+
* @param startOffset - Byte position to start decoding from (default `0`)
|
|
30
|
+
* @returns The decoded value and the offset of the next byte after it
|
|
31
|
+
*
|
|
32
|
+
* @example
|
|
33
|
+
* ```ts
|
|
34
|
+
* // Decode a single value
|
|
35
|
+
* const { value } = cborDecode(bytes)
|
|
36
|
+
*
|
|
37
|
+
* // Decode two concatenated values (firehose frame)
|
|
38
|
+
* const { value: header, offset } = cborDecode(frameBytes)
|
|
39
|
+
* const { value: body } = cborDecode(frameBytes, offset)
|
|
40
|
+
* ```
|
|
41
|
+
*/
|
|
5
42
|
export declare function cborDecode(bytes: Uint8Array, startOffset?: number): DecodeResult;
|
|
6
43
|
export {};
|
|
7
44
|
//# sourceMappingURL=cbor.d.ts.map
|
package/dist/cbor.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cbor.d.ts","sourceRoot":"","sources":["../src/cbor.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"cbor.d.ts","sourceRoot":"","sources":["../src/cbor.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAOH,UAAU,YAAY;IACpB,oCAAoC;IACpC,KAAK,EAAE,GAAG,CAAA;IACV,sGAAsG;IACtG,MAAM,EAAE,MAAM,CAAA;CACf;AAED;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,wBAAgB,UAAU,CAAC,KAAK,EAAE,UAAU,EAAE,WAAW,SAAI,GAAG,YAAY,CAgF3E"}
|
package/dist/cbor.js
CHANGED
|
@@ -1,8 +1,41 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
1
|
+
/**
|
|
2
|
+
* Minimal CBOR (RFC 8949) decoder with DAG-CBOR CID support.
|
|
3
|
+
*
|
|
4
|
+
* Returns `{ value, offset }` so callers can decode concatenated CBOR values —
|
|
5
|
+
* the AT Protocol firehose sends frames as two back-to-back CBOR items
|
|
6
|
+
* (header + body).
|
|
7
|
+
*
|
|
8
|
+
* DAG-CBOR tag 42 values (CID links) are decoded as `{ $link: "bafy..." }` objects,
|
|
9
|
+
* matching the convention used by the AT Protocol.
|
|
10
|
+
*
|
|
11
|
+
* @see https://www.rfc-editor.org/rfc/rfc8949 — CBOR spec
|
|
12
|
+
* @see https://ipld.io/specs/codecs/dag-cbor/spec/ — DAG-CBOR spec
|
|
13
|
+
* @module
|
|
14
|
+
*/
|
|
4
15
|
import { cidToString } from "./cid.js";
|
|
16
|
+
/** CBOR tag number for DAG-CBOR CID links. */
|
|
5
17
|
const CBOR_TAG_CID = 42;
|
|
18
|
+
/**
|
|
19
|
+
* Decodes a single CBOR value from a byte array.
|
|
20
|
+
*
|
|
21
|
+
* Supports all major types: unsigned/negative integers, byte/text strings,
|
|
22
|
+
* arrays, maps, tags (with special handling for CID tag 42), and simple
|
|
23
|
+
* values (true, false, null).
|
|
24
|
+
*
|
|
25
|
+
* @param bytes - Raw CBOR bytes
|
|
26
|
+
* @param startOffset - Byte position to start decoding from (default `0`)
|
|
27
|
+
* @returns The decoded value and the offset of the next byte after it
|
|
28
|
+
*
|
|
29
|
+
* @example
|
|
30
|
+
* ```ts
|
|
31
|
+
* // Decode a single value
|
|
32
|
+
* const { value } = cborDecode(bytes)
|
|
33
|
+
*
|
|
34
|
+
* // Decode two concatenated values (firehose frame)
|
|
35
|
+
* const { value: header, offset } = cborDecode(frameBytes)
|
|
36
|
+
* const { value: body } = cborDecode(frameBytes, offset)
|
|
37
|
+
* ```
|
|
38
|
+
*/
|
|
6
39
|
export function cborDecode(bytes, startOffset = 0) {
|
|
7
40
|
let offset = startOffset;
|
|
8
41
|
function read() {
|
package/dist/cid.d.ts
CHANGED
|
@@ -1,4 +1,41 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CID (Content Identifier), base32, and varint primitives.
|
|
3
|
+
*
|
|
4
|
+
* CIDs are self-describing content hashes used throughout the AT Protocol
|
|
5
|
+
* to reference blocks in repos and CAR files. This module provides the
|
|
6
|
+
* low-level encoding needed to convert raw CID bytes into their string
|
|
7
|
+
* representation (base32lower with `b` multibase prefix).
|
|
8
|
+
*
|
|
9
|
+
* @see https://github.com/multiformats/cid
|
|
10
|
+
* @module
|
|
11
|
+
*/
|
|
12
|
+
/**
|
|
13
|
+
* Encodes raw bytes as a base32 lowercase string (RFC 4648, no padding).
|
|
14
|
+
*
|
|
15
|
+
* @example
|
|
16
|
+
* ```ts
|
|
17
|
+
* base32Encode(new Uint8Array([0x01, 0x71])) // "afyq"
|
|
18
|
+
* ```
|
|
19
|
+
*/
|
|
1
20
|
export declare function base32Encode(bytes: Uint8Array): string;
|
|
21
|
+
/**
|
|
22
|
+
* Converts raw CID bytes to their multibase-encoded string form (`b` prefix + base32lower).
|
|
23
|
+
*
|
|
24
|
+
* @example
|
|
25
|
+
* ```ts
|
|
26
|
+
* cidToString(cidBytes) // "bafyreig..."
|
|
27
|
+
* ```
|
|
28
|
+
*/
|
|
2
29
|
export declare function cidToString(cidBytes: Uint8Array): string;
|
|
30
|
+
/**
|
|
31
|
+
* Reads an unsigned LEB128 varint from a byte array.
|
|
32
|
+
*
|
|
33
|
+
* Varints are used extensively in CID encoding and CAR framing to represent
|
|
34
|
+
* variable-length integers in a compact form.
|
|
35
|
+
*
|
|
36
|
+
* @param bytes - Source byte array
|
|
37
|
+
* @param offset - Position to start reading from
|
|
38
|
+
* @returns A tuple of `[value, nextOffset]`
|
|
39
|
+
*/
|
|
3
40
|
export declare function readVarint(bytes: Uint8Array, offset: number): [number, number];
|
|
4
41
|
//# sourceMappingURL=cid.d.ts.map
|
package/dist/cid.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cid.d.ts","sourceRoot":"","sources":["../src/cid.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"cid.d.ts","sourceRoot":"","sources":["../src/cid.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAKH;;;;;;;GAOG;AACH,wBAAgB,YAAY,CAAC,KAAK,EAAE,UAAU,GAAG,MAAM,CAmBtD;AAED;;;;;;;GAOG;AACH,wBAAgB,WAAW,CAAC,QAAQ,EAAE,UAAU,GAAG,MAAM,CAExD;AAED;;;;;;;;;GASG;AACH,wBAAgB,UAAU,CAAC,KAAK,EAAE,UAAU,EAAE,MAAM,EAAE,MAAM,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAc9E"}
|
package/dist/cid.js
CHANGED
|
@@ -1,6 +1,24 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
/**
|
|
2
|
+
* CID (Content Identifier), base32, and varint primitives.
|
|
3
|
+
*
|
|
4
|
+
* CIDs are self-describing content hashes used throughout the AT Protocol
|
|
5
|
+
* to reference blocks in repos and CAR files. This module provides the
|
|
6
|
+
* low-level encoding needed to convert raw CID bytes into their string
|
|
7
|
+
* representation (base32lower with `b` multibase prefix).
|
|
8
|
+
*
|
|
9
|
+
* @see https://github.com/multiformats/cid
|
|
10
|
+
* @module
|
|
11
|
+
*/
|
|
12
|
+
/** RFC 4648 base32 lowercase alphabet (no padding). */
|
|
3
13
|
const BASE32_ALPHABET = 'abcdefghijklmnopqrstuvwxyz234567';
|
|
14
|
+
/**
|
|
15
|
+
* Encodes raw bytes as a base32 lowercase string (RFC 4648, no padding).
|
|
16
|
+
*
|
|
17
|
+
* @example
|
|
18
|
+
* ```ts
|
|
19
|
+
* base32Encode(new Uint8Array([0x01, 0x71])) // "afyq"
|
|
20
|
+
* ```
|
|
21
|
+
*/
|
|
4
22
|
export function base32Encode(bytes) {
|
|
5
23
|
let result = '';
|
|
6
24
|
let bits = 0;
|
|
@@ -18,10 +36,27 @@ export function base32Encode(bytes) {
|
|
|
18
36
|
}
|
|
19
37
|
return result;
|
|
20
38
|
}
|
|
39
|
+
/**
|
|
40
|
+
* Converts raw CID bytes to their multibase-encoded string form (`b` prefix + base32lower).
|
|
41
|
+
*
|
|
42
|
+
* @example
|
|
43
|
+
* ```ts
|
|
44
|
+
* cidToString(cidBytes) // "bafyreig..."
|
|
45
|
+
* ```
|
|
46
|
+
*/
|
|
21
47
|
export function cidToString(cidBytes) {
|
|
22
|
-
// base32lower with 'b' multibase prefix
|
|
23
48
|
return `b${base32Encode(cidBytes)}`;
|
|
24
49
|
}
|
|
50
|
+
/**
|
|
51
|
+
* Reads an unsigned LEB128 varint from a byte array.
|
|
52
|
+
*
|
|
53
|
+
* Varints are used extensively in CID encoding and CAR framing to represent
|
|
54
|
+
* variable-length integers in a compact form.
|
|
55
|
+
*
|
|
56
|
+
* @param bytes - Source byte array
|
|
57
|
+
* @param offset - Position to start reading from
|
|
58
|
+
* @returns A tuple of `[value, nextOffset]`
|
|
59
|
+
*/
|
|
25
60
|
export function readVarint(bytes, offset) {
|
|
26
61
|
let value = 0;
|
|
27
62
|
let shift = 0;
|
package/dist/cli.js
CHANGED
|
@@ -591,6 +591,53 @@ backfill:
|
|
|
591
591
|
},
|
|
592
592
|
},
|
|
593
593
|
}, null, 2) + '\n');
|
|
594
|
+
writeFileSync(join(coreLexDir, 'getPreferences.json'), JSON.stringify({
|
|
595
|
+
lexicon: 1,
|
|
596
|
+
id: 'dev.hatk.getPreferences',
|
|
597
|
+
defs: {
|
|
598
|
+
main: {
|
|
599
|
+
type: 'query',
|
|
600
|
+
description: 'Get all preferences for the authenticated user.',
|
|
601
|
+
output: {
|
|
602
|
+
encoding: 'application/json',
|
|
603
|
+
schema: {
|
|
604
|
+
type: 'object',
|
|
605
|
+
properties: {
|
|
606
|
+
preferences: { type: 'unknown' },
|
|
607
|
+
},
|
|
608
|
+
},
|
|
609
|
+
},
|
|
610
|
+
},
|
|
611
|
+
},
|
|
612
|
+
}, null, 2) + '\n');
|
|
613
|
+
writeFileSync(join(coreLexDir, 'putPreference.json'), JSON.stringify({
|
|
614
|
+
lexicon: 1,
|
|
615
|
+
id: 'dev.hatk.putPreference',
|
|
616
|
+
defs: {
|
|
617
|
+
main: {
|
|
618
|
+
type: 'procedure',
|
|
619
|
+
description: 'Set a single preference by key.',
|
|
620
|
+
input: {
|
|
621
|
+
encoding: 'application/json',
|
|
622
|
+
schema: {
|
|
623
|
+
type: 'object',
|
|
624
|
+
required: ['key', 'value'],
|
|
625
|
+
properties: {
|
|
626
|
+
key: { type: 'string' },
|
|
627
|
+
value: { type: 'unknown' },
|
|
628
|
+
},
|
|
629
|
+
},
|
|
630
|
+
},
|
|
631
|
+
output: {
|
|
632
|
+
encoding: 'application/json',
|
|
633
|
+
schema: {
|
|
634
|
+
type: 'object',
|
|
635
|
+
properties: {},
|
|
636
|
+
},
|
|
637
|
+
},
|
|
638
|
+
},
|
|
639
|
+
},
|
|
640
|
+
}, null, 2) + '\n');
|
|
594
641
|
writeFileSync(join(coreLexDir, 'getFeed.json'), JSON.stringify({
|
|
595
642
|
lexicon: 1,
|
|
596
643
|
id: 'dev.hatk.getFeed',
|
|
@@ -801,7 +848,7 @@ COPY . .
|
|
|
801
848
|
RUN node_modules/.bin/hatk build
|
|
802
849
|
RUN npm prune --omit=dev
|
|
803
850
|
EXPOSE 3000
|
|
804
|
-
CMD ["node", "node_modules/@hatk/hatk/dist/main.js", "config.yaml"]
|
|
851
|
+
CMD ["node", "--max-old-space-size=256", "node_modules/@hatk/hatk/dist/main.js", "config.yaml"]
|
|
805
852
|
`);
|
|
806
853
|
const pkgDeps = { '@hatk/oauth-client': '*', hatk: '*' };
|
|
807
854
|
const pkgDevDeps = {
|
package/dist/server.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"server.d.ts","sourceRoot":"","sources":["../src/server.ts"],"names":[],"mappings":"AAAA,OAAO,EAAgB,KAAK,MAAM,EAAE,KAAK,eAAe,EAAE,MAAM,WAAW,CAAA;AAmD3E,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,aAAa,CAAA;AA2B9C,wBAAgB,WAAW,CACzB,IAAI,EAAE,MAAM,EACZ,WAAW,EAAE,MAAM,EAAE,EACrB,SAAS,EAAE,MAAM,GAAG,IAAI,EACxB,KAAK,EAAE,WAAW,GAAG,IAAI,EACzB,MAAM,GAAE,MAAM,EAAO,EACrB,aAAa,CAAC,EAAE,CAAC,GAAG,EAAE,eAAe,KAAK;IAAE,GAAG,EAAE,MAAM,CAAA;CAAE,GAAG,IAAI,GAC/D,MAAM,
|
|
1
|
+
{"version":3,"file":"server.d.ts","sourceRoot":"","sources":["../src/server.ts"],"names":[],"mappings":"AAAA,OAAO,EAAgB,KAAK,MAAM,EAAE,KAAK,eAAe,EAAE,MAAM,WAAW,CAAA;AAmD3E,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,aAAa,CAAA;AA2B9C,wBAAgB,WAAW,CACzB,IAAI,EAAE,MAAM,EACZ,WAAW,EAAE,MAAM,EAAE,EACrB,SAAS,EAAE,MAAM,GAAG,IAAI,EACxB,KAAK,EAAE,WAAW,GAAG,IAAI,EACzB,MAAM,GAAE,MAAM,EAAO,EACrB,aAAa,CAAC,EAAE,CAAC,GAAG,EAAE,eAAe,KAAK;IAAE,GAAG,EAAE,MAAM,CAAA;CAAE,GAAG,IAAI,GAC/D,MAAM,CA28BR"}
|
package/dist/server.js
CHANGED
|
@@ -481,7 +481,14 @@ export function startServer(port, collections, publicDir, oauth, admins = [], re
|
|
|
481
481
|
const sizeRows = await querySQL(`SELECT database_size, memory_usage, memory_limit FROM pragma_database_size()`);
|
|
482
482
|
const dbInfo = sizeRows[0] ?? {};
|
|
483
483
|
const collectionCounts = await getCollectionCounts();
|
|
484
|
-
|
|
484
|
+
const mem = process.memoryUsage();
|
|
485
|
+
const node = {
|
|
486
|
+
rss: `${(mem.rss / 1024 / 1024).toFixed(1)} MiB`,
|
|
487
|
+
heapUsed: `${(mem.heapUsed / 1024 / 1024).toFixed(1)} MiB`,
|
|
488
|
+
heapTotal: `${(mem.heapTotal / 1024 / 1024).toFixed(1)} MiB`,
|
|
489
|
+
external: `${(mem.external / 1024 / 1024).toFixed(1)} MiB`,
|
|
490
|
+
};
|
|
491
|
+
jsonResponse(res, { repos: counts, duckdb: dbInfo, node, collections: collectionCounts });
|
|
485
492
|
return;
|
|
486
493
|
}
|
|
487
494
|
// GET /admin/info/:did — repo status info
|