@hatk/hatk 0.0.1-alpha.6 → 0.0.1-alpha.61
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapter.d.ts +19 -0
- package/dist/adapter.d.ts.map +1 -0
- package/dist/adapter.js +108 -0
- package/dist/backfill.d.ts +2 -2
- package/dist/backfill.d.ts.map +1 -1
- package/dist/backfill.js +83 -41
- package/dist/car.d.ts +42 -10
- package/dist/car.d.ts.map +1 -1
- package/dist/car.js +154 -14
- package/dist/cli.js +243 -1043
- package/dist/config.d.ts +31 -1
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +40 -9
- package/dist/database/adapter-factory.d.ts +6 -0
- package/dist/database/adapter-factory.d.ts.map +1 -0
- package/dist/database/adapter-factory.js +20 -0
- package/dist/database/adapters/duckdb-search.d.ts +12 -0
- package/dist/database/adapters/duckdb-search.d.ts.map +1 -0
- package/dist/database/adapters/duckdb-search.js +27 -0
- package/dist/database/adapters/duckdb.d.ts +25 -0
- package/dist/database/adapters/duckdb.d.ts.map +1 -0
- package/dist/database/adapters/duckdb.js +161 -0
- package/dist/database/adapters/sqlite-search.d.ts +23 -0
- package/dist/database/adapters/sqlite-search.d.ts.map +1 -0
- package/dist/database/adapters/sqlite-search.js +74 -0
- package/dist/database/adapters/sqlite.d.ts +18 -0
- package/dist/database/adapters/sqlite.d.ts.map +1 -0
- package/dist/database/adapters/sqlite.js +88 -0
- package/dist/{db.d.ts → database/db.d.ts} +57 -6
- package/dist/database/db.d.ts.map +1 -0
- package/dist/{db.js → database/db.js} +730 -549
- package/dist/database/dialect.d.ts +45 -0
- package/dist/database/dialect.d.ts.map +1 -0
- package/dist/database/dialect.js +72 -0
- package/dist/{fts.d.ts → database/fts.d.ts} +7 -0
- package/dist/database/fts.d.ts.map +1 -0
- package/dist/{fts.js → database/fts.js} +116 -32
- package/dist/database/index.d.ts +7 -0
- package/dist/database/index.d.ts.map +1 -0
- package/dist/database/index.js +6 -0
- package/dist/database/ports.d.ts +50 -0
- package/dist/database/ports.d.ts.map +1 -0
- package/dist/database/ports.js +1 -0
- package/dist/{schema.d.ts → database/schema.d.ts} +14 -3
- package/dist/database/schema.d.ts.map +1 -0
- package/dist/{schema.js → database/schema.js} +81 -41
- package/dist/dev-entry.d.ts +8 -0
- package/dist/dev-entry.d.ts.map +1 -0
- package/dist/dev-entry.js +113 -0
- package/dist/feeds.d.ts +12 -8
- package/dist/feeds.d.ts.map +1 -1
- package/dist/feeds.js +51 -6
- package/dist/hooks.d.ts +85 -0
- package/dist/hooks.d.ts.map +1 -0
- package/dist/hooks.js +161 -0
- package/dist/hydrate.d.ts +7 -6
- package/dist/hydrate.d.ts.map +1 -1
- package/dist/hydrate.js +4 -16
- package/dist/indexer.d.ts +23 -0
- package/dist/indexer.d.ts.map +1 -1
- package/dist/indexer.js +181 -34
- package/dist/labels.d.ts +36 -0
- package/dist/labels.d.ts.map +1 -1
- package/dist/labels.js +71 -6
- package/dist/lexicon-resolve.d.ts.map +1 -1
- package/dist/lexicon-resolve.js +27 -112
- package/dist/lexicons/com/atproto/label/defs.json +75 -0
- package/dist/lexicons/com/atproto/moderation/defs.json +30 -0
- package/dist/lexicons/com/atproto/repo/strongRef.json +24 -0
- package/dist/lexicons/dev/hatk/applyWrites.json +87 -0
- package/dist/lexicons/dev/hatk/createRecord.json +40 -0
- package/dist/lexicons/dev/hatk/createReport.json +48 -0
- package/dist/lexicons/dev/hatk/deleteRecord.json +25 -0
- package/dist/lexicons/dev/hatk/describeCollections.json +41 -0
- package/dist/lexicons/dev/hatk/describeFeeds.json +29 -0
- package/dist/lexicons/dev/hatk/describeLabels.json +45 -0
- package/dist/lexicons/dev/hatk/getFeed.json +30 -0
- package/dist/lexicons/dev/hatk/getPreferences.json +19 -0
- package/dist/lexicons/dev/hatk/getRecord.json +26 -0
- package/dist/lexicons/dev/hatk/getRecords.json +32 -0
- package/dist/lexicons/dev/hatk/putPreference.json +28 -0
- package/dist/lexicons/dev/hatk/putRecord.json +41 -0
- package/dist/lexicons/dev/hatk/searchRecords.json +32 -0
- package/dist/lexicons/dev/hatk/uploadBlob.json +23 -0
- package/dist/logger.d.ts +29 -0
- package/dist/logger.d.ts.map +1 -1
- package/dist/logger.js +29 -0
- package/dist/main.js +138 -67
- package/dist/mst.d.ts +18 -1
- package/dist/mst.d.ts.map +1 -1
- package/dist/mst.js +19 -8
- package/dist/oauth/db.d.ts +3 -1
- package/dist/oauth/db.d.ts.map +1 -1
- package/dist/oauth/db.js +48 -19
- package/dist/oauth/server.d.ts +24 -0
- package/dist/oauth/server.d.ts.map +1 -1
- package/dist/oauth/server.js +198 -22
- package/dist/oauth/session.d.ts +11 -0
- package/dist/oauth/session.d.ts.map +1 -0
- package/dist/oauth/session.js +65 -0
- package/dist/opengraph.d.ts +10 -0
- package/dist/opengraph.d.ts.map +1 -1
- package/dist/opengraph.js +80 -40
- package/dist/pds-proxy.d.ts +60 -0
- package/dist/pds-proxy.d.ts.map +1 -0
- package/dist/pds-proxy.js +277 -0
- package/dist/push.d.ts +34 -0
- package/dist/push.d.ts.map +1 -0
- package/dist/push.js +184 -0
- package/dist/renderer.d.ts +27 -0
- package/dist/renderer.d.ts.map +1 -0
- package/dist/renderer.js +46 -0
- package/dist/resolve-hatk.d.ts +6 -0
- package/dist/resolve-hatk.d.ts.map +1 -0
- package/dist/resolve-hatk.js +20 -0
- package/dist/response.d.ts +16 -0
- package/dist/response.d.ts.map +1 -0
- package/dist/response.js +69 -0
- package/dist/scanner.d.ts +21 -0
- package/dist/scanner.d.ts.map +1 -0
- package/dist/scanner.js +88 -0
- package/dist/seed.d.ts +19 -0
- package/dist/seed.d.ts.map +1 -1
- package/dist/seed.js +43 -4
- package/dist/server-init.d.ts +8 -0
- package/dist/server-init.d.ts.map +1 -0
- package/dist/server-init.js +62 -0
- package/dist/server.d.ts +26 -3
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +629 -635
- package/dist/setup.d.ts +28 -1
- package/dist/setup.d.ts.map +1 -1
- package/dist/setup.js +50 -3
- package/dist/templates/feed.tpl +14 -0
- package/dist/templates/hook.tpl +5 -0
- package/dist/templates/label.tpl +15 -0
- package/dist/templates/og.tpl +17 -0
- package/dist/templates/seed.tpl +11 -0
- package/dist/templates/setup.tpl +5 -0
- package/dist/templates/test-feed.tpl +19 -0
- package/dist/templates/test-xrpc.tpl +19 -0
- package/dist/templates/xrpc.tpl +41 -0
- package/dist/test.d.ts +1 -1
- package/dist/test.d.ts.map +1 -1
- package/dist/test.js +39 -32
- package/dist/views.js +1 -1
- package/dist/vite-plugin.d.ts +1 -1
- package/dist/vite-plugin.d.ts.map +1 -1
- package/dist/vite-plugin.js +254 -66
- package/dist/xrpc.d.ts +75 -11
- package/dist/xrpc.d.ts.map +1 -1
- package/dist/xrpc.js +189 -39
- package/package.json +14 -7
- package/public/admin.html +133 -54
- package/dist/db.d.ts.map +0 -1
- package/dist/fts.d.ts.map +0 -1
- package/dist/oauth/hooks.d.ts +0 -10
- package/dist/oauth/hooks.d.ts.map +0 -1
- package/dist/oauth/hooks.js +0 -40
- package/dist/schema.d.ts.map +0 -1
- package/dist/test-browser.d.ts +0 -14
- package/dist/test-browser.d.ts.map +0 -1
- package/dist/test-browser.js +0 -26

package/dist/adapter.d.ts
ADDED
@@ -0,0 +1,19 @@
+import { type IncomingMessage, type ServerResponse } from 'node:http';
+/**
+ * Convert a Node.js IncomingMessage to a Web Standard Request.
+ */
+export declare function toRequest(req: IncomingMessage, base: string): Request;
+/**
+ * Pipe a Web Standard Response back to a Node.js ServerResponse.
+ */
+export declare function sendResponse(res: ServerResponse, response: Response): Promise<void>;
+/** Routes handled by hatk — everything else can fall through to a framework handler. */
+export declare const HATK_ROUTES: string[];
+export declare function isHatkRoute(pathname: string): boolean;
+/**
+ * Create a Node.js HTTP server from a Web Standard fetch handler.
+ * If a fallback Node middleware is provided, non-hatk routes are sent to it
+ * (e.g. SvelteKit's handler from build/handler.js).
+ */
+export declare function serve(handler: (request: Request) => Promise<Response>, port: number, base?: string, fallback?: (req: IncomingMessage, res: ServerResponse, next: () => void) => void): import("node:http").Server<typeof IncomingMessage, typeof ServerResponse>;
+//# sourceMappingURL=adapter.d.ts.map

package/dist/adapter.d.ts.map
ADDED
@@ -0,0 +1 @@
+{"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../src/adapter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,eAAe,EAAE,KAAK,cAAc,EAAgB,MAAM,WAAW,CAAA;AAEnF;;GAEG;AACH,wBAAgB,SAAS,CAAC,GAAG,EAAE,eAAe,EAAE,IAAI,EAAE,MAAM,GAAG,OAAO,CA0BrE;AAED;;GAEG;AACH,wBAAsB,YAAY,CAAC,GAAG,EAAE,cAAc,EAAE,QAAQ,EAAE,QAAQ,GAAG,OAAO,CAAC,IAAI,CAAC,CAuBzF;AAED,wFAAwF;AACxF,eAAO,MAAM,WAAW,UAcvB,CAAA;AAED,wBAAgB,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAErD;AAED;;;;GAIG;AACH,wBAAgB,KAAK,CACnB,OAAO,EAAE,CAAC,OAAO,EAAE,OAAO,KAAK,OAAO,CAAC,QAAQ,CAAC,EAChD,IAAI,EAAE,MAAM,EACZ,IAAI,CAAC,EAAE,MAAM,EACb,QAAQ,CAAC,EAAE,CAAC,GAAG,EAAE,eAAe,EAAE,GAAG,EAAE,cAAc,EAAE,IAAI,EAAE,MAAM,IAAI,KAAK,IAAI,6EA4BjF"}

package/dist/adapter.js
ADDED
@@ -0,0 +1,108 @@
+import { createServer } from 'node:http';
+/**
+ * Convert a Node.js IncomingMessage to a Web Standard Request.
+ */
+export function toRequest(req, base) {
+    const url = new URL(req.url, base);
+    const headers = new Headers();
+    for (const [key, value] of Object.entries(req.headers)) {
+        if (value) {
+            if (Array.isArray(value)) {
+                for (const v of value)
+                    headers.append(key, v);
+            }
+            else {
+                headers.set(key, value);
+            }
+        }
+    }
+    const init = {
+        method: req.method,
+        headers,
+    };
+    // GET and HEAD requests cannot have a body
+    if (req.method !== 'GET' && req.method !== 'HEAD') {
+        // @ts-expect-error — Node.js streams are valid body sources
+        init.body = req;
+        init.duplex = 'half';
+    }
+    return new Request(url.href, init);
+}
+/**
+ * Pipe a Web Standard Response back to a Node.js ServerResponse.
+ */
+export async function sendResponse(res, response) {
+    const rawHeaders = [];
+    response.headers.forEach((value, name) => {
+        rawHeaders.push(name, value);
+    });
+    res.writeHead(response.status, rawHeaders);
+    if (!response.body) {
+        res.end();
+        return;
+    }
+    const reader = response.body.getReader();
+    try {
+        while (true) {
+            const { done, value } = await reader.read();
+            if (done)
+                break;
+            res.write(value);
+        }
+    }
+    finally {
+        reader.releaseLock();
+        res.end();
+    }
+}
+/** Routes handled by hatk — everything else can fall through to a framework handler. */
+export const HATK_ROUTES = [
+    '/xrpc/',
+    '/oauth/',
+    '/oauth-client-metadata.json',
+    '/.well-known/oauth-authorization-server',
+    '/.well-known/oauth-protected-resource',
+    '/og/',
+    '/admin',
+    '/repos',
+    '/info/',
+    '/_health',
+    '/robots.txt',
+    '/auth/logout',
+    '/__dev/',
+];
+export function isHatkRoute(pathname) {
+    return HATK_ROUTES.some((r) => pathname.startsWith(r) || pathname === r);
+}
+/**
+ * Create a Node.js HTTP server from a Web Standard fetch handler.
+ * If a fallback Node middleware is provided, non-hatk routes are sent to it
+ * (e.g. SvelteKit's handler from build/handler.js).
+ */
+export function serve(handler, port, base, fallback) {
+    const origin = base || `http://localhost:${port}`;
+    const server = createServer(async (req, res) => {
+        try {
+            const url = new URL(req.url, origin);
+            // If we have a fallback (e.g. SvelteKit) and this isn't a hatk route, skip hatk
+            if (fallback && !isHatkRoute(url.pathname)) {
+                fallback(req, res, () => {
+                    res.writeHead(404);
+                    res.end('Not found');
+                });
+                return;
+            }
+            const request = toRequest(req, origin);
+            const response = await handler(request);
+            await sendResponse(res, response);
+        }
+        catch (err) {
+            if (!res.headersSent) {
+                res.writeHead(500, { 'Content-Type': 'application/json' });
+            }
+            res.end(JSON.stringify({ error: err.message }));
+        }
+    });
+    server.listen(port);
+    return server;
+}
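
Taken together, `toRequest`, `sendResponse`, and `serve` bridge a Web-standard fetch handler onto a plain Node server, with non-hatk routes handed to a framework middleware. A minimal wiring sketch (the `@hatk/hatk` import specifier and the `app` handler are illustrative assumptions; only `serve` and the fallback signature come from the code above):

```ts
// Sketch: one Node server hosting a fetch-style handler plus a SvelteKit fallback.
// The root import path is assumed, not a verified export of the package.
import { serve } from '@hatk/hatk'
// SvelteKit's adapter-node output, as referenced in the comment above
import { handler as svelteKit } from './build/handler.js'

const app = async (request: Request): Promise<Response> => {
  // hatk's own fetch handler would be created here
  return new Response('ok')
}

// hatk answers /xrpc/, /oauth/, /admin, etc.; anything else falls through
// to the SvelteKit middleware instead of hitting hatk's router.
serve(app, 3000, 'http://localhost:3000', svelteKit)
```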

package/dist/backfill.d.ts
CHANGED
@@ -7,7 +7,7 @@ interface BackfillOpts {
     plcUrl: string;
     /** AT Protocol collection NSIDs to index (e.g. `app.bsky.feed.post`). */
     collections: Set<string>;
-    /** Backfill behavior settings from `config.
+    /** Backfill behavior settings from `hatk.config.ts`. */
     config: BackfillConfig;
 }
 /**
@@ -65,6 +65,6 @@ export declare function backfillRepo(did: string, collections: Set<string>, fetc
  * })
  * ```
  */
-export declare function runBackfill(opts: BackfillOpts): Promise<
+export declare function runBackfill(opts: BackfillOpts): Promise<number>;
 export {};
 //# sourceMappingURL=backfill.d.ts.map

package/dist/backfill.d.ts.map
CHANGED
@@ -1 +1 @@
-{"version":3,"file":"backfill.d.ts","sourceRoot":"","sources":["../src/backfill.ts"],"names":[],"mappings":"
+{"version":3,"file":"backfill.d.ts","sourceRoot":"","sources":["../src/backfill.ts"],"names":[],"mappings":"AAiBA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAA;AAIjD,6CAA6C;AAC7C,UAAU,YAAY;IACpB,wFAAwF;IACxF,MAAM,EAAE,MAAM,CAAA;IACd,8FAA8F;IAC9F,MAAM,EAAE,MAAM,CAAA;IACd,yEAAyE;IACzE,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,CAAA;IACxB,wDAAwD;IACxD,MAAM,EAAE,cAAc,CAAA;CACvB;AA+FD;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,wBAAsB,YAAY,CAAC,GAAG,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,EAAE,YAAY,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAkK/G;AA8BD;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AACH,wBAAsB,WAAW,CAAC,IAAI,EAAE,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,CAkIrE"}

package/dist/backfill.js
CHANGED
@@ -1,16 +1,18 @@
-import {
+import { parseCarStream } from "./car.js";
 import { cborDecode } from "./cbor.js";
 import { walkMst } from "./mst.js";
-import { setRepoStatus, getRepoStatus, getRepoRetryInfo, listRetryEligibleRepos, listPendingRepos, querySQL, runSQL, getSchema, bulkInsertRecords, } from "./db.js";
+import { setRepoStatus, getRepoStatus, getRepoRev, getRepoRetryInfo, listRetryEligibleRepos, listPendingRepos, querySQL, runSQL, getSchema, bulkInsertRecords, } from "./database/db.js";
 import { emit, timer } from "./logger.js";
-
-
+import { validateRecord } from '@bigmoves/lexicon';
+import { getLexiconArray } from "./database/schema.js";
 let plcUrl;
 /**
  * Resolves a DID to its PDS endpoint and handle by fetching the DID document.
  *
  * Supports both `did:web` (fetches `/.well-known/did.json`) and `did:plc`
- * (fetches from the PLC directory).
+ * (fetches from the PLC directory). Always fetches fresh — DID docs change
+ * (handle renames, PDS migrations) and a stale cache silently rewrites stale
+ * handles back into `_repos` on every backfill.
  *
  * @example
  * ```ts
@@ -20,9 +22,6 @@ let plcUrl;
  * ```
  */
 async function resolvePds(did) {
-    const cached = pdsCache.get(did);
-    if (cached)
-        return cached;
     let didDoc;
     if (did.startsWith('did:web:')) {
         const domain = did.slice('did:web:'.length);
@@ -40,12 +39,10 @@ async function resolvePds(did) {
     const pds = didDoc.service?.find((s) => s.id === '#atproto_pds')?.serviceEndpoint;
     if (!pds)
         throw new Error(`No PDS endpoint in DID document for ${did}`);
-    //
+    // First at:// entry in alsoKnownAs is the canonical handle (per @atproto/identity convention)
     const aka = didDoc.alsoKnownAs?.find((u) => u.startsWith('at://'));
     const handle = aka ? aka.slice('at://'.length) : null;
-
-    pdsCache.set(did, result);
-    return result;
+    return { pds, handle };
 }
 /**
  * Paginates through all active repos on a relay/PDS using `com.atproto.sync.listRepos`.
@@ -128,6 +125,7 @@ export async function backfillRepo(did, collections, fetchTimeout) {
     let error;
     let resolvedPds;
     let resolvedHandle = null;
+    let resolvedSince = null;
     let retryCount;
     let retryAfter;
     const controller = new AbortController();
@@ -137,26 +135,67 @@ export async function backfillRepo(did, collections, fetchTimeout) {
         resolvedPds = pdsUrl;
         resolvedHandle = handle;
         timeout = setTimeout(() => controller.abort(), fetchTimeout * 1000);
-
-
-        });
+        let lastRev = await getRepoRev(did);
+        const baseUrl = `${resolvedPds}/xrpc/com.atproto.sync.getRepo?did=${encodeURIComponent(did)}`;
+        let repoUrl = lastRev ? `${baseUrl}&since=${encodeURIComponent(lastRev)}` : baseUrl;
+        let res = await fetch(repoUrl, { signal: controller.signal });
+        // If the PDS rejected our `since` rev (compacted history), fall back to full import
+        if (res.status === 400 && lastRev) {
+            lastRev = null;
+            res = await fetch(baseUrl, { signal: controller.signal });
+        }
         if (!res.ok) {
             const httpErr = new Error(`getRepo failed for ${did}: ${res.status}`);
             httpErr.httpStatus = res.status;
             throw httpErr;
         }
-
-
-
-
-        //
-
+        resolvedSince = lastRev;
+        let { roots, blocks, byteLength } = await parseCarStream(res.body);
+        carSizeBytes = byteLength;
+        // Decode commit to get MST root — if the diff CAR is missing the root block,
+        // fall back to a full import (the PDS compacted past our `since` rev)
+        let rootData = blocks.get(roots[0]);
+        if (!rootData && lastRev) {
+            lastRev = null;
+            resolvedSince = null;
+            res = await fetch(baseUrl, { signal: controller.signal });
+            if (!res.ok) {
+                const httpErr = new Error(`getRepo failed for ${did}: ${res.status}`);
+                httpErr.httpStatus = res.status;
+                throw httpErr;
+            }
+            ;
+            ({ roots, blocks, byteLength } = await parseCarStream(res.body));
+            carSizeBytes = byteLength;
+            rootData = blocks.get(roots[0]);
+        }
         if (!rootData)
             throw new Error(`No root block for ${did}`);
         const { value: commit } = cborDecode(rootData);
         // Walk MST to find all record paths
         const entries = walkMst(blocks, commit.data.$link);
-
+        // Delete existing records for this DID before re-importing so deletions are reflected
+        // Only on full imports (no since) — diff CARs only contain changes
+        if (!lastRev) {
+            for (const col of collections) {
+                const schema = getSchema(col);
+                if (!schema)
+                    continue;
+                await runSQL(`DELETE FROM ${schema.tableName} WHERE did = $1`, [did]);
+                for (const child of schema.children) {
+                    await runSQL(`DELETE FROM ${child.tableName} WHERE parent_did = $1`, [did]);
+                }
+                for (const union of schema.unions) {
+                    for (const branch of union.branches) {
+                        await runSQL(`DELETE FROM ${branch.tableName} WHERE parent_did = $1`, [did]);
+                    }
+                }
+            }
+        }
+        // Insert records in chunks to limit memory usage
+        const CHUNK_SIZE = 1000;
+        let chunk = [];
+        const validationSkips = {};
         for (const entry of entries) {
             const collection = entry.path.split('/')[0];
             if (!collections.has(collection))
@@ -164,13 +203,23 @@ export async function backfillRepo(did, collections, fetchTimeout) {
             const blockData = blocks.get(entry.cid);
             if (!blockData)
                 continue;
+            blocks.delete(entry.cid); // free block data as we go
             try {
                 const { value: record } = cborDecode(blockData);
                 if (!record?.$type)
                     continue;
                 const rkey = entry.path.split('/').slice(1).join('/');
                 const uri = `at://${did}/${collection}/${rkey}`;
-
+                const validationError = validateRecord(getLexiconArray(), collection, record);
+                if (validationError) {
+                    validationSkips[collection] = (validationSkips[collection] || 0) + 1;
+                    continue;
+                }
+                chunk.push({ collection, uri, cid: entry.cid, did, record });
+                if (chunk.length >= CHUNK_SIZE) {
+                    count += await bulkInsertRecords(chunk);
+                    chunk = [];
+                }
             }
             catch (recordErr) {
                 emit('backfill', 'record_error', {
@@ -181,23 +230,13 @@ export async function backfillRepo(did, collections, fetchTimeout) {
                 });
             }
         }
-
-
-
-
-
-
-        await runSQL(`DELETE FROM ${schema.tableName} WHERE did = $1`, did);
-        for (const child of schema.children) {
-            await runSQL(`DELETE FROM ${child.tableName} WHERE parent_did = $1`, did);
-        }
-        for (const union of schema.unions) {
-            for (const branch of union.branches) {
-                await runSQL(`DELETE FROM ${branch.tableName} WHERE parent_did = $1`, did);
-            }
-        }
+        if (chunk.length > 0) {
+            count += await bulkInsertRecords(chunk);
+        }
+        const totalSkips = Object.values(validationSkips).reduce((a, b) => a + b, 0);
+        if (totalSkips > 0) {
+            emit('backfill', 'validation_skips', { did, total: totalSkips, by_collection: validationSkips });
         }
-        count = await bulkInsertRecords(bulk);
         await setRepoStatus(did, 'active', commit.rev, { handle });
         return count;
     }
@@ -229,6 +268,8 @@ export async function backfillRepo(did, collections, fetchTimeout) {
         error,
         pds_url: resolvedPds,
        car_size_bytes: carSizeBytes,
+        import_mode: carSizeBytes !== undefined ? (resolvedSince ? 'diff' : 'full') : undefined,
+        since_rev: resolvedSince,
         retry_count: retryCount,
         retry_after: retryAfter,
         permanent_failure: retryCount === 999 ? true : undefined,
@@ -354,7 +395,7 @@ export async function runBackfill(opts) {
             parallelism: config.parallelism,
             status: 'success',
         });
-        return;
+        return 0;
     }
     // 3. Backfill with worker pool
     let totalRecords = 0;
@@ -378,7 +419,7 @@ export async function runBackfill(opts) {
             retryRound++;
             // Wait until the earliest retry_after has passed
             const now = Math.floor(Date.now() / 1000);
-            const rows = await querySQL(`SELECT MIN(retry_after) as earliest FROM _repos WHERE status = 'failed' AND retry_after > $1 AND retry_count < $2`, [now, maxRetries]);
+            const rows = (await querySQL(`SELECT MIN(retry_after) as earliest FROM _repos WHERE status = 'failed' AND retry_after > $1 AND retry_count < $2`, [now, maxRetries]));
             const earliest = rows[0]?.earliest ? Number(rows[0].earliest) : 0;
             if (earliest > now) {
                 await new Promise((resolve) => setTimeout(resolve, (earliest - now) * 1000));
@@ -412,4 +453,5 @@ export async function runBackfill(opts) {
         retry_rounds: retryRound,
         status: failedCount > 0 ? 'partial' : 'success',
     });
+    return totalRecords;
 }
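
The core of this change, incremental sync with two fallbacks, is easier to follow outside the diff markers. A condensed restatement (same names as the code above; abort signal and status bookkeeping omitted, this is a sketch rather than the shipped code):

```ts
// Try a diff CAR since the last indexed rev; fall back to a full snapshot
// when the PDS rejects the rev (HTTP 400) or the diff CAR lacks the root
// block (history compacted past our `since` rev).
async function fetchRepoCar(pds: string, did: string, lastRev: string | null) {
  const base = `${pds}/xrpc/com.atproto.sync.getRepo?did=${encodeURIComponent(did)}`
  const full = async () => {
    const r = await fetch(base)
    if (!r.ok) throw new Error(`getRepo failed for ${did}: ${r.status}`)
    return { car: await parseCarStream(r.body!), mode: 'full' as const }
  }
  if (!lastRev) return full()
  const res = await fetch(`${base}&since=${encodeURIComponent(lastRev)}`)
  if (res.status === 400) return full()
  if (!res.ok) throw new Error(`getRepo failed for ${did}: ${res.status}`)
  const car = await parseCarStream(res.body!)
  if (!car.blocks.get(car.roots[0])) return full()
  return { car, mode: 'diff' as const }
}
```

Note the asymmetry this creates: deletions are only reconciled on full imports (the `DELETE FROM` pass), because a diff CAR carries changed blocks only.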

package/dist/car.d.ts
CHANGED
@@ -12,20 +12,52 @@
  * @module
  */
 /**
- *
+ * A memory-efficient block map that stores byte offsets into the original CAR
+ * buffer instead of copying block data. Implements the same `get`/`delete`/`size`
+ * interface as `Map<string, Uint8Array>` so it can be used as a drop-in replacement.
+ */
+export declare class LazyBlockMap {
+    private offsets;
+    private carBytes;
+    constructor(carBytes: Uint8Array, offsets: Map<string, [number, number]>);
+    get(cid: string): Uint8Array | undefined;
+    delete(cid: string): boolean;
+    get size(): number;
+    [Symbol.iterator](): IterableIterator<[string, Uint8Array]>;
+    /** Release the underlying CAR buffer */
+    free(): void;
+}
+/**
+ * Parses a CARv1 stream incrementally from a `ReadableStream`.
+ *
+ * Instead of buffering the entire CAR into a single ArrayBuffer, this reads
+ * chunks from the stream and parses blocks as they arrive. Each block's data
+ * is `.slice()`d into its own small `Uint8Array`, allowing V8 to GC individual
+ * blocks as they're consumed during the MST walk.
+ *
+ * This is critical for backfill where multiple workers download 30-90MB CARs
+ * concurrently — buffered downloads cause OOMs because `ArrayBuffer` memory
+ * is "external" to V8's heap and not controlled by `--max-old-space-size`.
+ *
+ * @param body - The response body stream (e.g. `res.body` from `fetch()`)
+ * @returns `roots` — root CID strings; `blocks` — map of CID → block data; `byteLength` — total bytes read
+ */
+export declare function parseCarStream(body: ReadableStream<Uint8Array>): Promise<{
+    roots: string[];
+    blocks: Map<string, Uint8Array>;
+    byteLength: number;
+}>;
+/**
+ * Parses a CARv1 binary frame into its root CIDs and a lazy block map.
+ *
+ * The block map stores byte offsets into `carBytes` rather than copying data,
+ * reducing heap usage from O(total block bytes) to O(number of blocks * 16 bytes).
  *
  * @param carBytes - Raw CAR file bytes (e.g. from `getRepo` or a firehose commit)
- * @returns `roots` — ordered list of root CID strings; `blocks` —
- *
- * @example
- * ```ts
- * const car = new Uint8Array(await res.arrayBuffer())
- * const { roots, blocks } = parseCarFrame(car)
- * const commitData = blocks.get(roots[0])
- * ```
+ * @returns `roots` — ordered list of root CID strings; `blocks` — lazy block map
  */
 export declare function parseCarFrame(carBytes: Uint8Array): {
     roots: string[];
-    blocks:
+    blocks: LazyBlockMap;
 };
 //# sourceMappingURL=car.d.ts.map
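
In backfill, the streaming parser plugs straight into the `getRepo` response body; roughly (a usage sketch mirroring backfill.js above):

```ts
const res = await fetch(`${pds}/xrpc/com.atproto.sync.getRepo?did=${encodeURIComponent(did)}`)
if (!res.ok || !res.body) throw new Error(`getRepo failed: ${res.status}`)
const { roots, blocks, byteLength } = await parseCarStream(res.body)
// `blocks` is a plain Map here; deleting entries as they are consumed during
// the MST walk lets V8 reclaim each record's bytes early.
const commitBlock = blocks.get(roots[0])
```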

package/dist/car.d.ts.map
CHANGED
@@ -1 +1 @@
-{"version":3,"file":"car.d.ts","sourceRoot":"","sources":["../src/car.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAuCH
+{"version":3,"file":"car.d.ts","sourceRoot":"","sources":["../src/car.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAuCH;;;;GAIG;AACH,qBAAa,YAAY;IACvB,OAAO,CAAC,OAAO,CAA+B;IAC9C,OAAO,CAAC,QAAQ,CAAmB;gBAEvB,QAAQ,EAAE,UAAU,EAAE,OAAO,EAAE,GAAG,CAAC,MAAM,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAKxE,GAAG,CAAC,GAAG,EAAE,MAAM,GAAG,UAAU,GAAG,SAAS;IAMxC,MAAM,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO;IAI5B,IAAI,IAAI,IAAI,MAAM,CAEjB;IAEA,CAAC,MAAM,CAAC,QAAQ,CAAC,IAAI,gBAAgB,CAAC,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC;IAO5D,wCAAwC;IACxC,IAAI,IAAI,IAAI;CAIb;AAED;;;;;;;;;;;;;;GAcG;AACH,wBAAsB,cAAc,CAAC,IAAI,EAAE,cAAc,CAAC,UAAU,CAAC,GAAG,OAAO,CAAC;IAC9E,KAAK,EAAE,MAAM,EAAE,CAAA;IACf,MAAM,EAAE,GAAG,CAAC,MAAM,EAAE,UAAU,CAAC,CAAA;IAC/B,UAAU,EAAE,MAAM,CAAA;CACnB,CAAC,CAsGD;AAED;;;;;;;;GAQG;AACH,wBAAgB,aAAa,CAAC,QAAQ,EAAE,UAAU,GAAG;IACnD,KAAK,EAAE,MAAM,EAAE,CAAA;IACf,MAAM,EAAE,YAAY,CAAA;CACrB,CAiCA"}
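
The rewrite also dropped `parseCarFrame`'s old JSDoc example. Updated for the lazy map, it would read roughly like this (a sketch; `free()` and the map interface are taken from the declarations above):

```ts
const car = new Uint8Array(await res.arrayBuffer())
const { roots, blocks } = parseCarFrame(car)  // blocks is a LazyBlockMap
const commitData = blocks.get(roots[0])       // zero-copy subarray view
// ...walk the MST, calling blocks.delete(cid) as entries are consumed...
blocks.free()                                 // drop the CAR buffer reference
```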

package/dist/car.js
CHANGED
@@ -42,17 +42,158 @@ function parseCidFromBytes(bytes, offset) {
     return [bytes.slice(offset, pos), pos];
 }
 /**
- *
+ * A memory-efficient block map that stores byte offsets into the original CAR
+ * buffer instead of copying block data. Implements the same `get`/`delete`/`size`
+ * interface as `Map<string, Uint8Array>` so it can be used as a drop-in replacement.
+ */
+export class LazyBlockMap {
+    offsets;
+    carBytes;
+    constructor(carBytes, offsets) {
+        this.carBytes = carBytes;
+        this.offsets = offsets;
+    }
+    get(cid) {
+        const range = this.offsets.get(cid);
+        if (!range || !this.carBytes)
+            return undefined;
+        return this.carBytes.subarray(range[0], range[1]);
+    }
+    delete(cid) {
+        return this.offsets.delete(cid);
+    }
+    get size() {
+        return this.offsets.size;
+    }
+    *[Symbol.iterator]() {
+        for (const [cid, range] of this.offsets) {
+            if (!this.carBytes)
+                return;
+            yield [cid, this.carBytes.subarray(range[0], range[1])];
+        }
+    }
+    /** Release the underlying CAR buffer */
+    free() {
+        this.carBytes = null;
+        this.offsets.clear();
+    }
+}
+/**
+ * Parses a CARv1 stream incrementally from a `ReadableStream`.
  *
- *
- *
+ * Instead of buffering the entire CAR into a single ArrayBuffer, this reads
+ * chunks from the stream and parses blocks as they arrive. Each block's data
+ * is `.slice()`d into its own small `Uint8Array`, allowing V8 to GC individual
+ * blocks as they're consumed during the MST walk.
  *
- *
- *
- *
- *
- *
- *
+ * This is critical for backfill where multiple workers download 30-90MB CARs
+ * concurrently — buffered downloads cause OOMs because `ArrayBuffer` memory
+ * is "external" to V8's heap and not controlled by `--max-old-space-size`.
+ *
+ * @param body - The response body stream (e.g. `res.body` from `fetch()`)
+ * @returns `roots` — root CID strings; `blocks` — map of CID → block data; `byteLength` — total bytes read
+ */
+export async function parseCarStream(body) {
+    const reader = body.getReader();
+    // Growable buffer with position tracking. We reuse a single allocation and
+    // compact (shift data to front) when the read position passes the midpoint,
+    // avoiding per-chunk allocations and subarray references that pin old memory.
+    let buf = new Uint8Array(64 * 1024);
+    let pos = 0; // read cursor
+    let len = 0; // bytes of valid data in buf
+    let byteLength = 0;
+    // Ensure at least `need` bytes are available at buf[pos..pos+need)
+    async function fill(need) {
+        while (len - pos < need) {
+            const { done, value } = await reader.read();
+            if (done)
+                return len - pos >= need;
+            byteLength += value.length;
+            // Compact: shift remaining data to front when read cursor passes midpoint
+            if (pos > 0 && pos > buf.length >>> 1) {
+                buf.copyWithin(0, pos, len);
+                len -= pos;
+                pos = 0;
+            }
+            // Grow if needed
+            const required = len + value.length;
+            if (required > buf.length) {
+                const newBuf = new Uint8Array(Math.max(required, buf.length * 2));
+                newBuf.set(buf.subarray(0, len));
+                buf = newBuf;
+            }
+            buf.set(value, len);
+            len += value.length;
+        }
+        return true;
+    }
+    function consume(n) {
+        pos += n;
+    }
+    // Read a varint starting at buf[pos]
+    function readVarintFromBuf() {
+        let value = 0;
+        let shift = 0;
+        let p = pos;
+        while (p < len) {
+            const byte = buf[p++];
+            value |= (byte & 0x7f) << shift;
+            if ((byte & 0x80) === 0)
+                return [value, p - pos];
+            shift += 7;
+            if (shift > 35)
+                throw new Error('Varint too long');
+        }
+        throw new Error('Unexpected end of varint');
+    }
+    // Parse header: varint(headerLen) + CBOR(header)
+    if (!(await fill(1)))
+        throw new Error('Empty CAR stream');
+    // Prefetch up to 10 bytes for the varint; readVarintFromBuf bounds to `len`
+    await fill(10);
+    const [headerLen, headerVarintSize] = readVarintFromBuf();
+    consume(headerVarintSize);
+    if (!(await fill(headerLen)))
+        throw new Error('Truncated CAR header');
+    // .slice() copies out of the reusable buffer
+    const headerSlice = buf.slice(pos, pos + headerLen);
+    const { value: header } = cborDecode(headerSlice);
+    consume(headerLen);
+    const roots = (header.roots || []).map((root) => root?.$link ?? cidToString(root));
+    // Parse blocks
+    const blocks = new Map();
+    while (true) {
+        if (!(await fill(1)))
+            break;
+        // Prefetch up to 10 bytes for the varint; readVarintFromBuf bounds to `len`
+        await fill(10);
+        const [blockLen, blockVarintSize] = readVarintFromBuf();
+        consume(blockVarintSize);
+        if (blockLen === 0)
+            break;
+        if (!(await fill(blockLen)))
+            throw new Error('Truncated CAR block');
+        const [cidBytes, afterCid] = parseCidFromBytes(buf, pos);
+        const cid = cidToString(cidBytes);
+        const cidLen = afterCid - pos;
+        // .slice() creates an independent copy — the buffer can be reused
+        const data = buf.slice(afterCid, afterCid + blockLen - cidLen);
+        blocks.set(cid, data);
+        consume(blockLen);
+    }
+    reader.releaseLock();
+    // Release the internal buffer
+    buf = null;
+    return { roots, blocks, byteLength };
+}
+/**
+ * Parses a CARv1 binary frame into its root CIDs and a lazy block map.
+ *
+ * The block map stores byte offsets into `carBytes` rather than copying data,
+ * reducing heap usage from O(total block bytes) to O(number of blocks * 16 bytes).
+ *
+ * @param carBytes - Raw CAR file bytes (e.g. from `getRepo` or a firehose commit)
+ * @returns `roots` — ordered list of root CID strings; `blocks` — lazy block map
  */
 export function parseCarFrame(carBytes) {
     let offset = 0;
@@ -66,8 +207,8 @@ export function parseCarFrame(carBytes) {
     // Our CBOR decoder converts tag-42 CIDs to { $link: "b..." } objects,
     // so roots may already be decoded strings
     const roots = (header.roots || []).map((root) => root?.$link ?? cidToString(root));
-    //
-    const
+    // Build offset index: CID → [start, end] into carBytes
+    const offsets = new Map();
     while (offset < carBytes.length) {
         const [blockLen, afterBlockLen] = readVarint(carBytes, offset);
         offset = afterBlockLen;
@@ -76,9 +217,8 @@ export function parseCarFrame(carBytes) {
         const [cidBytes, afterCid] = parseCidFromBytes(carBytes, offset);
         const cid = cidToString(cidBytes);
        const dataLen = blockLen - (afterCid - offset);
-
-        blocks.set(cid, data);
+        offsets.set(cid, [afterCid, afterCid + dataLen]);
         offset = afterCid + dataLen;
     }
-    return { roots, blocks };
+    return { roots, blocks: new LazyBlockMap(carBytes, offsets) };
 }