@hatk/hatk 0.0.1-alpha.8 → 0.0.1-alpha.9
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- package/dist/backfill.d.ts.map +1 -1
- package/dist/backfill.js +4 -3
- package/dist/car.d.ts +22 -10
- package/dist/car.d.ts.map +1 -1
- package/dist/car.js +46 -14
- package/dist/config.js +1 -1
- package/dist/mst.d.ts +3 -1
- package/dist/mst.d.ts.map +1 -1
- package/dist/mst.js +6 -8
- package/package.json +1 -1
package/dist/backfill.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"backfill.d.ts","sourceRoot":"","sources":["../src/backfill.ts"],"names":[],"mappings":"AAgBA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAA;AAEjD,6CAA6C;AAC7C,UAAU,YAAY;IACpB,wFAAwF;IACxF,MAAM,EAAE,MAAM,CAAA;IACd,8FAA8F;IAC9F,MAAM,EAAE,MAAM,CAAA;IACd,yEAAyE;IACzE,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,CAAA;IACxB,qDAAqD;IACrD,MAAM,EAAE,cAAc,CAAA;CACvB;AAuGD;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,wBAAsB,YAAY,CAAC,GAAG,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,EAAE,YAAY,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,
|
|
1
|
+
{"version":3,"file":"backfill.d.ts","sourceRoot":"","sources":["../src/backfill.ts"],"names":[],"mappings":"AAgBA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAA;AAEjD,6CAA6C;AAC7C,UAAU,YAAY;IACpB,wFAAwF;IACxF,MAAM,EAAE,MAAM,CAAA;IACd,8FAA8F;IAC9F,MAAM,EAAE,MAAM,CAAA;IACd,yEAAyE;IACzE,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,CAAA;IACxB,qDAAqD;IACrD,MAAM,EAAE,cAAc,CAAA;CACvB;AAuGD;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,wBAAsB,YAAY,CAAC,GAAG,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,EAAE,YAAY,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CA4H/G;AAgCD;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AACH,wBAAsB,WAAW,CAAC,IAAI,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC,CAiInE"}
|
package/dist/backfill.js
CHANGED
|
@@ -145,10 +145,9 @@ export async function backfillRepo(did, collections, fetchTimeout) {
|
|
|
145
145
|
httpErr.httpStatus = res.status;
|
|
146
146
|
throw httpErr;
|
|
147
147
|
}
|
|
148
|
-
|
|
148
|
+
const carBytes = new Uint8Array(await res.arrayBuffer());
|
|
149
149
|
carSizeBytes = carBytes.length;
|
|
150
150
|
let { roots, blocks } = parseCarFrame(carBytes);
|
|
151
|
-
carBytes = null; // free CAR bytes before bulk insert
|
|
152
151
|
// Decode commit to get MST root
|
|
153
152
|
const rootData = blocks.get(roots[0]);
|
|
154
153
|
if (!rootData)
|
|
@@ -181,6 +180,7 @@ export async function backfillRepo(did, collections, fetchTimeout) {
|
|
|
181
180
|
const blockData = blocks.get(entry.cid);
|
|
182
181
|
if (!blockData)
|
|
183
182
|
continue;
|
|
183
|
+
blocks.delete(entry.cid); // free block data as we go
|
|
184
184
|
try {
|
|
185
185
|
const { value: record } = cborDecode(blockData);
|
|
186
186
|
if (!record?.$type)
|
|
@@ -202,7 +202,8 @@ export async function backfillRepo(did, collections, fetchTimeout) {
|
|
|
202
202
|
});
|
|
203
203
|
}
|
|
204
204
|
}
|
|
205
|
-
blocks
|
|
205
|
+
blocks.free();
|
|
206
|
+
blocks = null;
|
|
206
207
|
if (chunk.length > 0) {
|
|
207
208
|
count += await bulkInsertRecords(chunk);
|
|
208
209
|
}
|
package/dist/car.d.ts
CHANGED
|
@@ -12,20 +12,32 @@
|
|
|
12
12
|
* @module
|
|
13
13
|
*/
|
|
14
14
|
/**
|
|
15
|
-
*
|
|
15
|
+
* A memory-efficient block map that stores byte offsets into the original CAR
|
|
16
|
+
* buffer instead of copying block data. Implements the same `get`/`delete`/`size`
|
|
17
|
+
* interface as `Map<string, Uint8Array>` so it can be used as a drop-in replacement.
|
|
18
|
+
*/
|
|
19
|
+
export declare class LazyBlockMap {
|
|
20
|
+
private offsets;
|
|
21
|
+
private carBytes;
|
|
22
|
+
constructor(carBytes: Uint8Array, offsets: Map<string, [number, number]>);
|
|
23
|
+
get(cid: string): Uint8Array | undefined;
|
|
24
|
+
delete(cid: string): boolean;
|
|
25
|
+
get size(): number;
|
|
26
|
+
[Symbol.iterator](): IterableIterator<[string, Uint8Array]>;
|
|
27
|
+
/** Release the underlying CAR buffer */
|
|
28
|
+
free(): void;
|
|
29
|
+
}
|
|
30
|
+
/**
|
|
31
|
+
* Parses a CARv1 binary frame into its root CIDs and a lazy block map.
|
|
16
32
|
*
|
|
17
|
-
*
|
|
18
|
-
*
|
|
33
|
+
* The block map stores byte offsets into `carBytes` rather than copying data,
|
|
34
|
+
* reducing heap usage from O(total block bytes) to O(number of blocks * 16 bytes).
|
|
19
35
|
*
|
|
20
|
-
* @
|
|
21
|
-
*
|
|
22
|
-
* const car = new Uint8Array(await res.arrayBuffer())
|
|
23
|
-
* const { roots, blocks } = parseCarFrame(car)
|
|
24
|
-
* const commitData = blocks.get(roots[0])
|
|
25
|
-
* ```
|
|
36
|
+
* @param carBytes - Raw CAR file bytes (e.g. from `getRepo` or a firehose commit)
|
|
37
|
+
* @returns `roots` — ordered list of root CID strings; `blocks` — lazy block map
|
|
26
38
|
*/
|
|
27
39
|
export declare function parseCarFrame(carBytes: Uint8Array): {
|
|
28
40
|
roots: string[];
|
|
29
|
-
blocks:
|
|
41
|
+
blocks: LazyBlockMap;
|
|
30
42
|
};
|
|
31
43
|
//# sourceMappingURL=car.d.ts.map
|
package/dist/car.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"car.d.ts","sourceRoot":"","sources":["../src/car.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAuCH
|
|
1
|
+
{"version":3,"file":"car.d.ts","sourceRoot":"","sources":["../src/car.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAuCH;;;;GAIG;AACH,qBAAa,YAAY;IACvB,OAAO,CAAC,OAAO,CAA+B;IAC9C,OAAO,CAAC,QAAQ,CAAmB;gBAEvB,QAAQ,EAAE,UAAU,EAAE,OAAO,EAAE,GAAG,CAAC,MAAM,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAKxE,GAAG,CAAC,GAAG,EAAE,MAAM,GAAG,UAAU,GAAG,SAAS;IAMxC,MAAM,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO;IAI5B,IAAI,IAAI,IAAI,MAAM,CAEjB;IAEA,CAAC,MAAM,CAAC,QAAQ,CAAC,IAAI,gBAAgB,CAAC,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC;IAO5D,wCAAwC;IACxC,IAAI,IAAI,IAAI;CAIb;AAED;;;;;;;;GAQG;AACH,wBAAgB,aAAa,CAAC,QAAQ,EAAE,UAAU,GAAG;IACnD,KAAK,EAAE,MAAM,EAAE,CAAA;IACf,MAAM,EAAE,YAAY,CAAA;CACrB,CAiCA"}
|
package/dist/car.js
CHANGED
|
@@ -42,17 +42,50 @@ function parseCidFromBytes(bytes, offset) {
|
|
|
42
42
|
return [bytes.slice(offset, pos), pos];
|
|
43
43
|
}
|
|
44
44
|
/**
|
|
45
|
-
*
|
|
45
|
+
* A memory-efficient block map that stores byte offsets into the original CAR
|
|
46
|
+
* buffer instead of copying block data. Implements the same `get`/`delete`/`size`
|
|
47
|
+
* interface as `Map<string, Uint8Array>` so it can be used as a drop-in replacement.
|
|
48
|
+
*/
|
|
49
|
+
export class LazyBlockMap {
|
|
50
|
+
offsets;
|
|
51
|
+
carBytes;
|
|
52
|
+
constructor(carBytes, offsets) {
|
|
53
|
+
this.carBytes = carBytes;
|
|
54
|
+
this.offsets = offsets;
|
|
55
|
+
}
|
|
56
|
+
get(cid) {
|
|
57
|
+
const range = this.offsets.get(cid);
|
|
58
|
+
if (!range || !this.carBytes)
|
|
59
|
+
return undefined;
|
|
60
|
+
return this.carBytes.subarray(range[0], range[1]);
|
|
61
|
+
}
|
|
62
|
+
delete(cid) {
|
|
63
|
+
return this.offsets.delete(cid);
|
|
64
|
+
}
|
|
65
|
+
get size() {
|
|
66
|
+
return this.offsets.size;
|
|
67
|
+
}
|
|
68
|
+
*[Symbol.iterator]() {
|
|
69
|
+
for (const [cid, range] of this.offsets) {
|
|
70
|
+
if (!this.carBytes)
|
|
71
|
+
return;
|
|
72
|
+
yield [cid, this.carBytes.subarray(range[0], range[1])];
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
/** Release the underlying CAR buffer */
|
|
76
|
+
free() {
|
|
77
|
+
this.carBytes = null;
|
|
78
|
+
this.offsets.clear();
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
/**
|
|
82
|
+
* Parses a CARv1 binary frame into its root CIDs and a lazy block map.
|
|
46
83
|
*
|
|
47
|
-
*
|
|
48
|
-
*
|
|
84
|
+
* The block map stores byte offsets into `carBytes` rather than copying data,
|
|
85
|
+
* reducing heap usage from O(total block bytes) to O(number of blocks * 16 bytes).
|
|
49
86
|
*
|
|
50
|
-
* @
|
|
51
|
-
*
|
|
52
|
-
* const car = new Uint8Array(await res.arrayBuffer())
|
|
53
|
-
* const { roots, blocks } = parseCarFrame(car)
|
|
54
|
-
* const commitData = blocks.get(roots[0])
|
|
55
|
-
* ```
|
|
87
|
+
* @param carBytes - Raw CAR file bytes (e.g. from `getRepo` or a firehose commit)
|
|
88
|
+
* @returns `roots` — ordered list of root CID strings; `blocks` — lazy block map
|
|
56
89
|
*/
|
|
57
90
|
export function parseCarFrame(carBytes) {
|
|
58
91
|
let offset = 0;
|
|
@@ -66,8 +99,8 @@ export function parseCarFrame(carBytes) {
|
|
|
66
99
|
// Our CBOR decoder converts tag-42 CIDs to { $link: "b..." } objects,
|
|
67
100
|
// so roots may already be decoded strings
|
|
68
101
|
const roots = (header.roots || []).map((root) => root?.$link ?? cidToString(root));
|
|
69
|
-
//
|
|
70
|
-
const
|
|
102
|
+
// Build offset index: CID → [start, end] into carBytes
|
|
103
|
+
const offsets = new Map();
|
|
71
104
|
while (offset < carBytes.length) {
|
|
72
105
|
const [blockLen, afterBlockLen] = readVarint(carBytes, offset);
|
|
73
106
|
offset = afterBlockLen;
|
|
@@ -76,9 +109,8 @@ export function parseCarFrame(carBytes) {
|
|
|
76
109
|
const [cidBytes, afterCid] = parseCidFromBytes(carBytes, offset);
|
|
77
110
|
const cid = cidToString(cidBytes);
|
|
78
111
|
const dataLen = blockLen - (afterCid - offset);
|
|
79
|
-
|
|
80
|
-
blocks.set(cid, data);
|
|
112
|
+
offsets.set(cid, [afterCid, afterCid + dataLen]);
|
|
81
113
|
offset = afterCid + dataLen;
|
|
82
114
|
}
|
|
83
|
-
return { roots, blocks };
|
|
115
|
+
return { roots, blocks: new LazyBlockMap(carBytes, offsets) };
|
|
84
116
|
}
|
package/dist/config.js
CHANGED
|
@@ -23,7 +23,7 @@ export function loadConfig(configPath) {
|
|
|
23
23
|
signalCollections: backfillRaw.signalCollections || undefined,
|
|
24
24
|
repos: env.BACKFILL_REPOS ? env.BACKFILL_REPOS.split(',').map((s) => s.trim()) : backfillRaw.repos || undefined,
|
|
25
25
|
fullNetwork: env.BACKFILL_FULL_NETWORK ? env.BACKFILL_FULL_NETWORK === 'true' : backfillRaw.fullNetwork || false,
|
|
26
|
-
parallelism: parseInt(env.BACKFILL_PARALLELISM || '') || backfillRaw.parallelism ||
|
|
26
|
+
parallelism: parseInt(env.BACKFILL_PARALLELISM || '') || backfillRaw.parallelism || 3,
|
|
27
27
|
fetchTimeout: parseInt(env.BACKFILL_FETCH_TIMEOUT || '') || backfillRaw.fetchTimeout || 300,
|
|
28
28
|
maxRetries: parseInt(env.BACKFILL_MAX_RETRIES || '') || backfillRaw.maxRetries || 5,
|
|
29
29
|
},
|
package/dist/mst.d.ts
CHANGED
|
@@ -2,5 +2,7 @@ export interface MstEntry {
|
|
|
2
2
|
path: string;
|
|
3
3
|
cid: string;
|
|
4
4
|
}
|
|
5
|
-
export declare function walkMst(blocks:
|
|
5
|
+
export declare function walkMst(blocks: {
|
|
6
|
+
get(cid: string): Uint8Array | undefined;
|
|
7
|
+
}, rootCid: string): Generator<MstEntry>;
|
|
6
8
|
//# sourceMappingURL=mst.d.ts.map
|
package/dist/mst.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"mst.d.ts","sourceRoot":"","sources":["../src/mst.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,MAAM,CAAA;IACZ,GAAG,EAAE,MAAM,CAAA;CACZ;AAED,
|
|
1
|
+
{"version":3,"file":"mst.d.ts","sourceRoot":"","sources":["../src/mst.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,MAAM,CAAA;IACZ,GAAG,EAAE,MAAM,CAAA;CACZ;AAED,wBAAiB,OAAO,CAAC,MAAM,EAAE;IAAE,GAAG,CAAC,GAAG,EAAE,MAAM,GAAG,UAAU,GAAG,SAAS,CAAA;CAAE,EAAE,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC,QAAQ,CAAC,CA8BnH"}
|
package/dist/mst.js
CHANGED
|
@@ -1,14 +1,13 @@
|
|
|
1
1
|
import { cborDecode } from "./cbor.js";
|
|
2
|
-
export function walkMst(blocks, rootCid) {
|
|
3
|
-
|
|
4
|
-
function visit(cid, prefix) {
|
|
2
|
+
export function* walkMst(blocks, rootCid) {
|
|
3
|
+
function* visit(cid, prefix) {
|
|
5
4
|
const data = blocks.get(cid);
|
|
6
5
|
if (!data)
|
|
7
6
|
return prefix;
|
|
8
7
|
const { value: node } = cborDecode(data);
|
|
9
8
|
// Visit left subtree
|
|
10
9
|
if (node.l?.$link)
|
|
11
|
-
visit(node.l.$link, prefix);
|
|
10
|
+
yield* visit(node.l.$link, prefix);
|
|
12
11
|
let lastKey = prefix;
|
|
13
12
|
for (const entry of node.e || []) {
|
|
14
13
|
const keySuffix = entry.k instanceof Uint8Array ? new TextDecoder().decode(entry.k) : entry.k;
|
|
@@ -16,15 +15,14 @@ export function walkMst(blocks, rootCid) {
|
|
|
16
15
|
const fullKey = lastKey.substring(0, prefixLen) + keySuffix;
|
|
17
16
|
lastKey = fullKey;
|
|
18
17
|
if (entry.v?.$link) {
|
|
19
|
-
|
|
18
|
+
yield { path: fullKey, cid: entry.v.$link };
|
|
20
19
|
}
|
|
21
20
|
// Visit right subtree
|
|
22
21
|
if (entry.t?.$link) {
|
|
23
|
-
visit(entry.t.$link, lastKey);
|
|
22
|
+
yield* visit(entry.t.$link, lastKey);
|
|
24
23
|
}
|
|
25
24
|
}
|
|
26
25
|
return lastKey;
|
|
27
26
|
}
|
|
28
|
-
visit(rootCid, '');
|
|
29
|
-
return entries;
|
|
27
|
+
yield* visit(rootCid, '');
|
|
30
28
|
}
|