@hatk/hatk 0.0.1-alpha.7 → 0.0.1-alpha.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/backfill.d.ts.map +1 -1
- package/dist/backfill.js +29 -20
- package/dist/car.d.ts +22 -10
- package/dist/car.d.ts.map +1 -1
- package/dist/car.js +46 -14
- package/dist/config.js +1 -1
- package/dist/mst.d.ts +3 -1
- package/dist/mst.d.ts.map +1 -1
- package/dist/mst.js +6 -8
- package/package.json +1 -1
package/dist/backfill.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"backfill.d.ts","sourceRoot":"","sources":["../src/backfill.ts"],"names":[],"mappings":"AAgBA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAA;AAEjD,6CAA6C;AAC7C,UAAU,YAAY;IACpB,wFAAwF;IACxF,MAAM,EAAE,MAAM,CAAA;IACd,8FAA8F;IAC9F,MAAM,EAAE,MAAM,CAAA;IACd,yEAAyE;IACzE,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,CAAA;IACxB,qDAAqD;IACrD,MAAM,EAAE,cAAc,CAAA;CACvB;AAuGD;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,wBAAsB,YAAY,CAAC,GAAG,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,EAAE,YAAY,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,
|
|
1
|
+
{"version":3,"file":"backfill.d.ts","sourceRoot":"","sources":["../src/backfill.ts"],"names":[],"mappings":"AAgBA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAA;AAEjD,6CAA6C;AAC7C,UAAU,YAAY;IACpB,wFAAwF;IACxF,MAAM,EAAE,MAAM,CAAA;IACd,8FAA8F;IAC9F,MAAM,EAAE,MAAM,CAAA;IACd,yEAAyE;IACzE,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,CAAA;IACxB,qDAAqD;IACrD,MAAM,EAAE,cAAc,CAAA;CACvB;AAuGD;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,wBAAsB,YAAY,CAAC,GAAG,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,EAAE,YAAY,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CA4H/G;AAgCD;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AACH,wBAAsB,WAAW,CAAC,IAAI,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC,CAiInE"}
|
package/dist/backfill.js
CHANGED
|
@@ -145,10 +145,9 @@ export async function backfillRepo(did, collections, fetchTimeout) {
|
|
|
145
145
|
httpErr.httpStatus = res.status;
|
|
146
146
|
throw httpErr;
|
|
147
147
|
}
|
|
148
|
-
|
|
148
|
+
const carBytes = new Uint8Array(await res.arrayBuffer());
|
|
149
149
|
carSizeBytes = carBytes.length;
|
|
150
150
|
let { roots, blocks } = parseCarFrame(carBytes);
|
|
151
|
-
carBytes = null; // free CAR bytes before bulk insert
|
|
152
151
|
// Decode commit to get MST root
|
|
153
152
|
const rootData = blocks.get(roots[0]);
|
|
154
153
|
if (!rootData)
|
|
@@ -156,7 +155,24 @@ export async function backfillRepo(did, collections, fetchTimeout) {
|
|
|
156
155
|
const { value: commit } = cborDecode(rootData);
|
|
157
156
|
// Walk MST to find all record paths
|
|
158
157
|
const entries = walkMst(blocks, commit.data.$link);
|
|
159
|
-
|
|
158
|
+
// Delete existing records for this DID before re-importing so deletions are reflected
|
|
159
|
+
for (const col of collections) {
|
|
160
|
+
const schema = getSchema(col);
|
|
161
|
+
if (!schema)
|
|
162
|
+
continue;
|
|
163
|
+
await runSQL(`DELETE FROM ${schema.tableName} WHERE did = $1`, did);
|
|
164
|
+
for (const child of schema.children) {
|
|
165
|
+
await runSQL(`DELETE FROM ${child.tableName} WHERE parent_did = $1`, did);
|
|
166
|
+
}
|
|
167
|
+
for (const union of schema.unions) {
|
|
168
|
+
for (const branch of union.branches) {
|
|
169
|
+
await runSQL(`DELETE FROM ${branch.tableName} WHERE parent_did = $1`, did);
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
}
|
|
173
|
+
// Insert records in chunks to limit memory usage
|
|
174
|
+
const CHUNK_SIZE = 1000;
|
|
175
|
+
let chunk = [];
|
|
160
176
|
for (const entry of entries) {
|
|
161
177
|
const collection = entry.path.split('/')[0];
|
|
162
178
|
if (!collections.has(collection))
|
|
@@ -164,13 +180,18 @@ export async function backfillRepo(did, collections, fetchTimeout) {
|
|
|
164
180
|
const blockData = blocks.get(entry.cid);
|
|
165
181
|
if (!blockData)
|
|
166
182
|
continue;
|
|
183
|
+
blocks.delete(entry.cid); // free block data as we go
|
|
167
184
|
try {
|
|
168
185
|
const { value: record } = cborDecode(blockData);
|
|
169
186
|
if (!record?.$type)
|
|
170
187
|
continue;
|
|
171
188
|
const rkey = entry.path.split('/').slice(1).join('/');
|
|
172
189
|
const uri = `at://${did}/${collection}/${rkey}`;
|
|
173
|
-
|
|
190
|
+
chunk.push({ collection, uri, cid: entry.cid, did, record });
|
|
191
|
+
if (chunk.length >= CHUNK_SIZE) {
|
|
192
|
+
count += await bulkInsertRecords(chunk);
|
|
193
|
+
chunk = [];
|
|
194
|
+
}
|
|
174
195
|
}
|
|
175
196
|
catch (recordErr) {
|
|
176
197
|
emit('backfill', 'record_error', {
|
|
@@ -181,23 +202,11 @@ export async function backfillRepo(did, collections, fetchTimeout) {
|
|
|
181
202
|
});
|
|
182
203
|
}
|
|
183
204
|
}
|
|
184
|
-
blocks
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
if (!schema)
|
|
189
|
-
continue;
|
|
190
|
-
await runSQL(`DELETE FROM ${schema.tableName} WHERE did = $1`, did);
|
|
191
|
-
for (const child of schema.children) {
|
|
192
|
-
await runSQL(`DELETE FROM ${child.tableName} WHERE parent_did = $1`, did);
|
|
193
|
-
}
|
|
194
|
-
for (const union of schema.unions) {
|
|
195
|
-
for (const branch of union.branches) {
|
|
196
|
-
await runSQL(`DELETE FROM ${branch.tableName} WHERE parent_did = $1`, did);
|
|
197
|
-
}
|
|
198
|
-
}
|
|
205
|
+
blocks.free();
|
|
206
|
+
blocks = null;
|
|
207
|
+
if (chunk.length > 0) {
|
|
208
|
+
count += await bulkInsertRecords(chunk);
|
|
199
209
|
}
|
|
200
|
-
count = await bulkInsertRecords(bulk);
|
|
201
210
|
await setRepoStatus(did, 'active', commit.rev, { handle });
|
|
202
211
|
return count;
|
|
203
212
|
}
|
package/dist/car.d.ts
CHANGED
|
@@ -12,20 +12,32 @@
|
|
|
12
12
|
* @module
|
|
13
13
|
*/
|
|
14
14
|
/**
|
|
15
|
-
*
|
|
15
|
+
* A memory-efficient block map that stores byte offsets into the original CAR
|
|
16
|
+
* buffer instead of copying block data. Implements the same `get`/`delete`/`size`
|
|
17
|
+
* interface as `Map<string, Uint8Array>` so it can be used as a drop-in replacement.
|
|
18
|
+
*/
|
|
19
|
+
export declare class LazyBlockMap {
|
|
20
|
+
private offsets;
|
|
21
|
+
private carBytes;
|
|
22
|
+
constructor(carBytes: Uint8Array, offsets: Map<string, [number, number]>);
|
|
23
|
+
get(cid: string): Uint8Array | undefined;
|
|
24
|
+
delete(cid: string): boolean;
|
|
25
|
+
get size(): number;
|
|
26
|
+
[Symbol.iterator](): IterableIterator<[string, Uint8Array]>;
|
|
27
|
+
/** Release the underlying CAR buffer */
|
|
28
|
+
free(): void;
|
|
29
|
+
}
|
|
30
|
+
/**
|
|
31
|
+
* Parses a CARv1 binary frame into its root CIDs and a lazy block map.
|
|
16
32
|
*
|
|
17
|
-
*
|
|
18
|
-
*
|
|
33
|
+
* The block map stores byte offsets into `carBytes` rather than copying data,
|
|
34
|
+
* reducing heap usage from O(total block bytes) to O(number of blocks * 16 bytes).
|
|
19
35
|
*
|
|
20
|
-
* @
|
|
21
|
-
*
|
|
22
|
-
* const car = new Uint8Array(await res.arrayBuffer())
|
|
23
|
-
* const { roots, blocks } = parseCarFrame(car)
|
|
24
|
-
* const commitData = blocks.get(roots[0])
|
|
25
|
-
* ```
|
|
36
|
+
* @param carBytes - Raw CAR file bytes (e.g. from `getRepo` or a firehose commit)
|
|
37
|
+
* @returns `roots` — ordered list of root CID strings; `blocks` — lazy block map
|
|
26
38
|
*/
|
|
27
39
|
export declare function parseCarFrame(carBytes: Uint8Array): {
|
|
28
40
|
roots: string[];
|
|
29
|
-
blocks:
|
|
41
|
+
blocks: LazyBlockMap;
|
|
30
42
|
};
|
|
31
43
|
//# sourceMappingURL=car.d.ts.map
|
package/dist/car.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"car.d.ts","sourceRoot":"","sources":["../src/car.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAuCH
|
|
1
|
+
{"version":3,"file":"car.d.ts","sourceRoot":"","sources":["../src/car.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAuCH;;;;GAIG;AACH,qBAAa,YAAY;IACvB,OAAO,CAAC,OAAO,CAA+B;IAC9C,OAAO,CAAC,QAAQ,CAAmB;gBAEvB,QAAQ,EAAE,UAAU,EAAE,OAAO,EAAE,GAAG,CAAC,MAAM,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAKxE,GAAG,CAAC,GAAG,EAAE,MAAM,GAAG,UAAU,GAAG,SAAS;IAMxC,MAAM,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO;IAI5B,IAAI,IAAI,IAAI,MAAM,CAEjB;IAEA,CAAC,MAAM,CAAC,QAAQ,CAAC,IAAI,gBAAgB,CAAC,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC;IAO5D,wCAAwC;IACxC,IAAI,IAAI,IAAI;CAIb;AAED;;;;;;;;GAQG;AACH,wBAAgB,aAAa,CAAC,QAAQ,EAAE,UAAU,GAAG;IACnD,KAAK,EAAE,MAAM,EAAE,CAAA;IACf,MAAM,EAAE,YAAY,CAAA;CACrB,CAiCA"}
|
package/dist/car.js
CHANGED
|
@@ -42,17 +42,50 @@ function parseCidFromBytes(bytes, offset) {
|
|
|
42
42
|
return [bytes.slice(offset, pos), pos];
|
|
43
43
|
}
|
|
44
44
|
/**
|
|
45
|
-
*
|
|
45
|
+
* A memory-efficient block map that stores byte offsets into the original CAR
|
|
46
|
+
* buffer instead of copying block data. Implements the same `get`/`delete`/`size`
|
|
47
|
+
* interface as `Map<string, Uint8Array>` so it can be used as a drop-in replacement.
|
|
48
|
+
*/
|
|
49
|
+
export class LazyBlockMap {
|
|
50
|
+
offsets;
|
|
51
|
+
carBytes;
|
|
52
|
+
constructor(carBytes, offsets) {
|
|
53
|
+
this.carBytes = carBytes;
|
|
54
|
+
this.offsets = offsets;
|
|
55
|
+
}
|
|
56
|
+
get(cid) {
|
|
57
|
+
const range = this.offsets.get(cid);
|
|
58
|
+
if (!range || !this.carBytes)
|
|
59
|
+
return undefined;
|
|
60
|
+
return this.carBytes.subarray(range[0], range[1]);
|
|
61
|
+
}
|
|
62
|
+
delete(cid) {
|
|
63
|
+
return this.offsets.delete(cid);
|
|
64
|
+
}
|
|
65
|
+
get size() {
|
|
66
|
+
return this.offsets.size;
|
|
67
|
+
}
|
|
68
|
+
*[Symbol.iterator]() {
|
|
69
|
+
for (const [cid, range] of this.offsets) {
|
|
70
|
+
if (!this.carBytes)
|
|
71
|
+
return;
|
|
72
|
+
yield [cid, this.carBytes.subarray(range[0], range[1])];
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
/** Release the underlying CAR buffer */
|
|
76
|
+
free() {
|
|
77
|
+
this.carBytes = null;
|
|
78
|
+
this.offsets.clear();
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
/**
|
|
82
|
+
* Parses a CARv1 binary frame into its root CIDs and a lazy block map.
|
|
46
83
|
*
|
|
47
|
-
*
|
|
48
|
-
*
|
|
84
|
+
* The block map stores byte offsets into `carBytes` rather than copying data,
|
|
85
|
+
* reducing heap usage from O(total block bytes) to O(number of blocks * 16 bytes).
|
|
49
86
|
*
|
|
50
|
-
* @
|
|
51
|
-
*
|
|
52
|
-
* const car = new Uint8Array(await res.arrayBuffer())
|
|
53
|
-
* const { roots, blocks } = parseCarFrame(car)
|
|
54
|
-
* const commitData = blocks.get(roots[0])
|
|
55
|
-
* ```
|
|
87
|
+
* @param carBytes - Raw CAR file bytes (e.g. from `getRepo` or a firehose commit)
|
|
88
|
+
* @returns `roots` — ordered list of root CID strings; `blocks` — lazy block map
|
|
56
89
|
*/
|
|
57
90
|
export function parseCarFrame(carBytes) {
|
|
58
91
|
let offset = 0;
|
|
@@ -66,8 +99,8 @@ export function parseCarFrame(carBytes) {
|
|
|
66
99
|
// Our CBOR decoder converts tag-42 CIDs to { $link: "b..." } objects,
|
|
67
100
|
// so roots may already be decoded strings
|
|
68
101
|
const roots = (header.roots || []).map((root) => root?.$link ?? cidToString(root));
|
|
69
|
-
//
|
|
70
|
-
const
|
|
102
|
+
// Build offset index: CID → [start, end] into carBytes
|
|
103
|
+
const offsets = new Map();
|
|
71
104
|
while (offset < carBytes.length) {
|
|
72
105
|
const [blockLen, afterBlockLen] = readVarint(carBytes, offset);
|
|
73
106
|
offset = afterBlockLen;
|
|
@@ -76,9 +109,8 @@ export function parseCarFrame(carBytes) {
|
|
|
76
109
|
const [cidBytes, afterCid] = parseCidFromBytes(carBytes, offset);
|
|
77
110
|
const cid = cidToString(cidBytes);
|
|
78
111
|
const dataLen = blockLen - (afterCid - offset);
|
|
79
|
-
|
|
80
|
-
blocks.set(cid, data);
|
|
112
|
+
offsets.set(cid, [afterCid, afterCid + dataLen]);
|
|
81
113
|
offset = afterCid + dataLen;
|
|
82
114
|
}
|
|
83
|
-
return { roots, blocks };
|
|
115
|
+
return { roots, blocks: new LazyBlockMap(carBytes, offsets) };
|
|
84
116
|
}
|
package/dist/config.js
CHANGED
|
@@ -23,7 +23,7 @@ export function loadConfig(configPath) {
|
|
|
23
23
|
signalCollections: backfillRaw.signalCollections || undefined,
|
|
24
24
|
repos: env.BACKFILL_REPOS ? env.BACKFILL_REPOS.split(',').map((s) => s.trim()) : backfillRaw.repos || undefined,
|
|
25
25
|
fullNetwork: env.BACKFILL_FULL_NETWORK ? env.BACKFILL_FULL_NETWORK === 'true' : backfillRaw.fullNetwork || false,
|
|
26
|
-
parallelism: parseInt(env.BACKFILL_PARALLELISM || '') || backfillRaw.parallelism ||
|
|
26
|
+
parallelism: parseInt(env.BACKFILL_PARALLELISM || '') || backfillRaw.parallelism || 3,
|
|
27
27
|
fetchTimeout: parseInt(env.BACKFILL_FETCH_TIMEOUT || '') || backfillRaw.fetchTimeout || 300,
|
|
28
28
|
maxRetries: parseInt(env.BACKFILL_MAX_RETRIES || '') || backfillRaw.maxRetries || 5,
|
|
29
29
|
},
|
package/dist/mst.d.ts
CHANGED
|
@@ -2,5 +2,7 @@ export interface MstEntry {
|
|
|
2
2
|
path: string;
|
|
3
3
|
cid: string;
|
|
4
4
|
}
|
|
5
|
-
export declare function walkMst(blocks:
|
|
5
|
+
export declare function walkMst(blocks: {
|
|
6
|
+
get(cid: string): Uint8Array | undefined;
|
|
7
|
+
}, rootCid: string): Generator<MstEntry>;
|
|
6
8
|
//# sourceMappingURL=mst.d.ts.map
|
package/dist/mst.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"mst.d.ts","sourceRoot":"","sources":["../src/mst.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,MAAM,CAAA;IACZ,GAAG,EAAE,MAAM,CAAA;CACZ;AAED,
|
|
1
|
+
{"version":3,"file":"mst.d.ts","sourceRoot":"","sources":["../src/mst.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,MAAM,CAAA;IACZ,GAAG,EAAE,MAAM,CAAA;CACZ;AAED,wBAAiB,OAAO,CAAC,MAAM,EAAE;IAAE,GAAG,CAAC,GAAG,EAAE,MAAM,GAAG,UAAU,GAAG,SAAS,CAAA;CAAE,EAAE,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC,QAAQ,CAAC,CA8BnH"}
|
package/dist/mst.js
CHANGED
|
@@ -1,14 +1,13 @@
|
|
|
1
1
|
import { cborDecode } from "./cbor.js";
|
|
2
|
-
export function walkMst(blocks, rootCid) {
|
|
3
|
-
|
|
4
|
-
function visit(cid, prefix) {
|
|
2
|
+
export function* walkMst(blocks, rootCid) {
|
|
3
|
+
function* visit(cid, prefix) {
|
|
5
4
|
const data = blocks.get(cid);
|
|
6
5
|
if (!data)
|
|
7
6
|
return prefix;
|
|
8
7
|
const { value: node } = cborDecode(data);
|
|
9
8
|
// Visit left subtree
|
|
10
9
|
if (node.l?.$link)
|
|
11
|
-
visit(node.l.$link, prefix);
|
|
10
|
+
yield* visit(node.l.$link, prefix);
|
|
12
11
|
let lastKey = prefix;
|
|
13
12
|
for (const entry of node.e || []) {
|
|
14
13
|
const keySuffix = entry.k instanceof Uint8Array ? new TextDecoder().decode(entry.k) : entry.k;
|
|
@@ -16,15 +15,14 @@ export function walkMst(blocks, rootCid) {
|
|
|
16
15
|
const fullKey = lastKey.substring(0, prefixLen) + keySuffix;
|
|
17
16
|
lastKey = fullKey;
|
|
18
17
|
if (entry.v?.$link) {
|
|
19
|
-
|
|
18
|
+
yield { path: fullKey, cid: entry.v.$link };
|
|
20
19
|
}
|
|
21
20
|
// Visit right subtree
|
|
22
21
|
if (entry.t?.$link) {
|
|
23
|
-
visit(entry.t.$link, lastKey);
|
|
22
|
+
yield* visit(entry.t.$link, lastKey);
|
|
24
23
|
}
|
|
25
24
|
}
|
|
26
25
|
return lastKey;
|
|
27
26
|
}
|
|
28
|
-
visit(rootCid, '');
|
|
29
|
-
return entries;
|
|
27
|
+
yield* visit(rootCid, '');
|
|
30
28
|
}
|