@hatk/hatk 0.0.1-alpha.7 → 0.0.1-alpha.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- {"version":3,"file":"backfill.d.ts","sourceRoot":"","sources":["../src/backfill.ts"],"names":[],"mappings":"AAgBA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAA;AAEjD,6CAA6C;AAC7C,UAAU,YAAY;IACpB,wFAAwF;IACxF,MAAM,EAAE,MAAM,CAAA;IACd,8FAA8F;IAC9F,MAAM,EAAE,MAAM,CAAA;IACd,yEAAyE;IACzE,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,CAAA;IACxB,qDAAqD;IACrD,MAAM,EAAE,cAAc,CAAA;CACvB;AAuGD;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,wBAAsB,YAAY,CAAC,GAAG,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,EAAE,YAAY,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAmH/G;AAgCD;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AACH,wBAAsB,WAAW,CAAC,IAAI,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC,CAiInE"}
1
+ {"version":3,"file":"backfill.d.ts","sourceRoot":"","sources":["../src/backfill.ts"],"names":[],"mappings":"AAgBA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAA;AAEjD,6CAA6C;AAC7C,UAAU,YAAY;IACpB,wFAAwF;IACxF,MAAM,EAAE,MAAM,CAAA;IACd,8FAA8F;IAC9F,MAAM,EAAE,MAAM,CAAA;IACd,yEAAyE;IACzE,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,CAAA;IACxB,qDAAqD;IACrD,MAAM,EAAE,cAAc,CAAA;CACvB;AAuGD;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,wBAAsB,YAAY,CAAC,GAAG,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,EAAE,YAAY,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CA4H/G;AAgCD;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AACH,wBAAsB,WAAW,CAAC,IAAI,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC,CAiInE"}
package/dist/backfill.js CHANGED
@@ -145,10 +145,9 @@ export async function backfillRepo(did, collections, fetchTimeout) {
145
145
  httpErr.httpStatus = res.status;
146
146
  throw httpErr;
147
147
  }
148
- let carBytes = new Uint8Array(await res.arrayBuffer());
148
+ const carBytes = new Uint8Array(await res.arrayBuffer());
149
149
  carSizeBytes = carBytes.length;
150
150
  let { roots, blocks } = parseCarFrame(carBytes);
151
- carBytes = null; // free CAR bytes before bulk insert
152
151
  // Decode commit to get MST root
153
152
  const rootData = blocks.get(roots[0]);
154
153
  if (!rootData)
@@ -156,7 +155,24 @@ export async function backfillRepo(did, collections, fetchTimeout) {
156
155
  const { value: commit } = cborDecode(rootData);
157
156
  // Walk MST to find all record paths
158
157
  const entries = walkMst(blocks, commit.data.$link);
159
- const bulk = [];
158
+ // Delete existing records for this DID before re-importing so deletions are reflected
159
+ for (const col of collections) {
160
+ const schema = getSchema(col);
161
+ if (!schema)
162
+ continue;
163
+ await runSQL(`DELETE FROM ${schema.tableName} WHERE did = $1`, did);
164
+ for (const child of schema.children) {
165
+ await runSQL(`DELETE FROM ${child.tableName} WHERE parent_did = $1`, did);
166
+ }
167
+ for (const union of schema.unions) {
168
+ for (const branch of union.branches) {
169
+ await runSQL(`DELETE FROM ${branch.tableName} WHERE parent_did = $1`, did);
170
+ }
171
+ }
172
+ }
173
+ // Insert records in chunks to limit memory usage
174
+ const CHUNK_SIZE = 1000;
175
+ let chunk = [];
160
176
  for (const entry of entries) {
161
177
  const collection = entry.path.split('/')[0];
162
178
  if (!collections.has(collection))
@@ -164,13 +180,18 @@ export async function backfillRepo(did, collections, fetchTimeout) {
164
180
  const blockData = blocks.get(entry.cid);
165
181
  if (!blockData)
166
182
  continue;
183
+ blocks.delete(entry.cid); // free block data as we go
167
184
  try {
168
185
  const { value: record } = cborDecode(blockData);
169
186
  if (!record?.$type)
170
187
  continue;
171
188
  const rkey = entry.path.split('/').slice(1).join('/');
172
189
  const uri = `at://${did}/${collection}/${rkey}`;
173
- bulk.push({ collection, uri, cid: entry.cid, did, record });
190
+ chunk.push({ collection, uri, cid: entry.cid, did, record });
191
+ if (chunk.length >= CHUNK_SIZE) {
192
+ count += await bulkInsertRecords(chunk);
193
+ chunk = [];
194
+ }
174
195
  }
175
196
  catch (recordErr) {
176
197
  emit('backfill', 'record_error', {
@@ -181,23 +202,11 @@ export async function backfillRepo(did, collections, fetchTimeout) {
181
202
  });
182
203
  }
183
204
  }
184
- blocks = null; // free block map before bulk insert
185
- // Delete existing records for this DID before re-importing so deletions are reflected
186
- for (const col of collections) {
187
- const schema = getSchema(col);
188
- if (!schema)
189
- continue;
190
- await runSQL(`DELETE FROM ${schema.tableName} WHERE did = $1`, did);
191
- for (const child of schema.children) {
192
- await runSQL(`DELETE FROM ${child.tableName} WHERE parent_did = $1`, did);
193
- }
194
- for (const union of schema.unions) {
195
- for (const branch of union.branches) {
196
- await runSQL(`DELETE FROM ${branch.tableName} WHERE parent_did = $1`, did);
197
- }
198
- }
205
+ blocks.free();
206
+ blocks = null;
207
+ if (chunk.length > 0) {
208
+ count += await bulkInsertRecords(chunk);
199
209
  }
200
- count = await bulkInsertRecords(bulk);
201
210
  await setRepoStatus(did, 'active', commit.rev, { handle });
202
211
  return count;
203
212
  }
package/dist/car.d.ts CHANGED
@@ -12,20 +12,32 @@
12
12
  * @module
13
13
  */
14
14
  /**
15
- * Parses a CARv1 binary frame into its root CIDs and block map.
15
+ * A memory-efficient block map that stores byte offsets into the original CAR
16
+ * buffer instead of copying block data. Implements the same `get`/`delete`/`size`
17
+ * interface as `Map<string, Uint8Array>` so it can be used as a drop-in replacement.
18
+ */
19
+ export declare class LazyBlockMap {
20
+ private offsets;
21
+ private carBytes;
22
+ constructor(carBytes: Uint8Array, offsets: Map<string, [number, number]>);
23
+ get(cid: string): Uint8Array | undefined;
24
+ delete(cid: string): boolean;
25
+ get size(): number;
26
+ [Symbol.iterator](): IterableIterator<[string, Uint8Array]>;
27
+ /** Release the underlying CAR buffer */
28
+ free(): void;
29
+ }
30
+ /**
31
+ * Parses a CARv1 binary frame into its root CIDs and a lazy block map.
16
32
  *
17
- * @param carBytes - Raw CAR file bytes (e.g. from `getRepo` or a firehose commit)
18
- * @returns `roots` — ordered list of root CID strings; `blocks` — map of CID string → raw block data
33
+ * The block map stores byte offsets into `carBytes` rather than copying data,
34
+ * reducing heap usage from O(total block bytes) to O(number of blocks * 16 bytes).
19
35
  *
20
- * @example
21
- * ```ts
22
- * const car = new Uint8Array(await res.arrayBuffer())
23
- * const { roots, blocks } = parseCarFrame(car)
24
- * const commitData = blocks.get(roots[0])
25
- * ```
36
+ * @param carBytes - Raw CAR file bytes (e.g. from `getRepo` or a firehose commit)
37
+ * @returns `roots` — ordered list of root CID strings; `blocks` — lazy block map
26
38
  */
27
39
  export declare function parseCarFrame(carBytes: Uint8Array): {
28
40
  roots: string[];
29
- blocks: Map<string, Uint8Array>;
41
+ blocks: LazyBlockMap;
30
42
  };
31
43
  //# sourceMappingURL=car.d.ts.map
package/dist/car.d.ts.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"car.d.ts","sourceRoot":"","sources":["../src/car.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAuCH;;;;;;;;;;;;GAYG;AACH,wBAAgB,aAAa,CAAC,QAAQ,EAAE,UAAU,GAAG;IACnD,KAAK,EAAE,MAAM,EAAE,CAAA;IACf,MAAM,EAAE,GAAG,CAAC,MAAM,EAAE,UAAU,CAAC,CAAA;CAChC,CAmCA"}
1
+ {"version":3,"file":"car.d.ts","sourceRoot":"","sources":["../src/car.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAuCH;;;;GAIG;AACH,qBAAa,YAAY;IACvB,OAAO,CAAC,OAAO,CAA+B;IAC9C,OAAO,CAAC,QAAQ,CAAmB;gBAEvB,QAAQ,EAAE,UAAU,EAAE,OAAO,EAAE,GAAG,CAAC,MAAM,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAKxE,GAAG,CAAC,GAAG,EAAE,MAAM,GAAG,UAAU,GAAG,SAAS;IAMxC,MAAM,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO;IAI5B,IAAI,IAAI,IAAI,MAAM,CAEjB;IAEA,CAAC,MAAM,CAAC,QAAQ,CAAC,IAAI,gBAAgB,CAAC,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC;IAO5D,wCAAwC;IACxC,IAAI,IAAI,IAAI;CAIb;AAED;;;;;;;;GAQG;AACH,wBAAgB,aAAa,CAAC,QAAQ,EAAE,UAAU,GAAG;IACnD,KAAK,EAAE,MAAM,EAAE,CAAA;IACf,MAAM,EAAE,YAAY,CAAA;CACrB,CAiCA"}
package/dist/car.js CHANGED
@@ -42,17 +42,50 @@ function parseCidFromBytes(bytes, offset) {
42
42
  return [bytes.slice(offset, pos), pos];
43
43
  }
44
44
  /**
45
- * Parses a CARv1 binary frame into its root CIDs and block map.
45
+ * A memory-efficient block map that stores byte offsets into the original CAR
46
+ * buffer instead of copying block data. Implements the same `get`/`delete`/`size`
47
+ * interface as `Map<string, Uint8Array>` so it can be used as a drop-in replacement.
48
+ */
49
+ export class LazyBlockMap {
50
+ offsets;
51
+ carBytes;
52
+ constructor(carBytes, offsets) {
53
+ this.carBytes = carBytes;
54
+ this.offsets = offsets;
55
+ }
56
+ get(cid) {
57
+ const range = this.offsets.get(cid);
58
+ if (!range || !this.carBytes)
59
+ return undefined;
60
+ return this.carBytes.subarray(range[0], range[1]);
61
+ }
62
+ delete(cid) {
63
+ return this.offsets.delete(cid);
64
+ }
65
+ get size() {
66
+ return this.offsets.size;
67
+ }
68
+ *[Symbol.iterator]() {
69
+ for (const [cid, range] of this.offsets) {
70
+ if (!this.carBytes)
71
+ return;
72
+ yield [cid, this.carBytes.subarray(range[0], range[1])];
73
+ }
74
+ }
75
+ /** Release the underlying CAR buffer */
76
+ free() {
77
+ this.carBytes = null;
78
+ this.offsets.clear();
79
+ }
80
+ }
81
+ /**
82
+ * Parses a CARv1 binary frame into its root CIDs and a lazy block map.
46
83
  *
47
- * @param carBytes - Raw CAR file bytes (e.g. from `getRepo` or a firehose commit)
48
- * @returns `roots` — ordered list of root CID strings; `blocks` — map of CID string → raw block data
84
+ * The block map stores byte offsets into `carBytes` rather than copying data,
85
+ * reducing heap usage from O(total block bytes) to O(number of blocks * 16 bytes).
49
86
  *
50
- * @example
51
- * ```ts
52
- * const car = new Uint8Array(await res.arrayBuffer())
53
- * const { roots, blocks } = parseCarFrame(car)
54
- * const commitData = blocks.get(roots[0])
55
- * ```
87
+ * @param carBytes - Raw CAR file bytes (e.g. from `getRepo` or a firehose commit)
88
+ * @returns `roots` — ordered list of root CID strings; `blocks` — lazy block map
56
89
  */
57
90
  export function parseCarFrame(carBytes) {
58
91
  let offset = 0;
@@ -66,8 +99,8 @@ export function parseCarFrame(carBytes) {
66
99
  // Our CBOR decoder converts tag-42 CIDs to { $link: "b..." } objects,
67
100
  // so roots may already be decoded strings
68
101
  const roots = (header.roots || []).map((root) => root?.$link ?? cidToString(root));
69
- // Parse blocks: each is varint(len) + CID + data
70
- const blocks = new Map();
102
+ // Build offset index: CID → [start, end] into carBytes
103
+ const offsets = new Map();
71
104
  while (offset < carBytes.length) {
72
105
  const [blockLen, afterBlockLen] = readVarint(carBytes, offset);
73
106
  offset = afterBlockLen;
@@ -76,9 +109,8 @@ export function parseCarFrame(carBytes) {
76
109
  const [cidBytes, afterCid] = parseCidFromBytes(carBytes, offset);
77
110
  const cid = cidToString(cidBytes);
78
111
  const dataLen = blockLen - (afterCid - offset);
79
- const data = carBytes.slice(afterCid, afterCid + dataLen);
80
- blocks.set(cid, data);
112
+ offsets.set(cid, [afterCid, afterCid + dataLen]);
81
113
  offset = afterCid + dataLen;
82
114
  }
83
- return { roots, blocks };
115
+ return { roots, blocks: new LazyBlockMap(carBytes, offsets) };
84
116
  }
package/dist/config.js CHANGED
@@ -23,7 +23,7 @@ export function loadConfig(configPath) {
23
23
  signalCollections: backfillRaw.signalCollections || undefined,
24
24
  repos: env.BACKFILL_REPOS ? env.BACKFILL_REPOS.split(',').map((s) => s.trim()) : backfillRaw.repos || undefined,
25
25
  fullNetwork: env.BACKFILL_FULL_NETWORK ? env.BACKFILL_FULL_NETWORK === 'true' : backfillRaw.fullNetwork || false,
26
- parallelism: parseInt(env.BACKFILL_PARALLELISM || '') || backfillRaw.parallelism || 5,
26
+ parallelism: parseInt(env.BACKFILL_PARALLELISM || '') || backfillRaw.parallelism || 3,
27
27
  fetchTimeout: parseInt(env.BACKFILL_FETCH_TIMEOUT || '') || backfillRaw.fetchTimeout || 300,
28
28
  maxRetries: parseInt(env.BACKFILL_MAX_RETRIES || '') || backfillRaw.maxRetries || 5,
29
29
  },
package/dist/mst.d.ts CHANGED
@@ -2,5 +2,7 @@ export interface MstEntry {
2
2
  path: string;
3
3
  cid: string;
4
4
  }
5
- export declare function walkMst(blocks: Map<string, Uint8Array>, rootCid: string): MstEntry[];
5
+ export declare function walkMst(blocks: {
6
+ get(cid: string): Uint8Array | undefined;
7
+ }, rootCid: string): Generator<MstEntry>;
6
8
  //# sourceMappingURL=mst.d.ts.map
package/dist/mst.d.ts.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"mst.d.ts","sourceRoot":"","sources":["../src/mst.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,MAAM,CAAA;IACZ,GAAG,EAAE,MAAM,CAAA;CACZ;AAED,wBAAgB,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,MAAM,EAAE,UAAU,CAAC,EAAE,OAAO,EAAE,MAAM,GAAG,QAAQ,EAAE,CAiCpF"}
1
+ {"version":3,"file":"mst.d.ts","sourceRoot":"","sources":["../src/mst.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,MAAM,CAAA;IACZ,GAAG,EAAE,MAAM,CAAA;CACZ;AAED,wBAAiB,OAAO,CAAC,MAAM,EAAE;IAAE,GAAG,CAAC,GAAG,EAAE,MAAM,GAAG,UAAU,GAAG,SAAS,CAAA;CAAE,EAAE,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC,QAAQ,CAAC,CA8BnH"}
package/dist/mst.js CHANGED
@@ -1,14 +1,13 @@
1
1
  import { cborDecode } from "./cbor.js";
2
- export function walkMst(blocks, rootCid) {
3
- const entries = [];
4
- function visit(cid, prefix) {
2
+ export function* walkMst(blocks, rootCid) {
3
+ function* visit(cid, prefix) {
5
4
  const data = blocks.get(cid);
6
5
  if (!data)
7
6
  return prefix;
8
7
  const { value: node } = cborDecode(data);
9
8
  // Visit left subtree
10
9
  if (node.l?.$link)
11
- visit(node.l.$link, prefix);
10
+ yield* visit(node.l.$link, prefix);
12
11
  let lastKey = prefix;
13
12
  for (const entry of node.e || []) {
14
13
  const keySuffix = entry.k instanceof Uint8Array ? new TextDecoder().decode(entry.k) : entry.k;
@@ -16,15 +15,14 @@ export function walkMst(blocks, rootCid) {
16
15
  const fullKey = lastKey.substring(0, prefixLen) + keySuffix;
17
16
  lastKey = fullKey;
18
17
  if (entry.v?.$link) {
19
- entries.push({ path: fullKey, cid: entry.v.$link });
18
+ yield { path: fullKey, cid: entry.v.$link };
20
19
  }
21
20
  // Visit right subtree
22
21
  if (entry.t?.$link) {
23
- visit(entry.t.$link, lastKey);
22
+ yield* visit(entry.t.$link, lastKey);
24
23
  }
25
24
  }
26
25
  return lastKey;
27
26
  }
28
- visit(rootCid, '');
29
- return entries;
27
+ yield* visit(rootCid, '');
30
28
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hatk/hatk",
3
- "version": "0.0.1-alpha.7",
3
+ "version": "0.0.1-alpha.9",
4
4
  "license": "MIT",
5
5
  "bin": {
6
6
  "hatk": "dist/cli.js"