@hatk/hatk 0.0.1-alpha.8 → 0.0.1-alpha.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- {"version":3,"file":"backfill.d.ts","sourceRoot":"","sources":["../src/backfill.ts"],"names":[],"mappings":"AAgBA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAA;AAEjD,6CAA6C;AAC7C,UAAU,YAAY;IACpB,wFAAwF;IACxF,MAAM,EAAE,MAAM,CAAA;IACd,8FAA8F;IAC9F,MAAM,EAAE,MAAM,CAAA;IACd,yEAAyE;IACzE,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,CAAA;IACxB,qDAAqD;IACrD,MAAM,EAAE,cAAc,CAAA;CACvB;AAuGD;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,wBAAsB,YAAY,CAAC,GAAG,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,EAAE,YAAY,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CA2H/G;AAgCD;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AACH,wBAAsB,WAAW,CAAC,IAAI,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC,CAiInE"}
1
+ {"version":3,"file":"backfill.d.ts","sourceRoot":"","sources":["../src/backfill.ts"],"names":[],"mappings":"AAgBA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAA;AAEjD,6CAA6C;AAC7C,UAAU,YAAY;IACpB,wFAAwF;IACxF,MAAM,EAAE,MAAM,CAAA;IACd,8FAA8F;IAC9F,MAAM,EAAE,MAAM,CAAA;IACd,yEAAyE;IACzE,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,CAAA;IACxB,qDAAqD;IACrD,MAAM,EAAE,cAAc,CAAA;CACvB;AAuGD;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,wBAAsB,YAAY,CAAC,GAAG,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,EAAE,YAAY,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CA4H/G;AAgCD;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AACH,wBAAsB,WAAW,CAAC,IAAI,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC,CAiInE"}
package/dist/backfill.js CHANGED
@@ -145,10 +145,9 @@ export async function backfillRepo(did, collections, fetchTimeout) {
145
145
  httpErr.httpStatus = res.status;
146
146
  throw httpErr;
147
147
  }
148
- let carBytes = new Uint8Array(await res.arrayBuffer());
148
+ const carBytes = new Uint8Array(await res.arrayBuffer());
149
149
  carSizeBytes = carBytes.length;
150
150
  let { roots, blocks } = parseCarFrame(carBytes);
151
- carBytes = null; // free CAR bytes before bulk insert
152
151
  // Decode commit to get MST root
153
152
  const rootData = blocks.get(roots[0]);
154
153
  if (!rootData)
@@ -181,6 +180,7 @@ export async function backfillRepo(did, collections, fetchTimeout) {
181
180
  const blockData = blocks.get(entry.cid);
182
181
  if (!blockData)
183
182
  continue;
183
+ blocks.delete(entry.cid); // free block data as we go
184
184
  try {
185
185
  const { value: record } = cborDecode(blockData);
186
186
  if (!record?.$type)
@@ -202,7 +202,8 @@ export async function backfillRepo(did, collections, fetchTimeout) {
202
202
  });
203
203
  }
204
204
  }
205
- blocks = null; // free block map
205
+ blocks.free();
206
+ blocks = null;
206
207
  if (chunk.length > 0) {
207
208
  count += await bulkInsertRecords(chunk);
208
209
  }
package/dist/car.d.ts CHANGED
@@ -12,20 +12,32 @@
12
12
  * @module
13
13
  */
14
14
  /**
15
- * Parses a CARv1 binary frame into its root CIDs and block map.
15
+ * A memory-efficient block map that stores byte offsets into the original CAR
16
+ * buffer instead of copying block data. Implements the same `get`/`delete`/`size`
17
+ * interface as `Map<string, Uint8Array>` so it can be used as a drop-in replacement.
18
+ */
19
+ export declare class LazyBlockMap {
20
+ private offsets;
21
+ private carBytes;
22
+ constructor(carBytes: Uint8Array, offsets: Map<string, [number, number]>);
23
+ get(cid: string): Uint8Array | undefined;
24
+ delete(cid: string): boolean;
25
+ get size(): number;
26
+ [Symbol.iterator](): IterableIterator<[string, Uint8Array]>;
27
+ /** Release the underlying CAR buffer */
28
+ free(): void;
29
+ }
30
+ /**
31
+ * Parses a CARv1 binary frame into its root CIDs and a lazy block map.
16
32
  *
17
- * @param carBytes - Raw CAR file bytes (e.g. from `getRepo` or a firehose commit)
18
- * @returns `roots` — ordered list of root CID strings; `blocks` — map of CID string → raw block data
33
+ * The block map stores byte offsets into `carBytes` rather than copying data,
34
+ * reducing heap usage from O(total block bytes) to O(number of blocks * 16 bytes).
19
35
  *
20
- * @example
21
- * ```ts
22
- * const car = new Uint8Array(await res.arrayBuffer())
23
- * const { roots, blocks } = parseCarFrame(car)
24
- * const commitData = blocks.get(roots[0])
25
- * ```
36
+ * @param carBytes - Raw CAR file bytes (e.g. from `getRepo` or a firehose commit)
37
+ * @returns `roots` — ordered list of root CID strings; `blocks` — lazy block map
26
38
  */
27
39
  export declare function parseCarFrame(carBytes: Uint8Array): {
28
40
  roots: string[];
29
- blocks: Map<string, Uint8Array>;
41
+ blocks: LazyBlockMap;
30
42
  };
31
43
  //# sourceMappingURL=car.d.ts.map
package/dist/car.d.ts.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"car.d.ts","sourceRoot":"","sources":["../src/car.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAuCH;;;;;;;;;;;;GAYG;AACH,wBAAgB,aAAa,CAAC,QAAQ,EAAE,UAAU,GAAG;IACnD,KAAK,EAAE,MAAM,EAAE,CAAA;IACf,MAAM,EAAE,GAAG,CAAC,MAAM,EAAE,UAAU,CAAC,CAAA;CAChC,CAmCA"}
1
+ {"version":3,"file":"car.d.ts","sourceRoot":"","sources":["../src/car.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAuCH;;;;GAIG;AACH,qBAAa,YAAY;IACvB,OAAO,CAAC,OAAO,CAA+B;IAC9C,OAAO,CAAC,QAAQ,CAAmB;gBAEvB,QAAQ,EAAE,UAAU,EAAE,OAAO,EAAE,GAAG,CAAC,MAAM,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAKxE,GAAG,CAAC,GAAG,EAAE,MAAM,GAAG,UAAU,GAAG,SAAS;IAMxC,MAAM,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO;IAI5B,IAAI,IAAI,IAAI,MAAM,CAEjB;IAEA,CAAC,MAAM,CAAC,QAAQ,CAAC,IAAI,gBAAgB,CAAC,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC;IAO5D,wCAAwC;IACxC,IAAI,IAAI,IAAI;CAIb;AAED;;;;;;;;GAQG;AACH,wBAAgB,aAAa,CAAC,QAAQ,EAAE,UAAU,GAAG;IACnD,KAAK,EAAE,MAAM,EAAE,CAAA;IACf,MAAM,EAAE,YAAY,CAAA;CACrB,CAiCA"}
package/dist/car.js CHANGED
@@ -42,17 +42,50 @@ function parseCidFromBytes(bytes, offset) {
42
42
  return [bytes.slice(offset, pos), pos];
43
43
  }
44
44
  /**
45
- * Parses a CARv1 binary frame into its root CIDs and block map.
45
+ * A memory-efficient block map that stores byte offsets into the original CAR
46
+ * buffer instead of copying block data. Implements the same `get`/`delete`/`size`
47
+ * interface as `Map<string, Uint8Array>` so it can be used as a drop-in replacement.
48
+ */
49
+ export class LazyBlockMap {
50
+ offsets;
51
+ carBytes;
52
+ constructor(carBytes, offsets) {
53
+ this.carBytes = carBytes;
54
+ this.offsets = offsets;
55
+ }
56
+ get(cid) {
57
+ const range = this.offsets.get(cid);
58
+ if (!range || !this.carBytes)
59
+ return undefined;
60
+ return this.carBytes.subarray(range[0], range[1]);
61
+ }
62
+ delete(cid) {
63
+ return this.offsets.delete(cid);
64
+ }
65
+ get size() {
66
+ return this.offsets.size;
67
+ }
68
+ *[Symbol.iterator]() {
69
+ for (const [cid, range] of this.offsets) {
70
+ if (!this.carBytes)
71
+ return;
72
+ yield [cid, this.carBytes.subarray(range[0], range[1])];
73
+ }
74
+ }
75
+ /** Release the underlying CAR buffer */
76
+ free() {
77
+ this.carBytes = null;
78
+ this.offsets.clear();
79
+ }
80
+ }
81
+ /**
82
+ * Parses a CARv1 binary frame into its root CIDs and a lazy block map.
46
83
  *
47
- * @param carBytes - Raw CAR file bytes (e.g. from `getRepo` or a firehose commit)
48
- * @returns `roots` — ordered list of root CID strings; `blocks` — map of CID string → raw block data
84
+ * The block map stores byte offsets into `carBytes` rather than copying data,
85
+ * reducing heap usage from O(total block bytes) to O(number of blocks * 16 bytes).
49
86
  *
50
- * @example
51
- * ```ts
52
- * const car = new Uint8Array(await res.arrayBuffer())
53
- * const { roots, blocks } = parseCarFrame(car)
54
- * const commitData = blocks.get(roots[0])
55
- * ```
87
+ * @param carBytes - Raw CAR file bytes (e.g. from `getRepo` or a firehose commit)
88
+ * @returns `roots` — ordered list of root CID strings; `blocks` — lazy block map
56
89
  */
57
90
  export function parseCarFrame(carBytes) {
58
91
  let offset = 0;
@@ -66,8 +99,8 @@ export function parseCarFrame(carBytes) {
66
99
  // Our CBOR decoder converts tag-42 CIDs to { $link: "b..." } objects,
67
100
  // so roots may already be decoded strings
68
101
  const roots = (header.roots || []).map((root) => root?.$link ?? cidToString(root));
69
- // Parse blocks: each is varint(len) + CID + data
70
- const blocks = new Map();
102
+ // Build offset index: CID → [start, end] into carBytes
103
+ const offsets = new Map();
71
104
  while (offset < carBytes.length) {
72
105
  const [blockLen, afterBlockLen] = readVarint(carBytes, offset);
73
106
  offset = afterBlockLen;
@@ -76,9 +109,8 @@ export function parseCarFrame(carBytes) {
76
109
  const [cidBytes, afterCid] = parseCidFromBytes(carBytes, offset);
77
110
  const cid = cidToString(cidBytes);
78
111
  const dataLen = blockLen - (afterCid - offset);
79
- const data = carBytes.slice(afterCid, afterCid + dataLen);
80
- blocks.set(cid, data);
112
+ offsets.set(cid, [afterCid, afterCid + dataLen]);
81
113
  offset = afterCid + dataLen;
82
114
  }
83
- return { roots, blocks };
115
+ return { roots, blocks: new LazyBlockMap(carBytes, offsets) };
84
116
  }
package/dist/config.js CHANGED
@@ -23,7 +23,7 @@ export function loadConfig(configPath) {
23
23
  signalCollections: backfillRaw.signalCollections || undefined,
24
24
  repos: env.BACKFILL_REPOS ? env.BACKFILL_REPOS.split(',').map((s) => s.trim()) : backfillRaw.repos || undefined,
25
25
  fullNetwork: env.BACKFILL_FULL_NETWORK ? env.BACKFILL_FULL_NETWORK === 'true' : backfillRaw.fullNetwork || false,
26
- parallelism: parseInt(env.BACKFILL_PARALLELISM || '') || backfillRaw.parallelism || 5,
26
+ parallelism: parseInt(env.BACKFILL_PARALLELISM || '') || backfillRaw.parallelism || 3,
27
27
  fetchTimeout: parseInt(env.BACKFILL_FETCH_TIMEOUT || '') || backfillRaw.fetchTimeout || 300,
28
28
  maxRetries: parseInt(env.BACKFILL_MAX_RETRIES || '') || backfillRaw.maxRetries || 5,
29
29
  },
package/dist/mst.d.ts CHANGED
@@ -2,5 +2,7 @@ export interface MstEntry {
2
2
  path: string;
3
3
  cid: string;
4
4
  }
5
- export declare function walkMst(blocks: Map<string, Uint8Array>, rootCid: string): MstEntry[];
5
+ export declare function walkMst(blocks: {
6
+ get(cid: string): Uint8Array | undefined;
7
+ }, rootCid: string): Generator<MstEntry>;
6
8
  //# sourceMappingURL=mst.d.ts.map
package/dist/mst.d.ts.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"mst.d.ts","sourceRoot":"","sources":["../src/mst.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,MAAM,CAAA;IACZ,GAAG,EAAE,MAAM,CAAA;CACZ;AAED,wBAAgB,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,MAAM,EAAE,UAAU,CAAC,EAAE,OAAO,EAAE,MAAM,GAAG,QAAQ,EAAE,CAiCpF"}
1
+ {"version":3,"file":"mst.d.ts","sourceRoot":"","sources":["../src/mst.ts"],"names":[],"mappings":"AAEA,MAAM,WAAW,QAAQ;IACvB,IAAI,EAAE,MAAM,CAAA;IACZ,GAAG,EAAE,MAAM,CAAA;CACZ;AAED,wBAAiB,OAAO,CAAC,MAAM,EAAE;IAAE,GAAG,CAAC,GAAG,EAAE,MAAM,GAAG,UAAU,GAAG,SAAS,CAAA;CAAE,EAAE,OAAO,EAAE,MAAM,GAAG,SAAS,CAAC,QAAQ,CAAC,CA8BnH"}
package/dist/mst.js CHANGED
@@ -1,14 +1,13 @@
1
1
  import { cborDecode } from "./cbor.js";
2
- export function walkMst(blocks, rootCid) {
3
- const entries = [];
4
- function visit(cid, prefix) {
2
+ export function* walkMst(blocks, rootCid) {
3
+ function* visit(cid, prefix) {
5
4
  const data = blocks.get(cid);
6
5
  if (!data)
7
6
  return prefix;
8
7
  const { value: node } = cborDecode(data);
9
8
  // Visit left subtree
10
9
  if (node.l?.$link)
11
- visit(node.l.$link, prefix);
10
+ yield* visit(node.l.$link, prefix);
12
11
  let lastKey = prefix;
13
12
  for (const entry of node.e || []) {
14
13
  const keySuffix = entry.k instanceof Uint8Array ? new TextDecoder().decode(entry.k) : entry.k;
@@ -16,15 +15,14 @@ export function walkMst(blocks, rootCid) {
16
15
  const fullKey = lastKey.substring(0, prefixLen) + keySuffix;
17
16
  lastKey = fullKey;
18
17
  if (entry.v?.$link) {
19
- entries.push({ path: fullKey, cid: entry.v.$link });
18
+ yield { path: fullKey, cid: entry.v.$link };
20
19
  }
21
20
  // Visit right subtree
22
21
  if (entry.t?.$link) {
23
- visit(entry.t.$link, lastKey);
22
+ yield* visit(entry.t.$link, lastKey);
24
23
  }
25
24
  }
26
25
  return lastKey;
27
26
  }
28
- visit(rootCid, '');
29
- return entries;
27
+ yield* visit(rootCid, '');
30
28
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hatk/hatk",
3
- "version": "0.0.1-alpha.8",
3
+ "version": "0.0.1-alpha.9",
4
4
  "license": "MIT",
5
5
  "bin": {
6
6
  "hatk": "dist/cli.js"