@hatk/hatk 0.0.1-alpha.10 → 0.0.1-alpha.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/backfill.d.ts.map +1 -1
- package/dist/backfill.js +31 -21
- package/dist/car.d.ts +20 -0
- package/dist/car.d.ts.map +1 -1
- package/dist/car.js +108 -0
- package/dist/db.d.ts +1 -1
- package/dist/db.d.ts.map +1 -1
- package/dist/db.js +8 -39
- package/dist/main.js +1 -3
- package/package.json +1 -1
package/dist/backfill.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"backfill.d.ts","sourceRoot":"","sources":["../src/backfill.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"backfill.d.ts","sourceRoot":"","sources":["../src/backfill.ts"],"names":[],"mappings":"AAiBA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAA;AAEjD,6CAA6C;AAC7C,UAAU,YAAY;IACpB,wFAAwF;IACxF,MAAM,EAAE,MAAM,CAAA;IACd,8FAA8F;IAC9F,MAAM,EAAE,MAAM,CAAA;IACd,yEAAyE;IACzE,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,CAAA;IACxB,qDAAqD;IACrD,MAAM,EAAE,cAAc,CAAA;CACvB;AAuGD;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,wBAAsB,YAAY,CAAC,GAAG,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,EAAE,YAAY,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAwI/G;AAgCD;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AACH,wBAAsB,WAAW,CAAC,IAAI,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC,CAiInE"}
|
package/dist/backfill.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { parseCarStream } from "./car.js";
|
|
2
2
|
import { cborDecode } from "./cbor.js";
|
|
3
3
|
import { walkMst } from "./mst.js";
|
|
4
|
-
import { setRepoStatus, getRepoStatus, getRepoRetryInfo, listRetryEligibleRepos, listPendingRepos, querySQL, runSQL, getSchema, bulkInsertRecords, } from "./db.js";
|
|
4
|
+
import { setRepoStatus, getRepoStatus, getRepoRev, getRepoRetryInfo, listRetryEligibleRepos, listPendingRepos, querySQL, runSQL, getSchema, bulkInsertRecords, } from "./db.js";
|
|
5
5
|
import { emit, timer } from "./logger.js";
|
|
6
6
|
/** In-memory cache of DID → PDS resolution results to avoid redundant lookups. */
|
|
7
7
|
const pdsCache = new Map();
|
|
@@ -128,6 +128,7 @@ export async function backfillRepo(did, collections, fetchTimeout) {
|
|
|
128
128
|
let error;
|
|
129
129
|
let resolvedPds;
|
|
130
130
|
let resolvedHandle = null;
|
|
131
|
+
let resolvedSince = null;
|
|
131
132
|
let retryCount;
|
|
132
133
|
let retryAfter;
|
|
133
134
|
const controller = new AbortController();
|
|
@@ -137,17 +138,23 @@ export async function backfillRepo(did, collections, fetchTimeout) {
|
|
|
137
138
|
resolvedPds = pdsUrl;
|
|
138
139
|
resolvedHandle = handle;
|
|
139
140
|
timeout = setTimeout(() => controller.abort(), fetchTimeout * 1000);
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
});
|
|
141
|
+
let lastRev = await getRepoRev(did);
|
|
142
|
+
const baseUrl = `${resolvedPds}/xrpc/com.atproto.sync.getRepo?did=${encodeURIComponent(did)}`;
|
|
143
|
+
let repoUrl = lastRev ? `${baseUrl}&since=${encodeURIComponent(lastRev)}` : baseUrl;
|
|
144
|
+
let res = await fetch(repoUrl, { signal: controller.signal });
|
|
145
|
+
// If the PDS rejected our `since` rev (compacted history), fall back to full import
|
|
146
|
+
if (res.status === 400 && lastRev) {
|
|
147
|
+
lastRev = null;
|
|
148
|
+
res = await fetch(baseUrl, { signal: controller.signal });
|
|
149
|
+
}
|
|
143
150
|
if (!res.ok) {
|
|
144
151
|
const httpErr = new Error(`getRepo failed for ${did}: ${res.status}`);
|
|
145
152
|
httpErr.httpStatus = res.status;
|
|
146
153
|
throw httpErr;
|
|
147
154
|
}
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
155
|
+
resolvedSince = lastRev;
|
|
156
|
+
const { roots, blocks, byteLength } = await parseCarStream(res.body);
|
|
157
|
+
carSizeBytes = byteLength;
|
|
151
158
|
// Decode commit to get MST root
|
|
152
159
|
const rootData = blocks.get(roots[0]);
|
|
153
160
|
if (!rootData)
|
|
@@ -156,17 +163,20 @@ export async function backfillRepo(did, collections, fetchTimeout) {
|
|
|
156
163
|
// Walk MST to find all record paths
|
|
157
164
|
const entries = walkMst(blocks, commit.data.$link);
|
|
158
165
|
// Delete existing records for this DID before re-importing so deletions are reflected
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
await runSQL(`DELETE FROM ${
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
166
|
+
// Only on full imports (no since) — diff CARs only contain changes
|
|
167
|
+
if (!lastRev) {
|
|
168
|
+
for (const col of collections) {
|
|
169
|
+
const schema = getSchema(col);
|
|
170
|
+
if (!schema)
|
|
171
|
+
continue;
|
|
172
|
+
await runSQL(`DELETE FROM ${schema.tableName} WHERE did = $1`, did);
|
|
173
|
+
for (const child of schema.children) {
|
|
174
|
+
await runSQL(`DELETE FROM ${child.tableName} WHERE parent_did = $1`, did);
|
|
175
|
+
}
|
|
176
|
+
for (const union of schema.unions) {
|
|
177
|
+
for (const branch of union.branches) {
|
|
178
|
+
await runSQL(`DELETE FROM ${branch.tableName} WHERE parent_did = $1`, did);
|
|
179
|
+
}
|
|
170
180
|
}
|
|
171
181
|
}
|
|
172
182
|
}
|
|
@@ -202,8 +212,6 @@ export async function backfillRepo(did, collections, fetchTimeout) {
|
|
|
202
212
|
});
|
|
203
213
|
}
|
|
204
214
|
}
|
|
205
|
-
blocks.free();
|
|
206
|
-
blocks = null;
|
|
207
215
|
if (chunk.length > 0) {
|
|
208
216
|
count += await bulkInsertRecords(chunk);
|
|
209
217
|
}
|
|
@@ -238,6 +246,8 @@ export async function backfillRepo(did, collections, fetchTimeout) {
|
|
|
238
246
|
error,
|
|
239
247
|
pds_url: resolvedPds,
|
|
240
248
|
car_size_bytes: carSizeBytes,
|
|
249
|
+
import_mode: carSizeBytes !== undefined ? (resolvedSince ? 'diff' : 'full') : undefined,
|
|
250
|
+
since_rev: resolvedSince,
|
|
241
251
|
retry_count: retryCount,
|
|
242
252
|
retry_after: retryAfter,
|
|
243
253
|
permanent_failure: retryCount === 999 ? true : undefined,
|
package/dist/car.d.ts
CHANGED
|
@@ -27,6 +27,26 @@ export declare class LazyBlockMap {
|
|
|
27
27
|
/** Release the underlying CAR buffer */
|
|
28
28
|
free(): void;
|
|
29
29
|
}
|
|
30
|
+
/**
|
|
31
|
+
* Parses a CARv1 stream incrementally from a `ReadableStream`.
|
|
32
|
+
*
|
|
33
|
+
* Instead of buffering the entire CAR into a single ArrayBuffer, this reads
|
|
34
|
+
* chunks from the stream and parses blocks as they arrive. Each block's data
|
|
35
|
+
* is `.slice()`d into its own small `Uint8Array`, allowing V8 to GC individual
|
|
36
|
+
* blocks as they're consumed during the MST walk.
|
|
37
|
+
*
|
|
38
|
+
* This is critical for backfill where multiple workers download 30-90MB CARs
|
|
39
|
+
* concurrently — buffered downloads cause OOMs because `ArrayBuffer` memory
|
|
40
|
+
* is "external" to V8's heap and not controlled by `--max-old-space-size`.
|
|
41
|
+
*
|
|
42
|
+
* @param body - The response body stream (e.g. `res.body` from `fetch()`)
|
|
43
|
+
* @returns `roots` — root CID strings; `blocks` — map of CID → block data; `byteLength` — total bytes read
|
|
44
|
+
*/
|
|
45
|
+
export declare function parseCarStream(body: ReadableStream<Uint8Array>): Promise<{
|
|
46
|
+
roots: string[];
|
|
47
|
+
blocks: Map<string, Uint8Array>;
|
|
48
|
+
byteLength: number;
|
|
49
|
+
}>;
|
|
30
50
|
/**
|
|
31
51
|
* Parses a CARv1 binary frame into its root CIDs and a lazy block map.
|
|
32
52
|
*
|
package/dist/car.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"car.d.ts","sourceRoot":"","sources":["../src/car.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAuCH;;;;GAIG;AACH,qBAAa,YAAY;IACvB,OAAO,CAAC,OAAO,CAA+B;IAC9C,OAAO,CAAC,QAAQ,CAAmB;gBAEvB,QAAQ,EAAE,UAAU,EAAE,OAAO,EAAE,GAAG,CAAC,MAAM,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAKxE,GAAG,CAAC,GAAG,EAAE,MAAM,GAAG,UAAU,GAAG,SAAS;IAMxC,MAAM,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO;IAI5B,IAAI,IAAI,IAAI,MAAM,CAEjB;IAEA,CAAC,MAAM,CAAC,QAAQ,CAAC,IAAI,gBAAgB,CAAC,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC;IAO5D,wCAAwC;IACxC,IAAI,IAAI,IAAI;CAIb;AAED;;;;;;;;GAQG;AACH,wBAAgB,aAAa,CAAC,QAAQ,EAAE,UAAU,GAAG;IACnD,KAAK,EAAE,MAAM,EAAE,CAAA;IACf,MAAM,EAAE,YAAY,CAAA;CACrB,CAiCA"}
|
|
1
|
+
{"version":3,"file":"car.d.ts","sourceRoot":"","sources":["../src/car.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAuCH;;;;GAIG;AACH,qBAAa,YAAY;IACvB,OAAO,CAAC,OAAO,CAA+B;IAC9C,OAAO,CAAC,QAAQ,CAAmB;gBAEvB,QAAQ,EAAE,UAAU,EAAE,OAAO,EAAE,GAAG,CAAC,MAAM,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAKxE,GAAG,CAAC,GAAG,EAAE,MAAM,GAAG,UAAU,GAAG,SAAS;IAMxC,MAAM,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO;IAI5B,IAAI,IAAI,IAAI,MAAM,CAEjB;IAEA,CAAC,MAAM,CAAC,QAAQ,CAAC,IAAI,gBAAgB,CAAC,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC;IAO5D,wCAAwC;IACxC,IAAI,IAAI,IAAI;CAIb;AAED;;;;;;;;;;;;;;GAcG;AACH,wBAAsB,cAAc,CAAC,IAAI,EAAE,cAAc,CAAC,UAAU,CAAC,GAAG,OAAO,CAAC;IAC9E,KAAK,EAAE,MAAM,EAAE,CAAA;IACf,MAAM,EAAE,GAAG,CAAC,MAAM,EAAE,UAAU,CAAC,CAAA;IAC/B,UAAU,EAAE,MAAM,CAAA;CACnB,CAAC,CAsGD;AAED;;;;;;;;GAQG;AACH,wBAAgB,aAAa,CAAC,QAAQ,EAAE,UAAU,GAAG;IACnD,KAAK,EAAE,MAAM,EAAE,CAAA;IACf,MAAM,EAAE,YAAY,CAAA;CACrB,CAiCA"}
|
package/dist/car.js
CHANGED
|
@@ -78,6 +78,114 @@ export class LazyBlockMap {
|
|
|
78
78
|
this.offsets.clear();
|
|
79
79
|
}
|
|
80
80
|
}
|
|
81
|
+
/**
 * Parses a CARv1 stream incrementally from a `ReadableStream`.
 *
 * Instead of buffering the entire CAR into a single ArrayBuffer, this reads
 * chunks from the stream and parses blocks as they arrive. Each block's data
 * is `.slice()`d into its own small `Uint8Array`, allowing V8 to GC individual
 * blocks as they're consumed during the MST walk.
 *
 * This is critical for backfill where multiple workers download 30-90MB CARs
 * concurrently — buffered downloads cause OOMs because `ArrayBuffer` memory
 * is "external" to V8's heap and not controlled by `--max-old-space-size`.
 *
 * On any failure (read error or malformed input) the stream is cancelled and
 * the reader lock is released before the error is re-thrown, so the caller is
 * never left holding a locked, half-consumed body.
 *
 * @param body - The response body stream (e.g. `res.body` from `fetch()`)
 * @returns `roots` — root CID strings; `blocks` — map of CID → block data; `byteLength` — total bytes read
 * @throws {Error} `Empty CAR stream`, `Truncated CAR header`, `Truncated CAR block`,
 *   `Varint too long`, `Unexpected end of varint`, or `Malformed CAR block: ...`
 *   when the input is not a well-formed CARv1 frame; read errors from the
 *   underlying stream are propagated unchanged.
 */
export async function parseCarStream(body) {
    const reader = body.getReader();
    // Growable buffer with position tracking. We reuse a single allocation and
    // compact (shift data to front) when the read position passes the midpoint,
    // avoiding per-chunk allocations and subarray references that pin old memory.
    let buf = new Uint8Array(64 * 1024);
    let pos = 0; // read cursor
    let len = 0; // bytes of valid data in buf
    let byteLength = 0; // total bytes received from the stream so far

    // Ensure at least `need` bytes are available at buf[pos..pos+need).
    // Resolves to false when the stream ends before that many bytes arrive.
    async function fill(need) {
        while (len - pos < need) {
            const { done, value } = await reader.read();
            if (done)
                return (len - pos) >= need;
            byteLength += value.length;
            // Compact: shift remaining data to front when read cursor passes midpoint
            if (pos > 0 && pos > buf.length >>> 1) {
                buf.copyWithin(0, pos, len);
                len -= pos;
                pos = 0;
            }
            // Grow if needed (at least double, so repeated appends stay amortised)
            const required = len + value.length;
            if (required > buf.length) {
                const newBuf = new Uint8Array(Math.max(required, buf.length * 2));
                newBuf.set(buf.subarray(0, len));
                buf = newBuf;
            }
            buf.set(value, len);
            len += value.length;
        }
        return true;
    }

    // Advance the read cursor past `n` already-parsed bytes.
    function consume(n) {
        pos += n;
    }

    // Read an unsigned LEB128 varint starting at buf[pos] without consuming it.
    // Returns [value, encodedSizeInBytes].
    function readVarintFromBuf() {
        let value = 0;
        let shift = 0;
        let p = pos;
        while (p < len) {
            const byte = buf[p++];
            // Plain arithmetic instead of `<<` / `|=`: bitwise operators work on
            // signed 32-bit integers, so a shift of 28 can flip the sign bit and a
            // shift of 35 silently wraps to a shift of 3. At most 6 groups of 7
            // bits (42 bits) are accepted, which stays well inside the exact
            // integer range of a double.
            value += (byte & 0x7f) * 2 ** shift;
            if ((byte & 0x80) === 0)
                return [value, p - pos];
            shift += 7;
            if (shift > 35)
                throw new Error('Varint too long');
        }
        throw new Error('Unexpected end of varint');
    }

    try {
        // Parse header: varint(headerLen) + CBOR(header)
        if (!(await fill(1)))
            throw new Error('Empty CAR stream');
        // Prefetch up to 10 bytes for the varint; readVarintFromBuf bounds to `len`,
        // so a short stream here is not an error by itself.
        await fill(10);
        const [headerLen, headerVarintSize] = readVarintFromBuf();
        consume(headerVarintSize);
        if (!(await fill(headerLen)))
            throw new Error('Truncated CAR header');
        // .slice() copies out of the reusable buffer
        const headerSlice = buf.slice(pos, pos + headerLen);
        const { value: header } = cborDecode(headerSlice);
        consume(headerLen);
        // Roots may already be decoded as `{ $link }` objects; otherwise stringify the raw CID
        const roots = (header.roots || []).map((root) => root?.$link ?? cidToString(root));

        // Parse blocks: repeated varint(blockLen) + CID + data
        const blocks = new Map();
        while (true) {
            if (!(await fill(1)))
                break; // clean end of stream
            // Prefetch up to 10 bytes for the varint; readVarintFromBuf bounds to `len`
            await fill(10);
            const [blockLen, blockVarintSize] = readVarintFromBuf();
            consume(blockVarintSize);
            if (blockLen === 0)
                break; // zero-length section is treated as end of data
            if (!(await fill(blockLen)))
                throw new Error('Truncated CAR block');
            const [cidBytes, afterCid] = parseCidFromBytes(buf, pos);
            const cid = cidToString(cidBytes);
            const cidLen = afterCid - pos;
            // A CID that claims more bytes than the whole section would make the
            // data slice below start past its own end.
            if (cidLen > blockLen)
                throw new Error('Malformed CAR block: CID exceeds block length');
            // .slice() creates an independent copy — the buffer can be reused
            const data = buf.slice(afterCid, afterCid + blockLen - cidLen);
            blocks.set(cid, data);
            consume(blockLen);
        }
        return { roots, blocks, byteLength };
    }
    catch (err) {
        // Best-effort: tell the source we no longer want the remaining bytes.
        // cancel() rejects when the stream has already errored (e.g. an aborted
        // fetch); that rejection carries no extra information, so the original
        // error is the one that gets re-thrown.
        await reader.cancel(err).catch(() => { });
        throw err;
    }
    finally {
        reader.releaseLock();
        // Release the internal buffer
        buf = null;
    }
}
|
|
81
189
|
/**
|
|
82
190
|
* Parses a CARv1 binary frame into its root CIDs and a lazy block map.
|
|
83
191
|
*
|
package/dist/db.d.ts
CHANGED
|
@@ -14,6 +14,7 @@ export declare function setRepoStatus(did: string, status: string, rev?: string,
|
|
|
14
14
|
retryAfter?: number;
|
|
15
15
|
handle?: string | null;
|
|
16
16
|
}): Promise<void>;
|
|
17
|
+
export declare function getRepoRev(did: string): Promise<string | null>;
|
|
17
18
|
export declare function getRepoRetryInfo(did: string): Promise<{
|
|
18
19
|
retryCount: number;
|
|
19
20
|
retryAfter: number;
|
|
@@ -129,6 +130,5 @@ export declare function isTakendownDid(did: string): Promise<boolean>;
|
|
|
129
130
|
export declare function getPreferences(did: string): Promise<Record<string, any>>;
|
|
130
131
|
export declare function putPreference(did: string, key: string, value: any): Promise<void>;
|
|
131
132
|
export declare function filterTakendownDids(dids: string[]): Promise<Set<string>>;
|
|
132
|
-
export declare function backfillChildTables(): Promise<void>;
|
|
133
133
|
export {};
|
|
134
134
|
//# sourceMappingURL=db.d.ts.map
|
package/dist/db.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"db.d.ts","sourceRoot":"","sources":["../src/db.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,KAAK,WAAW,EAAe,MAAM,aAAa,CAAA;AAC3D,OAAO,KAAK,EAAE,GAAG,EAAE,MAAM,gBAAgB,CAAA;AAUzC,wBAAgB,aAAa,IAAI,IAAI,CAUpC;AA+DD,wBAAsB,QAAQ,CAAC,UAAU,EAAE,KAAK,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,GAAG,EAAE,CAAA;CAAE,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,CAkB/F;AAiBD,wBAAsB,YAAY,CAChC,MAAM,EAAE,MAAM,EACd,YAAY,EAAE,WAAW,EAAE,EAC3B,aAAa,EAAE,MAAM,EAAE,GACtB,OAAO,CAAC,IAAI,CAAC,
|
|
1
|
+
{"version":3,"file":"db.d.ts","sourceRoot":"","sources":["../src/db.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,KAAK,WAAW,EAAe,MAAM,aAAa,CAAA;AAC3D,OAAO,KAAK,EAAE,GAAG,EAAE,MAAM,gBAAgB,CAAA;AAUzC,wBAAgB,aAAa,IAAI,IAAI,CAUpC;AA+DD,wBAAsB,QAAQ,CAAC,UAAU,EAAE,KAAK,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,GAAG,EAAE,CAAA;CAAE,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,CAkB/F;AAiBD,wBAAsB,YAAY,CAChC,MAAM,EAAE,MAAM,EACd,YAAY,EAAE,WAAW,EAAE,EAC3B,aAAa,EAAE,MAAM,EAAE,GACtB,OAAO,CAAC,IAAI,CAAC,CAoEf;AAED,wBAAsB,SAAS,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAGnE;AAED,wBAAsB,SAAS,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAEzE;AAED,wBAAsB,aAAa,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAGvE;AAED,wBAAsB,aAAa,CACjC,GAAG,EAAE,MAAM,EACX,MAAM,EAAE,MAAM,EACd,GAAG,CAAC,EAAE,MAAM,EACZ,IAAI,CAAC,EAAE;IAAE,UAAU,CAAC,EAAE,MAAM,CAAC;IAAC,UAAU,CAAC,EAAE,MAAM,CAAC;IAAC,MAAM,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;CAAE,GAC1E,OAAO,CAAC,IAAI,CAAC,CA0Cf;AAED,wBAAsB,UAAU,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAGpE;AAED,wBAAsB,gBAAgB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;IAAE,UAAU,EAAE,MAAM,CAAC;IAAC,UAAU,EAAE,MAAM,CAAA;CAAE,GAAG,IAAI,CAAC,CAI9G;AAED,wBAAsB,sBAAsB,CAAC,UAAU,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAQlF;AAED,wBAAsB,gBAAgB,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC,CAG1D;AAED,wBAAsB,mBAAmB,IAAI,OAAO,CAAC,KAAK,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,CAAC,CAAC,CAE3F;AAED,wBAAsB,kBAAkB,CACtC,IAAI,GAAE;IACJ,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,CAAC,CAAC,EAAE,MAAM,CAAA;CACN,GACL,OAAO,CAAC;IAAE,KAAK,EAAE,GAAG,EAAE,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,CAAC,CA6B1C;AAED,wBAAsB,mBAAmB,IAAI,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAO3E;AAED,wBAAsB,aAAa,IAAI,OAAO,CAAC,MAAM,CAAC,CAGrD;AAED,wBAAgB,aAAa,CAC3B,UAAU,EAAE,MAAM,EAClB,GAAG,EAAE,MAAM,EACX,GAAG,EAAE,MAAM,EACX,SAAS,EAAE,MAAM,EACjB,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAC1B;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,GAAG,EAAE,CAAA;CA
AE,CA+BhC;AAED,wBAAsB,YAAY,CAChC,UAAU,EAAE,MAAM,EAClB,GAAG,EAAE,MAAM,EACX,GAAG,EAAE,MAAM,EACX,SAAS,EAAE,MAAM,EACjB,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,GAC1B,OAAO,CAAC,IAAI,CAAC,CAwGf;AAWD,wBAAsB,YAAY,CAAC,UAAU,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,CAYjF;AAED,wBAAsB,YAAY,CAChC,MAAM,EAAE,KAAK,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,GAAG,EAAE,MAAM,CAAC;IAAC,GAAG,EAAE,MAAM,CAAC;IAAC,GAAG,CAAC,EAAE,OAAO,CAAC;IAAC,GAAG,CAAC,EAAE,MAAM,CAAC;IAAC,GAAG,CAAC,EAAE,MAAM,CAAA;CAAE,CAAC,GAClG,OAAO,CAAC,IAAI,CAAC,CAsBf;AAED,wBAAsB,kBAAkB,CACtC,IAAI,EAAE,MAAM,EAAE,GACb,OAAO,CACR,GAAG,CAAC,MAAM,EAAE,KAAK,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,GAAG,EAAE,MAAM,CAAC;IAAC,GAAG,EAAE,MAAM,CAAC;IAAC,GAAG,EAAE,OAAO,CAAC;IAAC,GAAG,EAAE,MAAM,CAAC;IAAC,GAAG,EAAE,MAAM,GAAG,IAAI,CAAA;CAAE,CAAC,CAAC,CAC7G,CAqBA;AAED,MAAM,WAAW,UAAU;IACzB,UAAU,EAAE,MAAM,CAAA;IAClB,GAAG,EAAE,MAAM,CAAA;IACX,GAAG,EAAE,MAAM,CAAA;IACX,GAAG,EAAE,MAAM,CAAA;IACX,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAA;CAC5B;AAED,wBAAsB,iBAAiB,CAAC,OAAO,EAAE,UAAU,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC,CAwQ9E;AAED,UAAU,SAAS;IACjB,KAAK,CAAC,EAAE,MAAM,CAAA;IACd,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;IAChC,IAAI,CAAC,EAAE,MAAM,CAAA;IACb,KAAK,CAAC,EAAE,KAAK,GAAG,MAAM,CAAA;CACvB;AAED,wBAAsB,YAAY,CAChC,UAAU,EAAE,MAAM,EAClB,IAAI,GAAE,SAAc,GACnB,OAAO,CAAC;IAAE,OAAO,EAAE,GAAG,EAAE,CAAC;IAAC,MAAM,CAAC,EAAE,MAAM,CAAA;CAAE,CAAC,CAoF9C;AAED,wBAAsB,cAAc,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,GAAG,GAAG,IAAI,CAAC,CAgCrE;AAED,wBAAsB,gBAAgB,CAAC,UAAU,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,GAAG,EAAE,CAAC,CAqCzF;AAED;;;;;;;;;;;;;;GAcG;AACH,wBAAsB,aAAa,CACjC,UAAU,EAAE,MAAM,EAClB,KAAK,EAAE,MAAM,EACb,IAAI,GAAE;IAAE,KAAK,CAAC,EAAE,MAAM,CAAC;IAAC,MAAM,CAAC,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,OAAO,CAAA;CAAO,GAC9D,OAAO,CAAC;IAAE,OAAO,EAAE,GAAG,EAAE,CAAC;IAAC,MAAM,CAAC,EAAE,MAAM,CAAA;CAAE,CAAC,CAmN9C;AAGD,wBAAsB,QAAQ,CAAC,GAAG,EAAE,MAAM,EAAE,MAAM,GAAE,GAAG,EAAO,GAAG,OAAO,CAAC,GAAG,EAAE,CAAC,CAE9E;AAED,wBAAsB,MAAM,CAAC
,GAAG,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE,GAAG,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAEzE;AAED,wBAAgB,SAAS,CAAC,UAAU,EAAE,MAAM,GAAG,WAAW,GAAG,SAAS,CAErE;AAED,wBAAsB,YAAY,CAAC,UAAU,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAKpG;AAED,wBAAsB,iBAAiB,CACrC,UAAU,EAAE,MAAM,EAClB,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,MAAM,EAAE,GACf,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAc9B;AAED,wBAAsB,WAAW,CAAC,UAAU,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,GAAG,GAAG,IAAI,CAAC,CAKvG;AAED,wBAAsB,gBAAgB,CACpC,UAAU,EAAE,MAAM,EAClB,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,MAAM,EAAE,GACf,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,GAAG,EAAE,CAAC,CAAC,CA6B7B;AAED,wBAAsB,kBAAkB,CACtC,UAAU,EAAE,MAAM,EAClB,KAAK,EAAE,MAAM,EACb,MAAM,EAAE,MAAM,EAAE,GACf,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CASpC;AAED,wBAAsB,eAAe,CACnC,UAAU,EAAE,MAAM,EAClB,UAAU,EAAE;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,EAAE,GAC7C,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAOxB;AAKD,wBAAgB,cAAc,CAAC,CAAC,EAAE,GAAG,GAAG,GAAG,CAI1C;AAED,wBAAsB,YAAY,CAAC,cAAc,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,GAAG,EAAE,CAAC,CAAC,CAW5G;AAED,wBAAgB,UAAU,CACxB,GAAG,EAAE,GAAG,EACR,SAAS,CAAC,EAAE,GAAG,CAAC,MAAM,EAAE,GAAG,CAAC,MAAM,EAAE,GAAG,EAAE,CAAC,CAAC,EAC3C,SAAS,CAAC,EAAE,GAAG,CAAC,MAAM,EAAE,GAAG,CAAC,MAAM,EAAE,GAAG,CAAC,MAAM,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC,GACvD,GAAG,CAAC,OAAO,CAAC,GAAG,IAAI,CAiGrB;AAED,wBAAgB,UAAU,CAAC,OAAO,EAAE,OAAO,EAAE,GAAG,EAAE,MAAM,GAAG,MAAM,CAGhE;AAED,wBAAgB,YAAY,CAAC,MAAM,EAAE,MAAM,GAAG;IAAE,OAAO,EAAE,MAAM,CAAC;IAAC,GAAG,EAAE,MAAM,CAAA;CAAE,GAAG,IAAI,CASpF;AAED,wBAAsB,gBAAgB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,GAAG,EAAE,CAAC,CAKlE;AAED,wBAAsB,cAAc,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,GAAE,MAAW,GAAG,OAAO,CAAC,GAAG,EAAE,CAAC,CAMtF;AAED,wBAAsB,qBAAqB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAOxE;AAED,wBAAsB,sBAAsB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAO3E;AAED,wBAAsB,cAAc,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,CAGlE;AAED,wBAAsB,cAAc,CAAC
,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,CAW9E;AAED,wBAAsB,aAAa,CAAC,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,GAAG,GAAG,OAAO,CAAC,IAAI,CAAC,CAQvF;AAED,wBAAsB,mBAAmB,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAK9E"}
|
package/dist/db.js
CHANGED
|
@@ -114,7 +114,10 @@ async function all(sql, ...params) {
|
|
|
114
114
|
return enqueue('read', () => allDirect(sql, ...params));
|
|
115
115
|
}
|
|
116
116
|
export async function initDatabase(dbPath, tableSchemas, ddlStatements) {
|
|
117
|
-
instance = await DuckDBInstance.create(dbPath === ':memory:' ? undefined : dbPath
|
|
117
|
+
instance = await DuckDBInstance.create(dbPath === ':memory:' ? undefined : dbPath, {
|
|
118
|
+
memory_limit: '512MB',
|
|
119
|
+
threads: '2',
|
|
120
|
+
});
|
|
118
121
|
con = await instance.connect();
|
|
119
122
|
readCon = await instance.connect();
|
|
120
123
|
for (const schema of tableSchemas) {
|
|
@@ -200,6 +203,10 @@ export async function setRepoStatus(did, status, rev, opts) {
|
|
|
200
203
|
await run(`INSERT OR IGNORE INTO _repos (did, status) VALUES ($1, $2)`, did, status);
|
|
201
204
|
}
|
|
202
205
|
}
|
|
206
|
+
/**
 * Looks up the `rev` value stored in the `_repos` table for a repository.
 *
 * @param did - DID used as the `did = $1` lookup parameter
 * @returns the `rev` of the first matching row, or `null` when there is no
 *   row or its `rev` is null/undefined
 */
export async function getRepoRev(did) {
    const matches = await all(`SELECT rev FROM _repos WHERE did = $1`, did);
    const firstMatch = matches[0];
    // No row for this DID
    if (firstMatch == null) {
        return null;
    }
    // Row exists but may carry a NULL rev; normalise undefined to null as well
    return firstMatch.rev ?? null;
}
|
|
203
210
|
export async function getRepoRetryInfo(did) {
|
|
204
211
|
const rows = await all(`SELECT retry_count, retry_after FROM _repos WHERE did = $1`, did);
|
|
205
212
|
if (rows.length === 0)
|
|
@@ -1321,41 +1328,3 @@ export async function filterTakendownDids(dids) {
|
|
|
1321
1328
|
const rows = await all(`SELECT did FROM _repos WHERE did IN (${placeholders}) AND status = 'takendown'`, ...dids);
|
|
1322
1329
|
return new Set(rows.map((r) => r.did));
|
|
1323
1330
|
}
|
|
1324
|
-
/**
 * Repopulates child tables from the JSON array column of their parent table.
 *
 * For every child table of every schema in `schemas`:
 *  - skip when the parent table is empty;
 *  - skip when the child table already references at least 90% as many
 *    distinct `parent_uri` values as the parent has rows;
 *  - otherwise delete every row of the child table and re-insert one row per
 *    element of the parent's JSON column, dropping elements whose NOT NULL
 *    columns are missing.
 *
 * A failure while rebuilding one child table is logged with `console.warn`
 * and does not stop the remaining tables from being processed.
 *
 * @returns a promise that resolves once every child table has been examined
 */
export async function backfillChildTables() {
    // Runs a COUNT-style query and returns the `n` column of its first row (0 when absent or falsy).
    const countOf = async (sql) => {
        const rows = await all(sql);
        return rows[0]?.n || 0;
    };
    for (const [, parentSchema] of schemas) {
        for (const childSchema of parentSchema.children) {
            // Check if child table needs backfill (significantly fewer rows than parent)
            const parentRows = await countOf(`SELECT COUNT(*)::INTEGER as n FROM ${parentSchema.tableName}`);
            if (parentRows === 0) {
                continue;
            }
            const coveredParents = await countOf(`SELECT COUNT(DISTINCT parent_uri)::INTEGER as n FROM ${childSchema.tableName}`);
            const alreadyPopulated = coveredParents >= parentRows * 0.9;
            if (alreadyPopulated) {
                continue;
            }
            console.log(`[db] Backfilling ${childSchema.tableName} from ${parentSchema.tableName}...`);
            const jsonColumn = toSnakeCase(childSchema.fieldName);
            // Build the column list, the SELECT expressions and the WHERE conditions in one pass
            const targetColumns = ['parent_uri', 'parent_did'];
            const selectExprs = [];
            const conditions = [`p.${jsonColumn} IS NOT NULL`];
            for (const column of childSchema.columns) {
                const extracted = `json_extract_string(item.val, '$.${column.originalName}')`;
                targetColumns.push(column.name);
                selectExprs.push(extracted);
                if (column.notNull) {
                    conditions.push(`${extracted} IS NOT NULL`);
                }
            }
            const selectList = selectExprs.join(', ');
            const wherePredicate = conditions.join(' AND ');
            try {
                await run(`DELETE FROM ${childSchema.tableName}`);
                await run(`
INSERT INTO ${childSchema.tableName} (${targetColumns.join(', ')})
SELECT p.uri, p.did, ${selectList}
FROM ${parentSchema.tableName} p,
unnest(from_json(p.${jsonColumn}::JSON, '["json"]')) AS item(val)
WHERE ${wherePredicate}
`);
                const inserted = await countOf(`SELECT COUNT(*)::INTEGER as n FROM ${childSchema.tableName}`);
                console.log(`[db] Backfilled ${childSchema.tableName}: ${inserted} rows`);
            }
            catch (err) {
                // Deliberately non-fatal: report and move on to the next child table
                console.warn(`[db] Backfill skipped for ${childSchema.tableName}: ${err.message}`);
            }
        }
    }
}
|
package/dist/main.js
CHANGED
|
@@ -5,7 +5,7 @@ import { log } from "./logger.js";
|
|
|
5
5
|
import { loadConfig } from "./config.js";
|
|
6
6
|
import { loadLexicons, storeLexicons, discoverCollections, generateTableSchema, generateCreateTableSQL, } from "./schema.js";
|
|
7
7
|
import { discoverViews } from "./views.js";
|
|
8
|
-
import { initDatabase, getCursor, querySQL
|
|
8
|
+
import { initDatabase, getCursor, querySQL } from "./db.js";
|
|
9
9
|
import { initFeeds, listFeeds } from "./feeds.js";
|
|
10
10
|
import { initXrpc, listXrpc, configureRelay } from "./xrpc.js";
|
|
11
11
|
import { initOpengraph } from "./opengraph.js";
|
|
@@ -75,8 +75,6 @@ if (config.database !== ':memory:') {
|
|
|
75
75
|
}
|
|
76
76
|
await initDatabase(config.database, schemas, ddlStatements);
|
|
77
77
|
log(`[main] DuckDB initialized (${config.database === ':memory:' ? 'in-memory' : config.database})`);
|
|
78
|
-
// 3a. Backfill child tables for decomposed arrays (one-time migration)
|
|
79
|
-
await backfillChildTables();
|
|
80
78
|
// 3b. Run setup hooks (after DB init, before server)
|
|
81
79
|
await initSetup(resolve(configDir, 'setup'));
|
|
82
80
|
// Detect orphaned tables
|