@hatk/hatk 0.0.1-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/backfill.d.ts +11 -0
- package/dist/backfill.d.ts.map +1 -0
- package/dist/backfill.js +328 -0
- package/dist/car.d.ts +5 -0
- package/dist/car.d.ts.map +1 -0
- package/dist/car.js +52 -0
- package/dist/cbor.d.ts +7 -0
- package/dist/cbor.d.ts.map +1 -0
- package/dist/cbor.js +89 -0
- package/dist/cid.d.ts +4 -0
- package/dist/cid.d.ts.map +1 -0
- package/dist/cid.js +39 -0
- package/dist/cli.d.ts +3 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +1663 -0
- package/dist/config.d.ts +47 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +43 -0
- package/dist/db.d.ts +134 -0
- package/dist/db.d.ts.map +1 -0
- package/dist/db.js +1361 -0
- package/dist/feeds.d.ts +95 -0
- package/dist/feeds.d.ts.map +1 -0
- package/dist/feeds.js +144 -0
- package/dist/fts.d.ts +20 -0
- package/dist/fts.d.ts.map +1 -0
- package/dist/fts.js +762 -0
- package/dist/hydrate.d.ts +23 -0
- package/dist/hydrate.d.ts.map +1 -0
- package/dist/hydrate.js +75 -0
- package/dist/indexer.d.ts +14 -0
- package/dist/indexer.d.ts.map +1 -0
- package/dist/indexer.js +316 -0
- package/dist/labels.d.ts +29 -0
- package/dist/labels.d.ts.map +1 -0
- package/dist/labels.js +111 -0
- package/dist/lex-types.d.ts +401 -0
- package/dist/lex-types.d.ts.map +1 -0
- package/dist/lex-types.js +4 -0
- package/dist/lexicon-resolve.d.ts +14 -0
- package/dist/lexicon-resolve.d.ts.map +1 -0
- package/dist/lexicon-resolve.js +280 -0
- package/dist/logger.d.ts +4 -0
- package/dist/logger.d.ts.map +1 -0
- package/dist/logger.js +23 -0
- package/dist/main.d.ts +3 -0
- package/dist/main.d.ts.map +1 -0
- package/dist/main.js +148 -0
- package/dist/mst.d.ts +6 -0
- package/dist/mst.d.ts.map +1 -0
- package/dist/mst.js +30 -0
- package/dist/oauth/client.d.ts +16 -0
- package/dist/oauth/client.d.ts.map +1 -0
- package/dist/oauth/client.js +54 -0
- package/dist/oauth/crypto.d.ts +28 -0
- package/dist/oauth/crypto.d.ts.map +1 -0
- package/dist/oauth/crypto.js +101 -0
- package/dist/oauth/db.d.ts +47 -0
- package/dist/oauth/db.d.ts.map +1 -0
- package/dist/oauth/db.js +139 -0
- package/dist/oauth/discovery.d.ts +22 -0
- package/dist/oauth/discovery.d.ts.map +1 -0
- package/dist/oauth/discovery.js +50 -0
- package/dist/oauth/dpop.d.ts +11 -0
- package/dist/oauth/dpop.d.ts.map +1 -0
- package/dist/oauth/dpop.js +56 -0
- package/dist/oauth/hooks.d.ts +10 -0
- package/dist/oauth/hooks.d.ts.map +1 -0
- package/dist/oauth/hooks.js +40 -0
- package/dist/oauth/server.d.ts +86 -0
- package/dist/oauth/server.d.ts.map +1 -0
- package/dist/oauth/server.js +572 -0
- package/dist/opengraph.d.ts +34 -0
- package/dist/opengraph.d.ts.map +1 -0
- package/dist/opengraph.js +198 -0
- package/dist/schema.d.ts +51 -0
- package/dist/schema.d.ts.map +1 -0
- package/dist/schema.js +358 -0
- package/dist/seed.d.ts +29 -0
- package/dist/seed.d.ts.map +1 -0
- package/dist/seed.js +86 -0
- package/dist/server.d.ts +6 -0
- package/dist/server.d.ts.map +1 -0
- package/dist/server.js +1024 -0
- package/dist/setup.d.ts +8 -0
- package/dist/setup.d.ts.map +1 -0
- package/dist/setup.js +48 -0
- package/dist/test-browser.d.ts +14 -0
- package/dist/test-browser.d.ts.map +1 -0
- package/dist/test-browser.js +26 -0
- package/dist/test.d.ts +47 -0
- package/dist/test.d.ts.map +1 -0
- package/dist/test.js +256 -0
- package/dist/views.d.ts +40 -0
- package/dist/views.d.ts.map +1 -0
- package/dist/views.js +178 -0
- package/dist/vite-plugin.d.ts +5 -0
- package/dist/vite-plugin.d.ts.map +1 -0
- package/dist/vite-plugin.js +86 -0
- package/dist/xrpc-client.d.ts +18 -0
- package/dist/xrpc-client.d.ts.map +1 -0
- package/dist/xrpc-client.js +54 -0
- package/dist/xrpc.d.ts +53 -0
- package/dist/xrpc.d.ts.map +1 -0
- package/dist/xrpc.js +139 -0
- package/fonts/Inter-Regular.woff +0 -0
- package/package.json +41 -0
- package/public/admin-auth.js +320 -0
- package/public/admin.html +2166 -0
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import type { BackfillConfig } from './config.ts';
/** Options accepted by runBackfill. */
interface BackfillOpts {
    /** Base URL of the PDS whose repos are enumerated. */
    pdsUrl: string;
    /** Base URL of the PLC directory used to resolve did:plc DIDs. */
    plcUrl: string;
    /** Record collection NSIDs to ingest; records outside these are skipped. */
    collections: Set<string>;
    /** Backfill tuning (parallelism, timeouts, retry limits, repo pinning). */
    config: BackfillConfig;
}
/** Fetch and index a single repo's CAR export; resolves to the number of records inserted. */
export declare function backfillRepo(did: string, collections: Set<string>, fetchTimeout: number): Promise<number>;
/** Enumerate, backfill, and retry repos according to `opts`. */
export declare function runBackfill(opts: BackfillOpts): Promise<void>;
export {};
//# sourceMappingURL=backfill.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"backfill.d.ts","sourceRoot":"","sources":["../src/backfill.ts"],"names":[],"mappings":"AAgBA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,aAAa,CAAA;AAEjD,UAAU,YAAY;IACpB,MAAM,EAAE,MAAM,CAAA;IACd,MAAM,EAAE,MAAM,CAAA;IACd,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,CAAA;IACxB,MAAM,EAAE,cAAc,CAAA;CACvB;AA+ED,wBAAsB,YAAY,CAAC,GAAG,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,EAAE,YAAY,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAiH/G;AAwBD,wBAAsB,WAAW,CAAC,IAAI,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC,CAiInE"}
|
package/dist/backfill.js
ADDED
|
@@ -0,0 +1,328 @@
|
|
|
1
|
+
import { parseCarFrame } from "./car.js";
|
|
2
|
+
import { cborDecode } from "./cbor.js";
|
|
3
|
+
import { walkMst } from "./mst.js";
|
|
4
|
+
import { setRepoStatus, getRepoStatus, getRepoRetryInfo, listRetryEligibleRepos, listPendingRepos, querySQL, runSQL, getSchema, bulkInsertRecords, } from "./db.js";
|
|
5
|
+
import { emit, timer } from "./logger.js";
|
|
6
|
+
// Cache of DID -> { pds, handle } resolutions, shared across backfill calls
// for the lifetime of the process.
const pdsCache = new Map();
// PLC directory base URL; assigned by runBackfill() before any resolution.
let plcUrl;
|
|
8
|
+
/**
 * Resolve a DID to its PDS endpoint and primary handle, memoizing results
 * in pdsCache. did:web documents are fetched from the domain's well-known
 * path; every other DID goes through the PLC directory (module-level plcUrl).
 *
 * @param {string} did - DID to resolve (did:web or did:plc).
 * @returns {Promise<{pds: string, handle: string|null}>}
 * @throws {Error} When the document fetch fails or lists no PDS endpoint.
 */
async function resolvePds(did) {
    const hit = pdsCache.get(did);
    if (hit)
        return hit;
    const isWeb = did.startsWith('did:web:');
    const docUrl = isWeb
        ? `https://${did.slice('did:web:'.length)}/.well-known/did.json`
        : `${plcUrl}/${did}`;
    const failLabel = isWeb ? 'did:web resolution failed for' : 'PLC resolution failed for';
    const response = await fetch(docUrl);
    if (!response.ok)
        throw new Error(`${failLabel} ${did}: ${response.status}`);
    const didDoc = await response.json();
    const pds = didDoc.service?.find((svc) => svc.id === '#atproto_pds')?.serviceEndpoint;
    if (!pds)
        throw new Error(`No PDS endpoint in DID document for ${did}`);
    // alsoKnownAs entries look like "at://handle"; take the first at:// URI.
    const akaUri = didDoc.alsoKnownAs?.find((entry) => entry.startsWith('at://'));
    const resolved = { pds, handle: akaUri ? akaUri.slice('at://'.length) : null };
    pdsCache.set(did, resolved);
    return resolved;
}
|
|
36
|
+
// --- Repo Enumeration ---
|
|
37
|
+
/**
 * Page through com.atproto.sync.listRepos on the given PDS, yielding
 * { did, rev } for every repo not explicitly marked inactive.
 *
 * @param {string} pdsUrl - Base URL of the PDS.
 */
async function* listRepos(pdsUrl) {
    let pageCursor;
    for (;;) {
        const query = new URLSearchParams({ limit: '100' });
        if (pageCursor)
            query.set('cursor', pageCursor);
        const response = await fetch(`${pdsUrl}/xrpc/com.atproto.sync.listRepos?${query}`);
        if (!response.ok)
            throw new Error(`listRepos failed: ${response.status}`);
        const page = await response.json();
        const repos = page.repos || [];
        for (const repo of repos) {
            if (repo.active !== false)
                yield { did: repo.did, rev: repo.rev };
        }
        // Stop when the server omits a cursor or returns an empty page.
        if (!page.cursor || repos.length === 0)
            break;
        pageCursor = page.cursor;
    }
}
|
|
56
|
+
/**
 * Page through com.atproto.sync.listReposByCollection, yielding { did, rev }
 * for every repo that holds at least one record in `collection`. A missing
 * rev is normalized to the empty string.
 *
 * @param {string} pdsUrl - Base URL of the PDS.
 * @param {string} collection - Collection NSID to filter by.
 */
async function* listReposByCollection(pdsUrl, collection) {
    let pageCursor;
    for (;;) {
        const query = new URLSearchParams({ collection, limit: '100' });
        if (pageCursor)
            query.set('cursor', pageCursor);
        const response = await fetch(`${pdsUrl}/xrpc/com.atproto.sync.listReposByCollection?${query}`);
        if (!response.ok)
            throw new Error(`listReposByCollection failed: ${response.status}`);
        const page = await response.json();
        const repos = page.repos || [];
        for (const repo of repos) {
            yield { did: repo.did, rev: repo.rev || '' };
        }
        // Stop when the server omits a cursor or returns an empty page.
        if (!page.cursor || repos.length === 0)
            break;
        pageCursor = page.cursor;
    }
}
|
|
74
|
+
// --- Single Repo Backfill ---
|
|
75
|
+
/**
 * Fetch one repo's full CAR export from its PDS, re-import every record in
 * the indexed `collections`, and record success/failure state in the repo
 * status table.
 *
 * On failure the repo is marked 'failed': HTTP 4xx responses are treated as
 * permanent (retryCount pinned to 999, retryAfter 0), any other error
 * schedules a retry with linear backoff (retryCount * 60s, capped at 1h).
 * The error is rethrown either way. A 'backfill'/'repo' telemetry event is
 * always emitted from the finally block.
 *
 * @param did          Repo DID (did:plc or did:web).
 * @param collections  Set of collection NSIDs to import; others are skipped.
 * @param fetchTimeout Seconds before the getRepo fetch is aborted.
 * @returns Number of records inserted via bulkInsertRecords.
 */
export async function backfillRepo(did, collections, fetchTimeout) {
    const elapsed = timer();
    let count = 0;
    let carSizeBytes;
    let status = 'success';
    let error;
    let resolvedPds;
    let resolvedHandle = null;
    let retryCount;
    let retryAfter;
    const controller = new AbortController();
    let timeout;
    try {
        const { pds: pdsUrl, handle } = await resolvePds(did);
        resolvedPds = pdsUrl;
        resolvedHandle = handle;
        // Abort the whole download if it exceeds fetchTimeout seconds.
        timeout = setTimeout(() => controller.abort(), fetchTimeout * 1000);
        const res = await fetch(`${resolvedPds}/xrpc/com.atproto.sync.getRepo?did=${encodeURIComponent(did)}`, {
            signal: controller.signal,
        });
        if (!res.ok) {
            // Attach the status code so the catch block can classify 4xx as permanent.
            const httpErr = new Error(`getRepo failed for ${did}: ${res.status}`);
            httpErr.httpStatus = res.status;
            throw httpErr;
        }
        const carBytes = new Uint8Array(await res.arrayBuffer());
        carSizeBytes = carBytes.length;
        const { roots, blocks } = parseCarFrame(carBytes);
        // Decode commit to get MST root
        const rootData = blocks.get(roots[0]);
        if (!rootData)
            throw new Error(`No root block for ${did}`);
        const { value: commit } = cborDecode(rootData);
        // Walk MST to find all record paths. Each entry carries a
        // "collection/rkey" path and the CID of the record block.
        const entries = walkMst(blocks, commit.data.$link);
        const bulk = [];
        for (const entry of entries) {
            const collection = entry.path.split('/')[0];
            if (!collections.has(collection))
                continue;
            const blockData = blocks.get(entry.cid);
            if (!blockData)
                continue;
            try {
                const { value: record } = cborDecode(blockData);
                // Records without a $type are not importable; skip silently.
                if (!record?.$type)
                    continue;
                const rkey = entry.path.split('/').slice(1).join('/');
                const uri = `at://${did}/${collection}/${rkey}`;
                bulk.push({ collection, uri, cid: entry.cid, did, record });
            }
            catch (recordErr) {
                // A single undecodable record is reported but does not fail the repo.
                emit('backfill', 'record_error', {
                    did,
                    uri: `at://${did}/${entry.path}`,
                    collection,
                    error: recordErr.message,
                });
            }
        }
        // Delete existing records for this DID before re-importing so deletions are reflected
        for (const col of collections) {
            const schema = getSchema(col);
            if (!schema)
                continue;
            await runSQL(`DELETE FROM ${schema.tableName} WHERE did = $1`, did);
            // Child and union-branch tables key rows by parent_did.
            for (const child of schema.children) {
                await runSQL(`DELETE FROM ${child.tableName} WHERE parent_did = $1`, did);
            }
            for (const union of schema.unions) {
                for (const branch of union.branches) {
                    await runSQL(`DELETE FROM ${branch.tableName} WHERE parent_did = $1`, did);
                }
            }
        }
        count = await bulkInsertRecords(bulk);
        // Mark the repo active at the commit's rev so later runs can skip it.
        await setRepoStatus(did, 'active', commit.rev, { handle });
        return count;
    }
    catch (err) {
        status = 'error';
        error = err.message;
        // Don't retry permanent failures (4xx = client error, repo doesn't exist / is deactivated)
        const isPermanent = err.httpStatus && err.httpStatus >= 400 && err.httpStatus < 500;
        if (isPermanent) {
            retryCount = 999;
            await setRepoStatus(did, 'failed', undefined, { retryCount: 999, retryAfter: 0, handle: resolvedHandle });
        }
        else {
            // Transient failure: linear backoff, 60s per attempt, capped at 1h.
            const info = await getRepoRetryInfo(did);
            retryCount = (info?.retryCount ?? 0) + 1;
            const backoffSecs = Math.min(retryCount * 60, 3600);
            retryAfter = Math.floor(Date.now() / 1000) + backoffSecs;
            await setRepoStatus(did, 'failed', undefined, { retryCount, retryAfter, handle: resolvedHandle });
        }
        throw err;
    }
    finally {
        clearTimeout(timeout);
        // One telemetry event per attempt, success or failure.
        emit('backfill', 'repo', {
            did,
            record_count: count,
            duration_ms: elapsed(),
            status,
            error,
            pds_url: resolvedPds,
            car_size_bytes: carSizeBytes,
            retry_count: retryCount,
            retry_after: retryAfter,
            permanent_failure: retryCount === 999 ? true : undefined,
        });
    }
}
|
|
188
|
+
// --- Worker Pool ---
|
|
189
|
+
/**
 * Process `items` with at most `parallelism` concurrent invocations of `fn`.
 * Workers pull the next unclaimed index until the list is drained; rejections
 * from `fn` are swallowed here (callers observe failures via their own
 * reporting).
 *
 * @param {Array} items - Work items, consumed in order.
 * @param {number} parallelism - Maximum concurrent workers.
 * @param {(item: any) => Promise<void>} fn - Async handler per item.
 */
async function runWorkerPool(items, parallelism, fn) {
    let next = 0;
    const drain = async () => {
        while (next < items.length) {
            const current = items[next];
            next += 1;
            try {
                await fn(current);
            }
            catch {
                // Errors captured by backfill.repo event
            }
        }
    };
    const poolSize = Math.min(parallelism, items.length);
    await Promise.all(Array.from({ length: poolSize }, () => drain()));
}
|
|
205
|
+
// --- Main Backfill Entry Point ---
|
|
206
|
+
/**
 * Top-level backfill: enumerate target repos, import each one's records, and
 * retry transient failures until no repo remains retry-eligible.
 *
 * Enumeration mode depends on config:
 *   - config.repos set      -> only those pinned DIDs ("pinned repos")
 *   - config.fullNetwork    -> every active repo on the PDS ("full network")
 *   - otherwise             -> repos holding records in the signal
 *                              collections ("collection signal"), falling
 *                              back to a full listing when the PDS rejects
 *                              listReposByCollection.
 *
 * Side effect: assigns the module-level plcUrl used by resolvePds().
 * Emits a 'backfill'/'run' summary event before returning.
 *
 * @param opts BackfillOpts: { pdsUrl, plcUrl, collections, config }.
 */
export async function runBackfill(opts) {
    const { pdsUrl, collections, config } = opts;
    plcUrl = opts.plcUrl;
    // Signal collections default to the indexed collections themselves.
    const signalCollections = config.signalCollections || [...collections];
    const elapsed = timer();
    const mode = config.repos?.length ? 'pinned repos' : config.fullNetwork ? 'full network' : 'collection signal';
    // 1. Enumerate repos
    const dids = new Set();
    if (config.repos?.length) {
        for (const did of config.repos) {
            dids.add(did);
        }
    }
    else if (config.fullNetwork) {
        for await (const repo of listRepos(pdsUrl)) {
            dids.add(repo.did);
        }
    }
    else {
        for (const col of signalCollections) {
            try {
                for await (const repo of listReposByCollection(pdsUrl, col)) {
                    dids.add(repo.did);
                }
            }
            catch (err) {
                // Fall back to listRepos if listReposByCollection not supported
                if (err.message.includes('400') || err.message.includes('401') || err.message.includes('501')) {
                    for await (const repo of listRepos(pdsUrl)) {
                        dids.add(repo.did);
                    }
                    break;
                }
                throw err;
            }
        }
    }
    // 2. Filter to repos that haven't been backfilled + pick up existing pending repos
    const pending = [];
    for (const did of dids) {
        const status = await getRepoStatus(did);
        if (status !== 'active') {
            // Unknown repos are registered as pending before work starts.
            if (!status)
                await setRepoStatus(did, 'pending');
            pending.push(did);
        }
    }
    // Also re-queue any repos left pending from previous runs
    const existingPending = await listPendingRepos();
    for (const did of existingPending) {
        if (!pending.includes(did))
            pending.push(did);
    }
    if (pending.length === 0) {
        // Nothing to do: still emit the run summary for observability.
        emit('backfill', 'run', {
            mode,
            total_repos: dids.size,
            pending_repos: 0,
            total_records: 0,
            failed_count: 0,
            duration_ms: elapsed(),
            parallelism: config.parallelism,
            status: 'success',
        });
        return;
    }
    // 3. Backfill with worker pool
    let totalRecords = 0;
    let failedCount = 0;
    await runWorkerPool(pending, config.parallelism, async (did) => {
        try {
            const count = await backfillRepo(did, collections, config.fetchTimeout);
            totalRecords += count;
        }
        catch {
            // backfillRepo already recorded failure state and emitted telemetry.
            failedCount++;
        }
    });
    // 4. Retry failed repos with exponential backoff
    const maxRetries = config.maxRetries;
    let retryRound = 0;
    while (true) {
        const eligible = await listRetryEligibleRepos(maxRetries);
        if (eligible.length === 0)
            break;
        retryRound++;
        // Wait until the earliest retry_after has passed
        const now = Math.floor(Date.now() / 1000);
        const rows = await querySQL(`SELECT MIN(retry_after) as earliest FROM _repos WHERE status = 'failed' AND retry_after > $1 AND retry_count < $2`, [now, maxRetries]);
        const earliest = rows[0]?.earliest ? Number(rows[0].earliest) : 0;
        if (earliest > now) {
            await new Promise((resolve) => setTimeout(resolve, (earliest - now) * 1000));
        }
        // Re-check after sleeping: eligibility may have changed meanwhile.
        const retryEligible = await listRetryEligibleRepos(maxRetries);
        if (retryEligible.length === 0)
            break;
        emit('backfill', 'retry_round', {
            round: retryRound,
            eligible_repos: retryEligible.length,
        });
        await runWorkerPool(retryEligible, config.parallelism, async (did) => {
            try {
                const count = await backfillRepo(did, collections, config.fetchTimeout);
                totalRecords += count;
                // A successful retry cancels out the earlier failure tally.
                failedCount--;
            }
            catch {
                // retry info already updated in backfillRepo
            }
        });
    }
    emit('backfill', 'run', {
        mode,
        total_repos: dids.size,
        pending_repos: pending.length,
        total_records: totalRecords,
        failed_count: failedCount,
        duration_ms: elapsed(),
        parallelism: config.parallelism,
        retry_rounds: retryRound,
        status: failedCount > 0 ? 'partial' : 'success',
    });
}
|
package/dist/car.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"car.d.ts","sourceRoot":"","sources":["../src/car.ts"],"names":[],"mappings":"AAgCA,wBAAgB,aAAa,CAAC,QAAQ,EAAE,UAAU,GAAG;IACnD,KAAK,EAAE,MAAM,EAAE,CAAA;IACf,MAAM,EAAE,GAAG,CAAC,MAAM,EAAE,UAAU,CAAC,CAAA;CAChC,CAmCA"}
|
package/dist/car.js
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
// CAR (Content Addressable aRchive) parser from scratch
|
|
2
|
+
// CAR files bundle content-addressed blocks — used in firehose events
|
|
3
|
+
import { cborDecode } from "./cbor.js";
|
|
4
|
+
import { cidToString, readVarint } from "./cid.js";
|
|
5
|
+
/**
 * Read one binary CID starting at `offset`, returning [cidBytes, nextOffset].
 * A leading 0x12 byte marks a CIDv0 (bare sha2-256 multihash, 34 bytes);
 * anything else is parsed as CIDv1: varint version + codec + multihash.
 *
 * @param {Uint8Array} bytes - Buffer containing the CID.
 * @param {number} offset - Index of the CID's first byte.
 * @returns {[Uint8Array, number]} CID bytes and the offset just past them.
 * @throws {Error} On a CID version other than 0 or 1.
 */
function parseCidFromBytes(bytes, offset) {
    if (bytes[offset] === 0x12) {
        // CIDv0: 0x12 (sha2-256) + 0x20 (32-byte digest) + digest = 34 bytes.
        const end = offset + 34;
        return [bytes.slice(offset, end), end];
    }
    let cursor = offset;
    let version;
    [version, cursor] = readVarint(bytes, cursor);
    if (version !== 1)
        throw new Error(`Unsupported CID version: ${version}`);
    [, cursor] = readVarint(bytes, cursor); // multicodec (value unused)
    [, cursor] = readVarint(bytes, cursor); // multihash function code (value unused)
    let digestLen;
    [digestLen, cursor] = readVarint(bytes, cursor);
    cursor += digestLen;
    return [bytes.slice(offset, cursor), cursor];
}
|
|
25
|
+
/**
 * Parse a CAR archive: a varint-length-prefixed CBOR header followed by a
 * sequence of varint(length) + CID + data blocks.
 *
 * @param {Uint8Array} carBytes - Complete CAR frame.
 * @returns {{roots: string[], blocks: Map<string, Uint8Array>}} Root CID
 *   strings and a map of CID string -> raw block bytes.
 */
export function parseCarFrame(carBytes) {
    // Header: varint length, then a CBOR map whose `roots` we need.
    const [headerLen, headerStart] = readVarint(carBytes, 0);
    const { value: header } = cborDecode(carBytes.slice(headerStart, headerStart + headerLen));
    // cborDecode converts tag-42 CIDs into { $link: "b..." } objects, so each
    // root may already be a decoded link rather than raw CID bytes.
    const roots = (header.roots || []).map((root) => root?.$link ?? cidToString(root));
    const blocks = new Map();
    let cursor = headerStart + headerLen;
    while (cursor < carBytes.length) {
        const [blockLen, bodyStart] = readVarint(carBytes, cursor);
        if (blockLen === 0)
            break;
        const [cidBytes, dataStart] = parseCidFromBytes(carBytes, bodyStart);
        // The declared length covers CID + data; subtract the CID's size.
        const dataLen = blockLen - (dataStart - bodyStart);
        blocks.set(cidToString(cidBytes), carBytes.slice(dataStart, dataStart + dataLen));
        cursor = dataStart + dataLen;
    }
    return { roots, blocks };
}
|
package/dist/cbor.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cbor.d.ts","sourceRoot":"","sources":["../src/cbor.ts"],"names":[],"mappings":"AAQA,UAAU,YAAY;IACpB,KAAK,EAAE,GAAG,CAAA;IACV,MAAM,EAAE,MAAM,CAAA;CACf;AAED,wBAAgB,UAAU,CAAC,KAAK,EAAE,UAAU,EAAE,WAAW,SAAI,GAAG,YAAY,CAgF3E"}
|
package/dist/cbor.js
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
// CBOR decoder from scratch (RFC 8949)
|
|
2
|
+
// Returns { value, offset } so we can split firehose frames
|
|
3
|
+
// (two concatenated CBOR values: header + body)
|
|
4
|
+
import { cidToString } from "./cid.js";
|
|
5
|
+
const CBOR_TAG_CID = 42;
|
|
6
|
+
/**
 * Decode one CBOR value (RFC 8949) from `bytes`, starting at `startOffset`.
 * Returns { value, offset } where `offset` points just past the decoded
 * value, so callers can split concatenated CBOR values (firehose frames).
 *
 * Limitations visible in the code below: indefinite-length items (info 31)
 * are not handled, and major-type-7 floats/simple values other than
 * false/true/null decode as `undefined` (their payload bytes are still
 * consumed by the length read). Integers are plain Numbers, exact to 2^53.
 */
export function cborDecode(bytes, startOffset = 0) {
    let offset = startOffset;
    // Recursive single-value reader; advances the shared `offset`.
    function read() {
        const initial = bytes[offset++];
        const major = initial >> 5; // top 3 bits: major type
        const info = initial & 0x1f; // low 5 bits: immediate value or length code
        // `length` doubles as the integer value for major types 0/1 and the tag
        // number for major type 6.
        let length = info;
        if (info === 24)
            length = bytes[offset++];
        else if (info === 25) {
            length = (bytes[offset++] << 8) | bytes[offset++];
        }
        else if (info === 26) {
            // Multiplication instead of << keeps 4-byte values unsigned.
            length = bytes[offset++] * 0x1000000 + bytes[offset++] * 0x10000 + bytes[offset++] * 0x100 + bytes[offset++];
        }
        else if (info === 27) {
            // 8-byte integer — read as Number (safe up to 2^53)
            length =
                bytes[offset++] * 0x100000000000000 +
                    bytes[offset++] * 0x1000000000000 +
                    bytes[offset++] * 0x10000000000 +
                    bytes[offset++] * 0x100000000 +
                    bytes[offset++] * 0x1000000 +
                    bytes[offset++] * 0x10000 +
                    bytes[offset++] * 0x100 +
                    bytes[offset++];
        }
        switch (major) {
            case 0:
                return length; // unsigned int
            case 1:
                return -1 - length; // negative int
            case 2: {
                // byte string — use subarray (view, no copy)
                const data = bytes.subarray(offset, offset + length);
                offset += length;
                return data;
            }
            case 3: {
                // text string
                const data = new TextDecoder().decode(bytes.subarray(offset, offset + length));
                offset += length;
                return data;
            }
            case 4: {
                // array
                const arr = [];
                for (let i = 0; i < length; i++)
                    arr.push(read());
                return arr;
            }
            case 5: {
                // map
                const obj = {};
                for (let i = 0; i < length; i++) {
                    const key = read();
                    obj[key] = read();
                }
                return obj;
            }
            case 6: {
                // tag
                const taggedValue = read();
                if (length === CBOR_TAG_CID) {
                    // DAG-CBOR CID link: strip 0x00 multibase prefix, return as { $link }
                    return { $link: cidToString(taggedValue.slice(1)) };
                }
                // Any other tag is passed through untagged.
                return taggedValue;
            }
            case 7: {
                // special values
                if (info === 20)
                    return false;
                if (info === 21)
                    return true;
                if (info === 22)
                    return null;
                // CBOR `undefined`, floats, and other simple values fall through.
                return undefined;
            }
        }
    }
    const value = read();
    return { value, offset };
}
|
package/dist/cid.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cid.d.ts","sourceRoot":"","sources":["../src/cid.ts"],"names":[],"mappings":"AAKA,wBAAgB,YAAY,CAAC,KAAK,EAAE,UAAU,GAAG,MAAM,CAmBtD;AAED,wBAAgB,WAAW,CAAC,QAAQ,EAAE,UAAU,GAAG,MAAM,CAGxD;AAED,wBAAgB,UAAU,CAAC,KAAK,EAAE,UAAU,EAAE,MAAM,EAAE,MAAM,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAc9E"}
|
package/dist/cid.js
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
// CID (Content Identifier) + base32 + varint — from scratch
|
|
2
|
+
// CIDs are self-describing content hashes used throughout AT Protocol
|
|
3
|
+
// RFC 4648 base32 alphabet, lowercase (the multibase "base32" flavor).
const BASE32_ALPHABET = 'abcdefghijklmnopqrstuvwxyz234567';
/**
 * Encode bytes as lowercase, unpadded RFC 4648 base32.
 *
 * @param {Uint8Array} bytes - Bytes to encode.
 * @returns {string} Base32 text, no padding characters.
 */
export function base32Encode(bytes) {
    const out = [];
    let buffer = 0;
    let bitCount = 0;
    for (let i = 0; i < bytes.length; i++) {
        buffer = (buffer << 8) | bytes[i];
        bitCount += 8;
        // Emit one character per full 5-bit group accumulated in the buffer.
        while (bitCount >= 5) {
            bitCount -= 5;
            out.push(BASE32_ALPHABET[(buffer >> bitCount) & 31]);
        }
    }
    // Left-align any trailing partial group into a final character.
    if (bitCount > 0) {
        out.push(BASE32_ALPHABET[(buffer << (5 - bitCount)) & 31]);
    }
    return out.join('');
}
|
|
21
|
+
/**
 * Render raw CID bytes as a multibase string: the 'b' prefix (base32lower)
 * followed by the base32-encoded bytes.
 *
 * @param {Uint8Array} cidBytes - Binary CID.
 * @returns {string} Multibase CID string.
 */
export function cidToString(cidBytes) {
    return 'b' + base32Encode(cidBytes);
}
|
|
25
|
+
/**
 * Decode an unsigned LEB128 varint from `bytes` starting at `offset`.
 *
 * @param {Uint8Array} bytes - Buffer to read from.
 * @param {number} offset - Index of the varint's first byte.
 * @returns {[number, number]} Decoded value and the offset just past it.
 * @throws {Error} If the varint is longer than supported or the buffer ends
 *   mid-varint.
 */
export function readVarint(bytes, offset) {
    let value = 0;
    let shift = 0;
    let pos = offset;
    while (pos < bytes.length) {
        const byte = bytes[pos++];
        // Accumulate arithmetically (not with |/<<): 32-bit bitwise ops would
        // silently truncate values >= 2^31, which the 5-byte limit below allows.
        // Number arithmetic stays exact up to 2^53.
        value += (byte & 0x7f) * 2 ** shift;
        if ((byte & 0x80) === 0)
            return [value, pos];
        shift += 7;
        if (shift > 35)
            throw new Error('Varint too long');
    }
    throw new Error('Unexpected end of varint');
}
|
package/dist/cli.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cli.d.ts","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":""}
|