@hatk/hatk 0.0.1-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109)
  1. package/dist/backfill.d.ts +11 -0
  2. package/dist/backfill.d.ts.map +1 -0
  3. package/dist/backfill.js +328 -0
  4. package/dist/car.d.ts +5 -0
  5. package/dist/car.d.ts.map +1 -0
  6. package/dist/car.js +52 -0
  7. package/dist/cbor.d.ts +7 -0
  8. package/dist/cbor.d.ts.map +1 -0
  9. package/dist/cbor.js +89 -0
  10. package/dist/cid.d.ts +4 -0
  11. package/dist/cid.d.ts.map +1 -0
  12. package/dist/cid.js +39 -0
  13. package/dist/cli.d.ts +3 -0
  14. package/dist/cli.d.ts.map +1 -0
  15. package/dist/cli.js +1663 -0
  16. package/dist/config.d.ts +47 -0
  17. package/dist/config.d.ts.map +1 -0
  18. package/dist/config.js +43 -0
  19. package/dist/db.d.ts +134 -0
  20. package/dist/db.d.ts.map +1 -0
  21. package/dist/db.js +1361 -0
  22. package/dist/feeds.d.ts +95 -0
  23. package/dist/feeds.d.ts.map +1 -0
  24. package/dist/feeds.js +144 -0
  25. package/dist/fts.d.ts +20 -0
  26. package/dist/fts.d.ts.map +1 -0
  27. package/dist/fts.js +762 -0
  28. package/dist/hydrate.d.ts +23 -0
  29. package/dist/hydrate.d.ts.map +1 -0
  30. package/dist/hydrate.js +75 -0
  31. package/dist/indexer.d.ts +14 -0
  32. package/dist/indexer.d.ts.map +1 -0
  33. package/dist/indexer.js +316 -0
  34. package/dist/labels.d.ts +29 -0
  35. package/dist/labels.d.ts.map +1 -0
  36. package/dist/labels.js +111 -0
  37. package/dist/lex-types.d.ts +401 -0
  38. package/dist/lex-types.d.ts.map +1 -0
  39. package/dist/lex-types.js +4 -0
  40. package/dist/lexicon-resolve.d.ts +14 -0
  41. package/dist/lexicon-resolve.d.ts.map +1 -0
  42. package/dist/lexicon-resolve.js +280 -0
  43. package/dist/logger.d.ts +4 -0
  44. package/dist/logger.d.ts.map +1 -0
  45. package/dist/logger.js +23 -0
  46. package/dist/main.d.ts +3 -0
  47. package/dist/main.d.ts.map +1 -0
  48. package/dist/main.js +148 -0
  49. package/dist/mst.d.ts +6 -0
  50. package/dist/mst.d.ts.map +1 -0
  51. package/dist/mst.js +30 -0
  52. package/dist/oauth/client.d.ts +16 -0
  53. package/dist/oauth/client.d.ts.map +1 -0
  54. package/dist/oauth/client.js +54 -0
  55. package/dist/oauth/crypto.d.ts +28 -0
  56. package/dist/oauth/crypto.d.ts.map +1 -0
  57. package/dist/oauth/crypto.js +101 -0
  58. package/dist/oauth/db.d.ts +47 -0
  59. package/dist/oauth/db.d.ts.map +1 -0
  60. package/dist/oauth/db.js +139 -0
  61. package/dist/oauth/discovery.d.ts +22 -0
  62. package/dist/oauth/discovery.d.ts.map +1 -0
  63. package/dist/oauth/discovery.js +50 -0
  64. package/dist/oauth/dpop.d.ts +11 -0
  65. package/dist/oauth/dpop.d.ts.map +1 -0
  66. package/dist/oauth/dpop.js +56 -0
  67. package/dist/oauth/hooks.d.ts +10 -0
  68. package/dist/oauth/hooks.d.ts.map +1 -0
  69. package/dist/oauth/hooks.js +40 -0
  70. package/dist/oauth/server.d.ts +86 -0
  71. package/dist/oauth/server.d.ts.map +1 -0
  72. package/dist/oauth/server.js +572 -0
  73. package/dist/opengraph.d.ts +34 -0
  74. package/dist/opengraph.d.ts.map +1 -0
  75. package/dist/opengraph.js +198 -0
  76. package/dist/schema.d.ts +51 -0
  77. package/dist/schema.d.ts.map +1 -0
  78. package/dist/schema.js +358 -0
  79. package/dist/seed.d.ts +29 -0
  80. package/dist/seed.d.ts.map +1 -0
  81. package/dist/seed.js +86 -0
  82. package/dist/server.d.ts +6 -0
  83. package/dist/server.d.ts.map +1 -0
  84. package/dist/server.js +1024 -0
  85. package/dist/setup.d.ts +8 -0
  86. package/dist/setup.d.ts.map +1 -0
  87. package/dist/setup.js +48 -0
  88. package/dist/test-browser.d.ts +14 -0
  89. package/dist/test-browser.d.ts.map +1 -0
  90. package/dist/test-browser.js +26 -0
  91. package/dist/test.d.ts +47 -0
  92. package/dist/test.d.ts.map +1 -0
  93. package/dist/test.js +256 -0
  94. package/dist/views.d.ts +40 -0
  95. package/dist/views.d.ts.map +1 -0
  96. package/dist/views.js +178 -0
  97. package/dist/vite-plugin.d.ts +5 -0
  98. package/dist/vite-plugin.d.ts.map +1 -0
  99. package/dist/vite-plugin.js +86 -0
  100. package/dist/xrpc-client.d.ts +18 -0
  101. package/dist/xrpc-client.d.ts.map +1 -0
  102. package/dist/xrpc-client.js +54 -0
  103. package/dist/xrpc.d.ts +53 -0
  104. package/dist/xrpc.d.ts.map +1 -0
  105. package/dist/xrpc.js +139 -0
  106. package/fonts/Inter-Regular.woff +0 -0
  107. package/package.json +41 -0
  108. package/public/admin-auth.js +320 -0
  109. package/public/admin.html +2166 -0
package/dist/db.js ADDED
@@ -0,0 +1,1361 @@
1
+ import { DuckDBInstance } from '@duckdb/node-api';
2
+ import { toSnakeCase } from "./schema.js";
3
+ import { getSearchColumns, stripStopWords } from "./fts.js";
4
+ import { emit, timer } from "./logger.js";
5
+ import { OAUTH_DDL } from "./oauth/db.js";
6
// Shared DuckDB handles, populated by initDatabase(): one instance plus a
// dedicated write connection (con) and a dedicated read connection (readCon).
let instance;
let con;
let readCon;
// Maps collection NSID -> table schema descriptor registered at init time.
const schemas = new Map();
/**
 * Best-effort teardown: closes the read connection, the write connection,
 * and finally the instance. Each close is independently guarded so a
 * failure (or a handle that was never opened) never throws.
 */
export function closeDatabase() {
    for (const handle of [readCon, con, instance]) {
        try {
            handle?.closeSync();
        }
        catch { }
    }
}
24
// Per-kind serialization chains: each new task starts only after the previous
// task on the same chain has settled.
let writeQueue = Promise.resolve();
let readQueue = Promise.resolve();
/**
 * Serialize `fn` onto the named queue ('write' or any other value = read).
 * Returns fn's own promise (so callers see its value/rejection), while the
 * stored chain swallows the outcome so one failure cannot stall the queue.
 */
function enqueue(queue, fn) {
    const isWrite = queue === 'write';
    const prev = isWrite ? writeQueue : readQueue;
    const task = prev.then(fn);
    // Settle-absorbing tail keeps the chain alive after rejections.
    const tail = task.then(() => { }, () => { });
    if (isWrite) {
        writeQueue = tail;
    }
    else {
        readQueue = tail;
    }
    return task;
}
38
/**
 * Bind positional parameters (1-indexed) onto a prepared statement, mapping
 * each JS value to the matching DuckDB bind call. null/undefined -> NULL,
 * string -> VARCHAR, integral number -> INTEGER, other number -> DOUBLE,
 * boolean -> BOOLEAN, bigint -> BIGINT, Uint8Array -> BLOB; anything else is
 * stringified and bound as VARCHAR.
 */
function bindParams(prepared, params) {
    params.forEach((value, i) => {
        const idx = i + 1;
        if (value == null) {
            prepared.bindNull(idx);
            return;
        }
        switch (typeof value) {
            case 'string':
                prepared.bindVarchar(idx, value);
                break;
            case 'number':
                if (Number.isInteger(value)) {
                    prepared.bindInteger(idx, value);
                }
                else {
                    prepared.bindDouble(idx, value);
                }
                break;
            case 'boolean':
                prepared.bindBoolean(idx, value);
                break;
            case 'bigint':
                prepared.bindBigInt(idx, value);
                break;
            default:
                if (value instanceof Uint8Array) {
                    prepared.bindBlob(idx, value);
                }
                else {
                    // Fallback for objects/symbols etc.: bind string form.
                    prepared.bindVarchar(idx, String(value));
                }
        }
    });
}
70
/** Execute a write statement immediately on the write connection (no queueing). */
async function runDirect(sql, ...params) {
    if (!params.length) {
        await con.run(sql);
        return;
    }
    const stmt = await con.prepare(sql);
    bindParams(stmt, params);
    await stmt.run();
}
/** Queue a write statement behind all previously enqueued writes. */
async function run(sql, ...params) {
    return enqueue('write', () => runDirect(sql, ...params));
}
82
/**
 * Execute a batch of {sql, params} operations inside a single transaction on
 * the write queue. Individual failing operations are skipped (best-effort
 * ingestion of possibly-bad records), but if the transaction itself cannot be
 * started or committed we roll back and rethrow so the shared write
 * connection is never left inside an open transaction (which would poison
 * every subsequent write).
 */
export async function runBatch(operations) {
    return enqueue('write', async () => {
        await con.run('BEGIN TRANSACTION');
        try {
            for (const op of operations) {
                try {
                    if (op.params.length === 0) {
                        await con.run(op.sql);
                    }
                    else {
                        const prepared = await con.prepare(op.sql);
                        bindParams(prepared, op.params);
                        await prepared.run();
                    }
                }
                catch {
                    // Skip bad records, continue with rest of batch
                }
            }
            await con.run('COMMIT');
        }
        catch (err) {
            // Roll back (best-effort) so the connection returns to autocommit.
            try {
                await con.run('ROLLBACK');
            }
            catch { }
            throw err;
        }
    });
}
103
/** Execute a read query immediately on the read connection; returns row objects. */
async function allDirect(sql, ...params) {
    if (params.length > 0) {
        const stmt = await readCon.prepare(sql);
        bindParams(stmt, params);
        const reader = await stmt.runAndReadAll();
        return reader.getRowObjects();
    }
    const reader = await readCon.runAndReadAll(sql);
    return reader.getRowObjects();
}
/** Queue a read query behind all previously enqueued reads. */
async function all(sql, ...params) {
    return enqueue('read', () => allDirect(sql, ...params));
}
116
/**
 * Open (or create) the DuckDB database and bring the schema up to date.
 * Order matters: caller-supplied DDL runs first, then the internal
 * bookkeeping tables (_repos, _cursor, _labels, _preferences) and finally the
 * OAuth tables. Must be called before any other function in this module —
 * it populates the shared `instance`/`con`/`readCon` handles and the
 * `schemas` registry.
 *
 * @param dbPath file path, or ':memory:' for an in-memory database
 * @param tableSchemas iterable of schema descriptors (keyed by .collection)
 * @param ddlStatements array of DDL strings, each possibly ';'-separated
 */
export async function initDatabase(dbPath, tableSchemas, ddlStatements) {
    instance = await DuckDBInstance.create(dbPath === ':memory:' ? undefined : dbPath);
    con = await instance.connect();
    readCon = await instance.connect();
    for (const schema of tableSchemas) {
        schemas.set(schema.collection, schema);
    }
    // NOTE(review): naive split on ';' — assumes DDL contains no ';' inside
    // string literals; verify against the DDL generators.
    for (const ddl of ddlStatements) {
        for (const statement of ddl.split(';').filter((s) => s.trim())) {
            await run(statement);
        }
    }
    // Internal tables for backfill state
    await run(`CREATE TABLE IF NOT EXISTS _repos (
    did TEXT PRIMARY KEY,
    status TEXT NOT NULL DEFAULT 'pending',
    handle TEXT,
    backfilled_at TIMESTAMP,
    rev TEXT,
    retry_count INTEGER NOT NULL DEFAULT 0,
    retry_after INTEGER NOT NULL DEFAULT 0
  )`);
    // Migration: add handle column to existing _repos tables
    // (ALTER throws when the column already exists — swallowed deliberately).
    try {
        await run(`ALTER TABLE _repos ADD COLUMN handle TEXT`);
    }
    catch { }
    // Re-queue repos with missing handles so backfill populates them
    await run(`UPDATE _repos SET status = 'pending' WHERE handle IS NULL`);
    // Firehose cursor storage (key -> opaque string value).
    await run(`CREATE TABLE IF NOT EXISTS _cursor (
    key TEXT PRIMARY KEY,
    value TEXT NOT NULL
  )`);
    // Labels table (atproto-compatible); ids come from a sequence so later
    // negations can supersede earlier labels by id ordering.
    await run(`CREATE SEQUENCE IF NOT EXISTS _labels_seq START 1`);
    await run(`CREATE TABLE IF NOT EXISTS _labels (
    id INTEGER PRIMARY KEY DEFAULT nextval('_labels_seq'),
    src TEXT NOT NULL,
    uri TEXT NOT NULL,
    val TEXT NOT NULL,
    neg BOOLEAN DEFAULT FALSE,
    cts TIMESTAMP NOT NULL,
    exp TIMESTAMP
  )`);
    await run(`CREATE INDEX IF NOT EXISTS idx_labels_uri ON _labels(uri)`);
    await run(`CREATE INDEX IF NOT EXISTS idx_labels_src ON _labels(src)`);
    // Preferences table (generic key-value per user)
    await run(`CREATE TABLE IF NOT EXISTS _preferences (
    did TEXT NOT NULL,
    key TEXT NOT NULL,
    value JSON NOT NULL,
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (did, key)
  )`);
    // OAuth tables
    for (const statement of OAUTH_DDL.split(';').filter((s) => s.trim())) {
        await run(statement);
    }
}
175
/** Read a stored cursor value by key; null when unset. */
export async function getCursor(key) {
    const [row] = await all(`SELECT value FROM _cursor WHERE key = $1`, key);
    return row?.value || null;
}
/** Upsert a cursor value under the given key. */
export async function setCursor(key, value) {
    await run(`INSERT OR REPLACE INTO _cursor (key, value) VALUES ($1, $2)`, key, value);
}
182
/** Current backfill status for a repo DID, or null when unknown. */
export async function getRepoStatus(did) {
    const [row] = await all(`SELECT status FROM _repos WHERE did = $1`, did);
    return row?.status || null;
}
186
/**
 * Upsert a repo's backfill status. Each branch issues UPDATE first and then
 * INSERT OR IGNORE, so exactly one of the two takes effect whether or not a
 * row already exists (poor man's upsert — order of the two statements is
 * load-bearing).
 *
 * - 'active': resets retry counters, stamps backfilled_at; COALESCE keeps an
 *   existing handle/rev when none is supplied.
 * - 'failed' (with opts): records retryCount/retryAfter for the retry loop.
 * - anything else: status-only update/insert.
 *
 * @param did repo DID
 * @param status 'active' | 'failed' | other status string (e.g. 'pending')
 * @param rev optional repo revision to persist
 * @param opts optional { handle, retryCount, retryAfter }
 */
export async function setRepoStatus(did, status, rev, opts) {
    if (status === 'active') {
        // Update existing row preserving handle if not provided
        await run(`UPDATE _repos SET status = $1, handle = COALESCE($2, handle), backfilled_at = $3, rev = COALESCE($4, rev), retry_count = 0, retry_after = 0 WHERE did = $5`, status, opts?.handle || null, new Date().toISOString(), rev || null, did);
        // Insert if row didn't exist yet
        await run(`INSERT OR IGNORE INTO _repos (did, status, handle, backfilled_at, rev, retry_count, retry_after) VALUES ($1, $2, $3, $4, $5, 0, 0)`, did, status, opts?.handle || null, new Date().toISOString(), rev || null);
    }
    else if (status === 'failed' && opts) {
        await run(`UPDATE _repos SET status = $1, retry_count = $2, retry_after = $3, handle = COALESCE($4, handle) WHERE did = $5`, status, opts.retryCount ?? 0, opts.retryAfter ?? 0, opts.handle || null, did);
        // If row didn't exist yet, insert it
        await run(`INSERT OR IGNORE INTO _repos (did, status, handle, retry_count, retry_after) VALUES ($1, $2, $3, $4, $5)`, did, status, opts.handle || null, opts.retryCount ?? 0, opts.retryAfter ?? 0);
    }
    else {
        await run(`UPDATE _repos SET status = $1 WHERE did = $2`, status, did);
        await run(`INSERT OR IGNORE INTO _repos (did, status) VALUES ($1, $2)`, did, status);
    }
}
203
/** Retry bookkeeping for a repo: { retryCount, retryAfter } or null if unknown. */
export async function getRepoRetryInfo(did) {
    const rows = await all(`SELECT retry_count, retry_after FROM _repos WHERE did = $1`, did);
    const row = rows[0];
    if (!row)
        return null;
    // Coerce to plain numbers — DuckDB may return BigInt for integer columns.
    return { retryCount: Number(row.retry_count), retryAfter: Number(row.retry_after) };
}
209
/** DIDs of failed repos whose backoff window (unix seconds) has elapsed and retries remain. */
export async function listRetryEligibleRepos(maxRetries) {
    const nowSec = Math.floor(Date.now() / 1000);
    const rows = await all(`SELECT did FROM _repos WHERE status = 'failed' AND retry_after <= $1 AND retry_count < $2`, nowSec, maxRetries);
    return rows.map(({ did }) => did);
}
214
/** DIDs of repos still waiting for backfill. */
export async function listPendingRepos() {
    const rows = await all(`SELECT did FROM _repos WHERE status = 'pending'`);
    return rows.map(({ did }) => did);
}
/** Every known repo as { did, status } rows. */
export async function listAllRepoStatuses() {
    return await all(`SELECT did, status FROM _repos`);
}
221
/**
 * Paginated repo listing for the admin UI.
 * Optional filters: exact `status`, and `q` substring-matched (ILIKE)
 * against did and handle. Returns { repos, total } where total counts all
 * matching rows regardless of limit/offset.
 */
export async function listReposPaginated(opts = {}) {
    const { limit = 50, offset = 0, status, q } = opts;
    const conditions = [];
    const params = [];
    if (status) {
        conditions.push(`status = $${params.length + 1}`);
        params.push(status);
    }
    if (q) {
        // One parameter, referenced twice (same placeholder for did and handle).
        const slot = params.length + 1;
        conditions.push(`(did ILIKE $${slot} OR handle ILIKE $${slot})`);
        params.push(`%${q}%`);
    }
    const where = conditions.length ? ` WHERE ${conditions.join(' AND ')}` : '';
    const countRows = await all(`SELECT COUNT(*)::INTEGER as total FROM _repos${where}`, ...params);
    const total = Number(countRows[0]?.total || 0);
    const limitSlot = params.length + 1;
    const offsetSlot = params.length + 2;
    const rows = await all(`SELECT did, handle, status, backfilled_at, rev FROM _repos${where} ORDER BY backfilled_at DESC NULLS LAST, did LIMIT $${limitSlot} OFFSET $${offsetSlot}`, ...params, limit, offset);
    return { repos: rows, total };
}
241
/** Row count per registered collection, keyed by collection NSID. */
export async function getCollectionCounts() {
    const counts = {};
    for (const [collection, schema] of schemas) {
        const [row] = await all(`SELECT COUNT(*)::INTEGER as count FROM ${schema.tableName}`);
        counts[collection] = Number(row?.count || 0);
    }
    return counts;
}
249
/** CREATE TABLE statements for every table, joined with blank lines. */
export async function getSchemaDump() {
    const rows = await all(`SELECT sql FROM duckdb_tables() ORDER BY table_name`);
    return rows.map(({ sql }) => `${sql};`).join('\n\n');
}
253
/**
 * Build an INSERT OR REPLACE operation ({ sql, params }) for one record in
 * the collection's main table, without executing it. Columns uri/cid/did/
 * indexed_at come first; remaining columns are mapped from the record by
 * schema column descriptors.
 *
 * StrongRef expansion relies on column naming conventions:
 * a `*_uri` column with isRef pulls `.uri` out of an object value, and a
 * `*__cid` column reads `.cid` from the sibling field named by stripping
 * the `__cid` suffix (e.g. subject__cid <- record.subject.cid).
 *
 * @throws Error when the collection is not registered in `schemas`
 */
export function buildInsertOp(collection, uri, cid, authorDid, record) {
    const schema = schemas.get(collection);
    if (!schema)
        throw new Error(`Unknown collection: ${collection}`);
    const colNames = ['uri', 'cid', 'did', 'indexed_at'];
    const placeholders = ['$1', '$2', '$3', '$4'];
    const values = [uri, cid, authorDid, new Date().toISOString()];
    let paramIdx = 5;
    for (const col of schema.columns) {
        let rawValue = record[col.originalName];
        // Handle strongRef expansion: subject_uri reads record.subject.uri, subject__cid reads record.subject.cid
        if (rawValue && typeof rawValue === 'object' && col.name.endsWith('_uri') && col.isRef) {
            rawValue = rawValue.uri;
        }
        else if (col.originalName.endsWith('__cid') && record[col.originalName.replace('__cid', '')]) {
            rawValue = record[col.originalName.replace('__cid', '')].cid;
        }
        colNames.push(col.name);
        placeholders.push(`$${paramIdx++}`);
        if (rawValue === undefined || rawValue === null) {
            values.push(null);
        }
        else if (col.duckdbType === 'JSON') {
            // JSON columns are bound as serialized text.
            values.push(JSON.stringify(rawValue));
        }
        else {
            values.push(rawValue);
        }
    }
    const sql = `INSERT OR REPLACE INTO ${schema.tableName} (${colNames.join(', ')}) VALUES (${placeholders.join(', ')})`;
    return { sql, params: values };
}
285
/**
 * Insert (or replace) a single record plus all of its dependent rows:
 * the main-table row, one child-table row per array item, and the matching
 * union-branch rows. Existing dependent rows for the same uri are deleted
 * first because INSERT OR REPLACE on the main table does not cascade.
 * Statement order (main row, then children, then unions) is load-bearing.
 *
 * @throws Error when the collection is not registered in `schemas`
 */
export async function insertRecord(collection, uri, cid, authorDid, record) {
    const schema = schemas.get(collection);
    if (!schema)
        throw new Error(`Unknown collection: ${collection}`);
    const { sql, params } = buildInsertOp(collection, uri, cid, authorDid, record);
    await run(sql, ...params);
    // Insert child table rows
    for (const child of schema.children) {
        const items = record[child.fieldName];
        if (!Array.isArray(items))
            continue;
        // Delete existing child rows (handles INSERT OR REPLACE on main table)
        await run(`DELETE FROM ${child.tableName} WHERE parent_uri = $1`, uri);
        for (const item of items) {
            const colNames = ['parent_uri', 'parent_did'];
            const placeholders = ['$1', '$2'];
            const values = [uri, authorDid];
            let idx = 3;
            for (const col of child.columns) {
                colNames.push(col.name);
                placeholders.push(`$${idx++}`);
                const raw = item[col.originalName];
                if (raw === undefined || raw === null) {
                    values.push(null);
                }
                else if (col.duckdbType === 'JSON') {
                    values.push(JSON.stringify(raw));
                }
                else {
                    values.push(raw);
                }
            }
            await run(`INSERT INTO ${child.tableName} (${colNames.join(', ')}) VALUES (${placeholders.join(', ')})`, ...values);
        }
    }
    // Insert union branch rows
    for (const union of schema.unions) {
        const unionValue = record[union.fieldName];
        if (!unionValue || !unionValue.$type)
            continue;
        // The branch is selected by the atproto $type discriminator.
        const branch = union.branches.find((b) => b.type === unionValue.$type);
        if (!branch)
            continue;
        // Delete existing branch rows (handles INSERT OR REPLACE)
        // All branches are cleared because a replace may switch branch type.
        for (const b of union.branches) {
            await run(`DELETE FROM ${b.tableName} WHERE parent_uri = $1`, uri);
        }
        if (branch.isArray && branch.arrayField) {
            // Array branch (e.g., embed.images) — insert one row per array item
            const items = unionValue[branch.arrayField];
            if (!Array.isArray(items))
                continue;
            for (const item of items) {
                const colNames = ['parent_uri', 'parent_did'];
                const placeholders = ['$1', '$2'];
                const values = [uri, authorDid];
                let idx = 3;
                for (const col of branch.columns) {
                    colNames.push(col.name);
                    placeholders.push(`$${idx++}`);
                    const raw = item[col.originalName];
                    if (raw === undefined || raw === null) {
                        values.push(null);
                    }
                    else if (col.duckdbType === 'JSON') {
                        values.push(JSON.stringify(raw));
                    }
                    else {
                        values.push(raw);
                    }
                }
                await run(`INSERT INTO ${branch.tableName} (${colNames.join(', ')}) VALUES (${placeholders.join(', ')})`, ...values);
            }
        }
        else {
            // Single-value branch — extract data from wrapper or direct properties
            const branchData = resolveBranchData(unionValue, branch);
            const colNames = ['parent_uri', 'parent_did'];
            const placeholders = ['$1', '$2'];
            const values = [uri, authorDid];
            let idx = 3;
            for (const col of branch.columns) {
                colNames.push(col.name);
                placeholders.push(`$${idx++}`);
                const raw = branchData[col.originalName];
                if (raw === undefined || raw === null) {
                    values.push(null);
                }
                else if (col.duckdbType === 'JSON') {
                    values.push(JSON.stringify(raw));
                }
                else {
                    values.push(raw);
                }
            }
            await run(`INSERT INTO ${branch.tableName} (${colNames.join(', ')}) VALUES (${placeholders.join(', ')})`, ...values);
        }
    }
}
384
/**
 * Extract the object whose properties map onto a union branch's columns.
 * When the branch declares a wrapperField and the union value holds a
 * non-null object under that key, the wrapped object wins; in every other
 * case the union value itself is returned unchanged.
 */
function resolveBranchData(unionValue, branch) {
    if (!branch.wrapperField) {
        return unionValue;
    }
    const wrapped = unionValue[branch.wrapperField];
    const isUsableObject = wrapped !== null && typeof wrapped === 'object';
    return isUsableObject ? wrapped : unionValue;
}
393
/**
 * Remove a record and every dependent row. Dependent child and union-branch
 * rows are deleted before the parent row itself. Unknown collections are a
 * silent no-op.
 */
export async function deleteRecord(collection, uri) {
    const schema = schemas.get(collection);
    if (!schema)
        return;
    for (const { tableName } of schema.children) {
        await run(`DELETE FROM ${tableName} WHERE parent_uri = $1`, uri);
    }
    for (const union of schema.unions) {
        for (const { tableName } of union.branches) {
            await run(`DELETE FROM ${tableName} WHERE parent_uri = $1`, uri);
        }
    }
    await run(`DELETE FROM ${schema.tableName} WHERE uri = $1`, uri);
}
407
/**
 * Append labels to the _labels log. Non-negation labels are deduplicated:
 * if an identical label is already active (not negated, not expired, and not
 * superseded by a later negation), the new one is skipped. Negation labels
 * are always appended — the existence probe is only run when its result is
 * actually used (the original ran it unconditionally and discarded it for
 * negations).
 */
export async function insertLabels(labels) {
    if (labels.length === 0)
        return;
    for (const label of labels) {
        // Skip if an active (non-negated, non-expired, not-superseded-by-negation) label already exists
        if (!label.neg) {
            const existing = await all(`SELECT 1 FROM _labels l1 WHERE l1.src = $1 AND l1.uri = $2 AND l1.val = $3 AND l1.neg = false AND (l1.exp IS NULL OR l1.exp > CURRENT_TIMESTAMP) AND NOT EXISTS (SELECT 1 FROM _labels l2 WHERE l2.uri = l1.uri AND l2.val = l1.val AND l2.neg = true AND l2.id > l1.id) LIMIT 1`, label.src, label.uri, label.val);
            if (existing.length > 0)
                continue;
        }
        await run(`INSERT INTO _labels (src, uri, val, neg, cts, exp) VALUES ($1, $2, $3, $4, $5, $6)`, label.src, label.uri, label.val, label.neg || false, label.cts || new Date().toISOString(), label.exp || null);
    }
}
418
/**
 * Fetch the currently-active labels for a set of subject URIs.
 * A label is active when it is not negated, not expired, and not superseded
 * by a later (higher id) negation with the same uri/val.
 * Returns a Map of uri -> array of label objects; URIs with no labels are
 * simply absent from the map.
 */
export async function queryLabelsForUris(uris) {
    if (uris.length === 0)
        return new Map();
    // One positional placeholder per uri for the IN clause.
    const placeholders = uris.map((_, i) => `$${i + 1}`).join(',');
    const rows = await all(`SELECT src, uri, val, neg, cts, exp FROM _labels l1 WHERE uri IN (${placeholders}) AND (exp IS NULL OR exp > CURRENT_TIMESTAMP) AND neg = false AND NOT EXISTS (SELECT 1 FROM _labels l2 WHERE l2.uri = l1.uri AND l2.val = l1.val AND l2.neg = true AND l2.id > l1.id)`, ...uris);
    const result = new Map();
    for (const row of rows) {
        const key = row.uri;
        if (!result.has(key))
            result.set(key, []);
        result.get(key).push({
            src: row.src,
            uri: row.uri,
            val: row.val,
            neg: row.neg,
            // normalizeValue is defined elsewhere in this file — presumably
            // converts DuckDB timestamp values to strings; verify there.
            cts: normalizeValue(row.cts),
            exp: row.exp ? String(row.exp) : null,
        });
    }
    return result;
}
439
/**
 * High-throughput ingestion path used by backfill. Records are grouped per
 * collection; for each collection the rows are appended into a temporary
 * TEXT-typed staging table via the DuckDB appender, then merged into the
 * real table with TRY_CAST for TIMESTAMP columns (bad timestamps become
 * NULL and NOT NULL violations are filtered out). Child-array tables and
 * union-branch tables are populated the same way. All staging work for a
 * collection runs inside a single write-queue slot so reads never see a
 * half-merged staging state. Bad records/items are skipped silently.
 *
 * @param records array of { collection, uri, cid, did, record }
 * @returns number of main-table rows successfully appended (rows later
 *          filtered by the NOT NULL merge predicate are still counted)
 */
export async function bulkInsertRecords(records) {
    if (records.length === 0)
        return 0;
    // Group records by collection
    const byCollection = new Map();
    for (const rec of records) {
        const list = byCollection.get(rec.collection) || [];
        list.push(rec);
        byCollection.set(rec.collection, list);
    }
    let inserted = 0;
    for (const [collection, recs] of byCollection) {
        const schema = schemas.get(collection);
        if (!schema)
            continue;
        const stagingTable = `_staging_${collection.replace(/\./g, '_')}`;
        const allCols = ['uri', 'cid', 'did', 'indexed_at', ...schema.columns.map((c) => c.name)];
        // Staging uses TEXT for TIMESTAMP columns so the appender can take raw
        // strings; casting happens at merge time.
        const colDefs = [
            'uri TEXT',
            'cid TEXT',
            'did TEXT',
            'indexed_at TEXT',
            ...schema.columns.map((c) => `${c.name} ${c.duckdbType === 'TIMESTAMP' ? 'TEXT' : c.duckdbType}`),
        ];
        // Create staging table + appender + merge all in one write queue slot
        await enqueue('write', async () => {
            await con.run(`DROP TABLE IF EXISTS ${stagingTable}`);
            await con.run(`CREATE TABLE ${stagingTable} (${colDefs.join(', ')})`);
            const appender = await con.createAppender(stagingTable);
            const now = new Date().toISOString();
            for (const rec of recs) {
                try {
                    appender.appendVarchar(rec.uri);
                    appender.appendVarchar(rec.cid);
                    appender.appendVarchar(rec.did);
                    appender.appendVarchar(now);
                    for (const col of schema.columns) {
                        let rawValue = rec.record[col.originalName];
                        // Same strongRef expansion conventions as buildInsertOp:
                        // *_uri ref columns read .uri; *__cid columns read the
                        // sibling field's .cid.
                        if (rawValue && typeof rawValue === 'object' && col.name.endsWith('_uri') && col.isRef) {
                            rawValue = rawValue.uri;
                        }
                        else if (col.originalName.endsWith('__cid') && rec.record[col.originalName.replace('__cid', '')]) {
                            rawValue = rec.record[col.originalName.replace('__cid', '')].cid;
                        }
                        if (rawValue === undefined || rawValue === null) {
                            appender.appendNull();
                        }
                        else if (col.duckdbType === 'JSON') {
                            appender.appendVarchar(JSON.stringify(rawValue));
                        }
                        else if (col.duckdbType === 'INTEGER') {
                            appender.appendInteger(typeof rawValue === 'number' ? rawValue : parseInt(rawValue));
                        }
                        else if (col.duckdbType === 'BOOLEAN') {
                            appender.appendBoolean(!!rawValue);
                        }
                        else {
                            appender.appendVarchar(String(rawValue));
                        }
                    }
                    appender.endRow();
                    inserted++;
                }
                catch {
                    // Skip bad records
                }
            }
            appender.flushSync();
            appender.closeSync();
            // Merge into target with TRY_CAST for TIMESTAMP columns, filtering rows that would violate NOT NULL
            const selectCols = allCols.map((name) => {
                const col = schema.columns.find((c) => c.name === name);
                if (name === 'indexed_at' || (col && col.duckdbType === 'TIMESTAMP')) {
                    return `TRY_CAST(${name} AS TIMESTAMP) AS ${name}`;
                }
                return name;
            });
            // Build WHERE clause to exclude rows missing NOT NULL fields
            const notNullChecks = ['uri IS NOT NULL', 'did IS NOT NULL'];
            for (const col of schema.columns) {
                if (col.notNull) {
                    if (col.duckdbType === 'TIMESTAMP') {
                        // An unparseable timestamp casts to NULL, so check the cast result.
                        notNullChecks.push(`TRY_CAST(${col.name} AS TIMESTAMP) IS NOT NULL`);
                    }
                    else {
                        notNullChecks.push(`${col.name} IS NOT NULL`);
                    }
                }
            }
            const whereClause = notNullChecks.length ? ` WHERE ${notNullChecks.join(' AND ')}` : '';
            await con.run(`INSERT OR REPLACE INTO ${schema.tableName} (${allCols.join(', ')}) SELECT ${selectCols.join(', ')} FROM ${stagingTable}${whereClause}`);
            await con.run(`DROP TABLE ${stagingTable}`);
            // Populate child tables
            for (const child of schema.children) {
                const childStagingTable = `_staging_${collection.replace(/\./g, '_')}__${child.fieldName}`;
                const childColDefs = [
                    'parent_uri TEXT',
                    'parent_did TEXT',
                    ...child.columns.map((c) => `${c.name} ${c.duckdbType === 'TIMESTAMP' ? 'TEXT' : c.duckdbType}`),
                ];
                const childAllCols = ['parent_uri', 'parent_did', ...child.columns.map((c) => c.name)];
                await con.run(`DROP TABLE IF EXISTS ${childStagingTable}`);
                await con.run(`CREATE TABLE ${childStagingTable} (${childColDefs.join(', ')})`);
                const childAppender = await con.createAppender(childStagingTable);
                for (const rec of recs) {
                    const items = rec.record[child.fieldName];
                    if (!Array.isArray(items))
                        continue;
                    for (const item of items) {
                        try {
                            childAppender.appendVarchar(rec.uri);
                            childAppender.appendVarchar(rec.did);
                            for (const col of child.columns) {
                                const rawValue = item[col.originalName];
                                if (rawValue === undefined || rawValue === null) {
                                    childAppender.appendNull();
                                }
                                else if (col.duckdbType === 'JSON') {
                                    childAppender.appendVarchar(JSON.stringify(rawValue));
                                }
                                else if (col.duckdbType === 'INTEGER') {
                                    childAppender.appendInteger(typeof rawValue === 'number' ? rawValue : parseInt(rawValue));
                                }
                                else if (col.duckdbType === 'BOOLEAN') {
                                    childAppender.appendBoolean(!!rawValue);
                                }
                                else {
                                    childAppender.appendVarchar(String(rawValue));
                                }
                            }
                            childAppender.endRow();
                        }
                        catch {
                            // Skip bad items
                        }
                    }
                }
                childAppender.flushSync();
                childAppender.closeSync();
                // Delete existing child rows for these URIs, then merge staging
                const uriPlaceholders = recs.map((_, i) => `$${i + 1}`).join(',');
                const delStmt = await con.prepare(`DELETE FROM ${child.tableName} WHERE parent_uri IN (${uriPlaceholders})`);
                bindParams(delStmt, recs.map((r) => r.uri));
                await delStmt.run();
                const childSelectCols = childAllCols.map((name) => {
                    const col = child.columns.find((c) => c.name === name);
                    if (col && col.duckdbType === 'TIMESTAMP')
                        return `TRY_CAST(${name} AS TIMESTAMP) AS ${name}`;
                    return name;
                });
                await con.run(`INSERT INTO ${child.tableName} (${childAllCols.join(', ')}) SELECT ${childSelectCols.join(', ')} FROM ${childStagingTable} WHERE parent_uri IS NOT NULL`);
                await con.run(`DROP TABLE ${childStagingTable}`);
            }
            // Populate union branch tables
            for (const union of schema.unions) {
                for (const branch of union.branches) {
                    const branchStagingTable = `_staging_${collection.replace(/\./g, '_')}__${toSnakeCase(union.fieldName)}_${branch.branchName}`;
                    const branchColDefs = [
                        'parent_uri TEXT',
                        'parent_did TEXT',
                        ...branch.columns.map((c) => `${c.name} ${c.duckdbType === 'TIMESTAMP' ? 'TEXT' : c.duckdbType}`),
                    ];
                    const branchAllCols = ['parent_uri', 'parent_did', ...branch.columns.map((c) => c.name)];
                    await con.run(`DROP TABLE IF EXISTS ${branchStagingTable}`);
                    await con.run(`CREATE TABLE ${branchStagingTable} (${branchColDefs.join(', ')})`);
                    const branchAppender = await con.createAppender(branchStagingTable);
                    for (const rec of recs) {
                        const unionValue = rec.record[union.fieldName];
                        if (!unionValue || typeof unionValue !== 'object')
                            continue;
                        // Branch selected by the atproto $type discriminator.
                        if (unionValue.$type !== branch.type)
                            continue;
                        if (branch.isArray && branch.arrayField) {
                            const items = resolveBranchData(unionValue, branch)[branch.arrayField];
                            if (!Array.isArray(items))
                                continue;
                            for (const item of items) {
                                try {
                                    branchAppender.appendVarchar(rec.uri);
                                    branchAppender.appendVarchar(rec.did);
                                    for (const col of branch.columns) {
                                        const rawValue = item[col.originalName];
                                        if (rawValue === undefined || rawValue === null) {
                                            branchAppender.appendNull();
                                        }
                                        else if (col.duckdbType === 'JSON') {
                                            branchAppender.appendVarchar(JSON.stringify(rawValue));
                                        }
                                        else if (col.duckdbType === 'INTEGER') {
                                            branchAppender.appendInteger(typeof rawValue === 'number' ? rawValue : parseInt(rawValue));
                                        }
                                        else if (col.duckdbType === 'BOOLEAN') {
                                            branchAppender.appendBoolean(!!rawValue);
                                        }
                                        else {
                                            branchAppender.appendVarchar(String(rawValue));
                                        }
                                    }
                                    branchAppender.endRow();
                                }
                                catch {
                                    // Skip bad items
                                }
                            }
                        }
                        else {
                            try {
                                const branchData = resolveBranchData(unionValue, branch);
                                branchAppender.appendVarchar(rec.uri);
                                branchAppender.appendVarchar(rec.did);
                                for (const col of branch.columns) {
                                    const rawValue = branchData[col.originalName];
                                    if (rawValue === undefined || rawValue === null) {
                                        branchAppender.appendNull();
                                    }
                                    else if (col.duckdbType === 'JSON') {
                                        branchAppender.appendVarchar(JSON.stringify(rawValue));
                                    }
                                    else if (col.duckdbType === 'INTEGER') {
                                        branchAppender.appendInteger(typeof rawValue === 'number' ? rawValue : parseInt(rawValue));
                                    }
                                    else if (col.duckdbType === 'BOOLEAN') {
                                        branchAppender.appendBoolean(!!rawValue);
                                    }
                                    else {
                                        branchAppender.appendVarchar(String(rawValue));
                                    }
                                }
                                branchAppender.endRow();
                            }
                            catch {
                                // Skip bad records
                            }
                        }
                    }
                    branchAppender.flushSync();
                    branchAppender.closeSync();
                    // Delete existing branch rows for these URIs, then merge staging
                    const uriPlaceholders = recs.map((_, i) => `$${i + 1}`).join(',');
                    const delStmt = await con.prepare(`DELETE FROM ${branch.tableName} WHERE parent_uri IN (${uriPlaceholders})`);
                    bindParams(delStmt, recs.map((r) => r.uri));
                    await delStmt.run();
                    const branchSelectCols = branchAllCols.map((name) => {
                        const col = branch.columns.find((c) => c.name === name);
                        if (col && col.duckdbType === 'TIMESTAMP')
                            return `TRY_CAST(${name} AS TIMESTAMP) AS ${name}`;
                        return name;
                    });
                    await con.run(`INSERT INTO ${branch.tableName} (${branchAllCols.join(', ')}) SELECT ${branchSelectCols.join(', ')} FROM ${branchStagingTable} WHERE parent_uri IS NOT NULL`);
                    await con.run(`DROP TABLE ${branchStagingTable}`);
                }
            }
        });
    }
    return inserted;
}
695
/**
 * Paginated, filterable listing of records in one collection.
 *
 * @param {string} collection - Collection NSID; must be registered in `schemas`.
 * @param {object} [opts]
 * @param {number} [opts.limit=20] - Max records to return.
 * @param {string} [opts.cursor] - Opaque keyset cursor from a previous call (see packCursor).
 * @param {object} [opts.filters] - Field → value equality filters; keys are camelCase
 *   and are snake_cased before validation against the schema. Unknown keys are skipped.
 * @param {string} [opts.sort='indexed_at'] - Sort column (original or snake name).
 * @param {string} [opts.order='desc'] - 'asc' | 'desc'.
 * @returns {Promise<{records: object[], cursor: string|undefined}>}
 * @throws {Error} On unknown collection or invalid sort field.
 *
 * Rows from taken-down repos are excluded. Child-array and union-branch data
 * are batch-fetched and attached as `__childData` / `__unionData` for reshapeRow.
 */
export async function queryRecords(collection, opts = {}) {
    const schema = schemas.get(collection);
    if (!schema)
        throw new Error(`Unknown collection: ${collection}`);
    const { limit = 20, cursor, filters, sort = 'indexed_at', order = 'desc' } = opts;
    // Validate sort field exists
    const sortCol = sort === 'indexed_at' ? 'indexed_at' : schema.columns.find((c) => c.originalName === sort || c.name === sort);
    const sortName = typeof sortCol === 'string' ? sortCol : sortCol?.name;
    if (!sortName)
        throw new Error(`Invalid sort field: ${sort}`);
    const conditions = [];
    const params = [];
    // DuckDB positional placeholders ($1, $2, ...) — paramIdx tracks the next slot.
    let paramIdx = 1;
    // Cursor pagination — compound keyset (sortCol, cid)
    if (cursor) {
        const parsed = unpackCursor(cursor);
        if (parsed) {
            const op = order === 'desc' ? '<' : '>';
            const pSort1 = `$${paramIdx++}`;
            const pSort2 = `$${paramIdx++}`;
            const pCid = `$${paramIdx++}`;
            // (sort, cid) keyset: strictly past the sort value, or tied on sort and past the cid tiebreaker.
            conditions.push(`(t.${sortName} ${op} ${pSort1} OR (t.${sortName} = ${pSort2} AND t.cid ${op} ${pCid}))`);
            params.push(parsed.primary, parsed.primary, parsed.cid);
        }
    }
    // Field filters — validate each against schema
    if (filters) {
        const validColumns = new Set(schema.columns.map((c) => c.name));
        validColumns.add('did');
        for (const [key, value] of Object.entries(filters)) {
            const colName = toSnakeCase(key);
            if (!validColumns.has(colName))
                continue; // silently skip invalid filters
            conditions.push(`t.${colName} = $${paramIdx++}`);
            params.push(value);
        }
    }
    let sql = `SELECT t.*, r.handle FROM ${schema.tableName} t LEFT JOIN _repos r ON t.did = r.did WHERE (r.status IS NULL OR r.status != 'takendown')`;
    if (conditions.length)
        sql += ' AND ' + conditions.join(' AND ');
    // cid as secondary sort makes the ordering total, so keyset cursors are stable.
    sql += ` ORDER BY t.${sortName} ${order.toUpperCase()}, t.cid ${order.toUpperCase()} LIMIT $${paramIdx++}`;
    params.push(limit + 1); // fetch one extra for cursor
    const rows = await all(sql, ...params);
    const hasMore = rows.length > limit;
    if (hasMore)
        rows.pop();
    // Attach child data if this collection has decomposed arrays
    if (schema.children.length > 0 && rows.length > 0) {
        const uris = rows.map((r) => r.uri);
        const childData = new Map();
        for (const child of schema.children) {
            const childRows = await getChildRows(child.tableName, uris);
            childData.set(child.fieldName, childRows);
        }
        for (const row of rows) {
            ;
            row.__childData = childData;
        }
    }
    // Attach union branch data
    if (schema.unions.length > 0 && rows.length > 0) {
        const uris = rows.map((r) => r.uri);
        const unionData = new Map();
        for (const union of schema.unions) {
            const branchData = new Map();
            for (const branch of union.branches) {
                const branchRows = await getChildRows(branch.tableName, uris);
                branchData.set(branch.branchName, branchRows);
            }
            unionData.set(union.fieldName, branchData);
        }
        for (const row of rows) {
            ;
            row.__unionData = unionData;
        }
    }
    const lastRow = rows[rows.length - 1];
    const nextCursor = hasMore && lastRow ? packCursor(lastRow[sortName], lastRow.cid) : undefined;
    return { records: rows, cursor: nextCursor };
}
775
/**
 * Look up a single record by its AT-URI, scanning every registered collection
 * until one contains the URI. Rows owned by taken-down repos are excluded.
 * When the owning schema declares decomposed child arrays or union branches,
 * the relevant rows are fetched and attached as `__childData` / `__unionData`
 * (consumed later by reshapeRow). Returns null when nothing matches.
 */
export async function getRecordByUri(uri) {
    for (const [_collection, schema] of schemas) {
        const rows = await all(`SELECT t.*, r.handle FROM ${schema.tableName} t LEFT JOIN _repos r ON t.did = r.did WHERE t.uri = $1 AND (r.status IS NULL OR r.status != 'takendown')`, uri);
        if (rows.length === 0)
            continue;
        const record = rows[0];
        if (schema.children.length > 0) {
            const childMap = new Map();
            for (const child of schema.children) {
                childMap.set(child.fieldName, await getChildRows(child.tableName, [uri]));
            }
            record.__childData = childMap;
        }
        if (schema.unions.length > 0) {
            const unionMap = new Map();
            for (const union of schema.unions) {
                const perBranch = new Map();
                for (const branch of union.branches) {
                    perBranch.set(branch.branchName, await getChildRows(branch.tableName, [uri]));
                }
                unionMap.set(union.fieldName, perBranch);
            }
            record.__unionData = unionMap;
        }
        return record;
    }
    return null;
}
807
/**
 * Batch-fetch records from one collection by AT-URI, preserving the caller's
 * ordering and silently dropping URIs that are missing or owned by taken-down
 * repos. Child-array and union-branch rows are fetched once for the whole URI
 * set and attached as `__childData` / `__unionData` for reshapeRow.
 */
export async function getRecordsByUris(collection, uris) {
    if (uris.length === 0)
        return [];
    const schema = schemas.get(collection);
    if (!schema)
        return [];
    const placeholders = uris.map((_, i) => `$${i + 1}`).join(',');
    const rows = await all(`SELECT t.*, r.handle FROM ${schema.tableName} t LEFT JOIN _repos r ON t.did = r.did WHERE t.uri IN (${placeholders}) AND (r.status IS NULL OR r.status != 'takendown')`, ...uris);
    // Batch-fetch decomposed child rows once for the whole URI set.
    const childData = new Map();
    for (const child of schema.children) {
        childData.set(child.fieldName, await getChildRows(child.tableName, uris));
    }
    // Batch-fetch union branch rows once for the whole URI set.
    const unionData = new Map();
    for (const union of schema.unions) {
        const perBranch = new Map();
        for (const branch of union.branches) {
            perBranch.set(branch.branchName, await getChildRows(branch.tableName, uris));
        }
        unionData.set(union.fieldName, perBranch);
    }
    // Attach lookup maps so reshapeRow can rebuild nested fields later.
    for (const row of rows) {
        row.__childData = childData;
        if (unionData.size > 0)
            row.__unionData = unionData;
    }
    // Re-emit in the caller's requested order, skipping URIs with no row.
    const byUri = new Map();
    for (const row of rows)
        byUri.set(row.uri, row);
    const ordered = [];
    for (const u of uris) {
        const hit = byUri.get(u);
        if (hit)
            ordered.push(hit);
    }
    return ordered;
}
842
/**
 * Multi-phase search across any collection's records.
 *
 * 1. **BM25** — Full-text search via DuckDB FTS. Multi-word queries use conjunctive
 *    mode (ALL terms required) to avoid spurious single-token matches.
 * 2. **Exact substring** — ILIKE scan on all TEXT/JSON columns catches phrase matches
 *    that BM25 missed or ranked low (e.g. "bad bunny"). Results are prepended to BM25.
 * 3. **Recent rows** — ILIKE scan of rows ingested since the last FTS rebuild, so newly
 *    written records are immediately searchable before the index catches up.
 * 4. **Fuzzy** — Jaro-Winkler similarity fallback for typo tolerance when earlier phases
 *    return fewer than `limit` results.
 *
 * All phases derive searchable columns generically from the collection schema — no
 * column names are hardcoded. All phases exclude records owned by taken-down repos
 * (previously only the BM25 phase did, so taken-down content could leak in via the
 * exact/recent/fuzzy phases).
 *
 * @param {string} collection - Collection NSID; must be registered in `schemas`.
 * @param {string} query - User search text.
 * @param {object} [opts]
 * @param {number} [opts.limit=20]
 * @param {string} [opts.cursor] - BM25 keyset cursor (score, cid) from a prior page.
 * @param {boolean} [opts.fuzzy=true] - Enable the Jaro-Winkler fallback phase.
 * @returns {Promise<{records: object[], cursor: string|undefined}>}
 * @throws {Error} On unknown collection or when no searchable columns exist.
 */
export async function searchRecords(collection, query, opts = {}) {
    const schema = schemas.get(collection);
    if (!schema)
        throw new Error(`Unknown collection: ${collection}`);
    const elapsed = timer();
    const { limit = 20, cursor, fuzzy = true } = opts;
    const textCols = schema.columns.filter((c) => c.duckdbType === 'TEXT');
    // Also check if FTS has indexed any columns (including derived JSON columns)
    const ftsSearchCols = getSearchColumns(collection);
    if (textCols.length === 0 && ftsSearchCols.length === 0) {
        throw new Error(`No searchable columns in ${collection}`);
    }
    // FTS shadow table name (dots replaced with underscores)
    const safeName = '_fts_' + collection.replace(/\./g, '_');
    const ftsSchema = `fts_main_${safeName}`;
    const phaseErrors = [];
    const phasesUsed = [];
    // Phase 1: BM25 ranked search on FTS shadow table
    let bm25Results = [];
    try {
        let paramIdx = 1;
        const ftsQuery = stripStopWords(query);
        const isMultiWord = ftsQuery.split(/\s+/).length > 1;
        const conjunctiveFlag = isMultiWord ? ', conjunctive := 1' : '';
        let sql = `SELECT m.*, ${ftsSchema}.match_bm25(s.uri, $${paramIdx++}${conjunctiveFlag}) AS score
             FROM ${safeName} s
             JOIN ${schema.tableName} m ON m.uri = s.uri
             LEFT JOIN _repos r ON m.did = r.did
             WHERE score IS NOT NULL
               AND (r.status IS NULL OR r.status != 'takendown')`;
        const params = [ftsQuery];
        if (cursor) {
            const parsed = unpackCursor(cursor);
            if (parsed) {
                const pScore1 = `$${paramIdx++}`;
                const pScore2 = `$${paramIdx++}`;
                const pCid = `$${paramIdx++}`;
                sql += ` AND (score > ${pScore1} OR (score = ${pScore2} AND m.cid < ${pCid}))`;
                params.push(parsed.primary, parsed.primary, parsed.cid);
            }
        }
        sql += ` ORDER BY score, m.cid DESC LIMIT $${paramIdx++}`;
        params.push(limit + 1);
        bm25Results = await all(sql, ...params);
        phasesUsed.push('bm25');
    }
    catch (err) {
        phaseErrors.push(`bm25: ${err.message}`);
    }
    const bm25Count = bm25Results.length;
    const hasMore = bm25Results.length > limit;
    if (hasMore)
        bm25Results.pop();
    // Phase 2: Exact substring match — boosts phrase matches above BM25 results
    const exactMatchResults = [];
    const bm25Uris = new Set(bm25Results.map((r) => r.uri));
    try {
        const searchParam = `%${query}%`;
        let paramIdx = 1;
        const ilikeConds = [];
        const params = [];
        // TEXT columns — direct ILIKE
        for (const c of textCols) {
            ilikeConds.push(`t.${c.name} ILIKE $${paramIdx++}`);
            params.push(searchParam);
        }
        // JSON columns — cast to text then ILIKE
        const jsonCols = schema.columns.filter((c) => c.duckdbType === 'JSON');
        for (const c of jsonCols) {
            ilikeConds.push(`CAST(t.${c.name} AS TEXT) ILIKE $${paramIdx++}`);
            params.push(searchParam);
        }
        // Handle from _repos table
        ilikeConds.push(`r.handle ILIKE $${paramIdx++}`);
        params.push(searchParam);
        if (ilikeConds.length > 0) {
            // FIX: exclude taken-down repos, consistent with the BM25 phase.
            const exactSQL = `SELECT t.* FROM ${schema.tableName} t LEFT JOIN _repos r ON t.did = r.did
             WHERE (${ilikeConds.join(' OR ')})
               AND (r.status IS NULL OR r.status != 'takendown')
             ORDER BY t.indexed_at DESC
             LIMIT $${paramIdx++}`;
            params.push(limit);
            const rows = await all(exactSQL, ...params);
            phasesUsed.push('exact');
            for (const row of rows) {
                if (!bm25Uris.has(row.uri)) {
                    exactMatchResults.push(row);
                    bm25Uris.add(row.uri);
                }
            }
        }
    }
    catch (err) {
        phaseErrors.push(`exact: ${err.message}`);
    }
    // Merge: exact matches first, then BM25 results, capped at limit
    const mergedResults = [...exactMatchResults, ...bm25Results].slice(0, limit + (hasMore ? 1 : 0));
    // Replace bm25Results with merged for downstream phases
    bm25Results = mergedResults;
    // Phase 3: ILIKE scan of rows written since last FTS rebuild (immediate searchability)
    const existingUris = new Set(bm25Results.map((r) => r.uri));
    const { getLastRebuiltAt } = await import("./fts.js");
    const rebuiltAt = getLastRebuiltAt(collection);
    let recentCount = 0;
    if (rebuiltAt && bm25Results.length < limit) {
        const remaining = limit - bm25Results.length;
        const searchParam = `%${query}%`;
        let paramIdx = 1;
        const ilikeParts = textCols.map((c) => `t.${c.name} ILIKE $${paramIdx++}`);
        ilikeParts.push(`r.handle ILIKE $${paramIdx++}`);
        const ilikeConds = ilikeParts.join(' OR ');
        const params = [...textCols.map(() => searchParam), searchParam];
        // FIX: exclude taken-down repos, consistent with the BM25 phase.
        const recentSQL = `SELECT t.* FROM ${schema.tableName} t LEFT JOIN _repos r ON t.did = r.did
             WHERE t.indexed_at >= $${paramIdx++} AND t.uri NOT IN (SELECT uri FROM ${safeName}) AND (${ilikeConds})
               AND (r.status IS NULL OR r.status != 'takendown')
             ORDER BY t.indexed_at DESC
             LIMIT $${paramIdx++}`;
        params.push(rebuiltAt, remaining + existingUris.size);
        try {
            const recentRows = await all(recentSQL, ...params);
            phasesUsed.push('recent');
            for (const row of recentRows) {
                if (bm25Results.length >= limit)
                    break;
                if (!existingUris.has(row.uri)) {
                    existingUris.add(row.uri);
                    bm25Results.push(row);
                    recentCount++;
                }
            }
        }
        catch (err) {
            phaseErrors.push(`recent: ${err.message}`);
        }
    }
    // Phase 4: Fuzzy fallback for typo tolerance (if still under limit)
    let fuzzyCount = 0;
    if (fuzzy && bm25Results.length < limit) {
        const remaining = limit - bm25Results.length;
        const simExprs = [
            ...textCols.map((c) => `jaro_winkler_similarity(lower(t.${c.name}), lower($1))`),
            `jaro_winkler_similarity(lower(r.handle), lower($1))`,
        ];
        // Include child table TEXT columns via correlated subquery
        for (const child of schema.children) {
            for (const col of child.columns) {
                if (col.duckdbType === 'TEXT') {
                    simExprs.push(`COALESCE((SELECT MAX(jaro_winkler_similarity(lower(c.${col.name}), lower($1))) FROM ${child.tableName} c WHERE c.parent_uri = t.uri), 0)`);
                }
            }
        }
        const greatestExpr = `GREATEST(${simExprs.join(', ')})`;
        // FIX: exclude taken-down repos, consistent with the BM25 phase.
        const fuzzySQL = `SELECT t.*, ${greatestExpr} AS fuzzy_score
             FROM ${schema.tableName} t LEFT JOIN _repos r ON t.did = r.did
             WHERE ${greatestExpr} >= 0.8
               AND (r.status IS NULL OR r.status != 'takendown')
             ORDER BY fuzzy_score DESC
             LIMIT $2`;
        try {
            const fuzzyRows = await all(fuzzySQL, query, remaining + existingUris.size);
            phasesUsed.push('fuzzy');
            for (const row of fuzzyRows) {
                if (bm25Results.length >= limit)
                    break;
                if (!existingUris.has(row.uri)) {
                    bm25Results.push(row);
                    fuzzyCount++;
                }
            }
        }
        catch (err) {
            phaseErrors.push(`fuzzy: ${err.message}`);
        }
    }
    // Remove score columns from results
    const records = bm25Results.map(({ score: _score, fuzzy_score: _fuzzy_score, ...rest }) => rest);
    const lastRow = bm25Results[bm25Results.length - 1];
    const nextCursor = hasMore && lastRow?.score != null ? packCursor(lastRow.score, lastRow.cid) : undefined;
    emit('search', 'query', {
        collection,
        query,
        bm25_count: bm25Count > limit ? bm25Count - 1 : bm25Count,
        exact_count: exactMatchResults.length,
        recent_count: recentCount,
        fuzzy_count: fuzzyCount,
        total_results: records.length,
        duration_ms: elapsed(),
        phases_used: phasesUsed.join(','),
        error: phaseErrors.length > 0 ? phaseErrors.join('; ') : undefined,
    });
    return { records, cursor: nextCursor };
}
1046
/**
 * Run an arbitrary read query with positional params (used by script feeds).
 * Thin wrapper over `all` that accepts params as an array instead of varargs.
 */
export async function querySQL(sql, params = []) {
    const rows = await all(sql, ...params);
    return rows;
}
1050
/** Run an arbitrary SQL statement with positional params; thin wrapper over `run`. */
export async function runSQL(sql, ...params) {
    const result = await run(sql, ...params);
    return result;
}
1053
/** Return the registered schema for a collection NSID, or undefined if unknown. */
export function getSchema(collection) {
    const schema = schemas.get(collection);
    return schema;
}
1056
/**
 * Count rows in `collection` where `field` equals `value`; 0 for unknown collections.
 * NOTE(review): `field` is interpolated directly into the SQL — callers must pass
 * trusted column names only, never user input.
 */
export async function countByField(collection, field, value) {
    const schema = schemas.get(collection);
    if (!schema)
        return 0;
    const [row] = await all(`SELECT COUNT(*) as count FROM ${schema.tableName} WHERE ${field} = $1`, value);
    return Number(row?.count || 0);
}
1063
/**
 * Count rows grouped by `field` for a batch of values in one query.
 * Returns a Map of value → count; values with no rows are simply absent.
 * NOTE(review): `field` is interpolated into the SQL — trusted column names only.
 */
export async function countByFieldBatch(collection, field, values) {
    const counts = new Map();
    if (values.length === 0)
        return counts;
    const schema = schemas.get(collection);
    if (!schema)
        return counts;
    const placeholders = values.map((_, i) => `$${i + 1}`).join(',');
    const rows = await all(`SELECT ${field}, COUNT(*) as count FROM ${schema.tableName} WHERE ${field} IN (${placeholders}) GROUP BY ${field}`, ...values);
    for (const row of rows) {
        counts.set(row[field], Number(row.count));
    }
    return counts;
}
1077
/**
 * Fetch the first row in `collection` where `field` equals `value`, or null.
 * NOTE(review): `field` is interpolated into the SQL — trusted column names only.
 */
export async function findByField(collection, field, value) {
    const schema = schemas.get(collection);
    if (!schema)
        return null;
    const [first] = await all(`SELECT * FROM ${schema.tableName} WHERE ${field} = $1 LIMIT 1`, value);
    return first || null;
}
1084
/**
 * Batch-fetch rows grouped by `field` for a set of values in one query.
 * Returns Map of value → row[]; child-array rows are batch-fetched and
 * attached as `__childData` for reshapeRow.
 * NOTE(review): unlike queryRecords/getRecordByUri, this path does not filter
 * taken-down repos — confirm whether that is intentional for internal lookups.
 * NOTE(review): `field` is interpolated into the SQL — trusted column names only.
 */
export async function findByFieldBatch(collection, field, values) {
    const grouped = new Map();
    if (values.length === 0)
        return grouped;
    const schema = schemas.get(collection);
    if (!schema)
        return grouped;
    const placeholders = values.map((_, i) => `$${i + 1}`).join(',');
    const rows = await all(`SELECT t.*, r.handle FROM ${schema.tableName} t LEFT JOIN _repos r ON t.did = r.did WHERE t.${field} IN (${placeholders})`, ...values);
    // Attach child data if this collection has decomposed arrays.
    if (schema.children.length > 0 && rows.length > 0) {
        const uris = rows.map((r) => r.uri);
        const childData = new Map();
        for (const child of schema.children) {
            childData.set(child.fieldName, await getChildRows(child.tableName, uris));
        }
        for (const row of rows) {
            row.__childData = childData;
        }
    }
    // Group rows by the looked-up field value.
    for (const row of rows) {
        const key = row[field];
        const bucket = grouped.get(key);
        if (bucket) {
            bucket.push(row);
        }
        else {
            grouped.set(key, [row]);
        }
    }
    return grouped;
}
1114
/**
 * Like findByFieldBatch, but reshapes the first matching row per value into
 * the { envelope, value } form via reshapeRow. Returns Map of value → record.
 */
export async function lookupByFieldBatch(collection, field, values) {
    const map = new Map();
    if (values.length === 0)
        return map;
    const grouped = await findByFieldBatch(collection, field, values);
    for (const [key, records] of grouped) {
        if (records.length === 0)
            continue;
        const first = records[0];
        const shaped = reshapeRow(first, first?.__childData);
        if (shaped)
            map.set(key, shaped);
    }
    return map;
}
1126
/**
 * Find the URI of the first row matching ALL {field, value} conditions, or null.
 * NOTE(review): condition field names are interpolated into the SQL — trusted only.
 */
export async function findUriByFields(collection, conditions) {
    const schema = schemas.get(collection);
    if (!schema)
        return null;
    const whereClauses = [];
    const params = [];
    for (const [i, cond] of conditions.entries()) {
        whereClauses.push(`${cond.field} = $${i + 1}`);
        params.push(cond.value);
    }
    const rows = await all(`SELECT uri FROM ${schema.tableName} WHERE ${whereClauses.join(' AND ')} LIMIT 1`, ...params);
    return rows[0]?.uri || null;
}
1135
// Row columns that belong to the record envelope (metadata), not the record value.
const ENVELOPE_KEYS = new Set(['uri', 'cid', 'did', 'handle', 'indexed_at']);
// In-memory bookkeeping keys attached by the query helpers; stripped by reshapeRow.
const INTERNAL_KEYS = new Set(['__childData', '__unionData']);
1137
/**
 * Convert a DuckDB driver value to a plain JSON-friendly JS value:
 * objects carrying a `micros` field (timestamps) become ISO-8601 strings,
 * BigInts become Numbers, and everything else passes through untouched.
 */
export function normalizeValue(v) {
    if (v !== null && typeof v === 'object' && 'micros' in v) {
        const millis = Number(v.micros) / 1000;
        return new Date(millis).toISOString();
    }
    if (typeof v === 'bigint') {
        return Number(v);
    }
    return v;
}
1144
/**
 * Fetch all rows of a decomposed child table belonging to a set of parent URIs,
 * grouped into a Map of parent_uri → row[]. Empty input yields an empty Map.
 */
export async function getChildRows(childTableName, parentUris) {
    const grouped = new Map();
    if (parentUris.length === 0)
        return grouped;
    const placeholders = parentUris.map((_, i) => `$${i + 1}`).join(',');
    const rows = await all(`SELECT * FROM ${childTableName} WHERE parent_uri IN (${placeholders})`, ...parentUris);
    for (const row of rows) {
        const bucket = grouped.get(row.parent_uri);
        if (bucket) {
            bucket.push(row);
        }
        else {
            grouped.set(row.parent_uri, [row]);
        }
    }
    return grouped;
}
1158
/**
 * Reshape a flat DB row into the API record form { uri, cid, did, handle,
 * indexed_at, value: {...} }.
 *
 * - Envelope columns (ENVELOPE_KEYS) are lifted to the top level.
 * - Remaining columns are renamed snake→camel via the schema and placed in
 *   `value`; JSON columns are parsed (falling back to the raw string on error).
 * - Decomposed array fields are rebuilt from `childData` (Map of fieldName →
 *   Map of parent_uri → rows).
 * - Union fields are rebuilt from `unionData` (or `row.__unionData`), picking
 *   the first branch that has rows for this URI.
 *
 * @param {object|null} row - Raw DB row (may carry __childData/__unionData).
 * @param {Map} [childData] - Child-row lookup as produced by getChildRows.
 * @param {Map} [unionData] - Union-branch lookup; falls back to row.__unionData.
 * @returns {object|null} Reshaped record, or null for a falsy row.
 */
export function reshapeRow(row, childData, unionData) {
    if (!row)
        return null;
    // Derive collection from URI (at://did/collection/rkey)
    const collection = row.uri?.split('/')?.[3];
    const schema = collection ? schemas.get(collection) : null;
    // Build snake→camel map and JSON column set from schema
    const nameMap = new Map();
    const jsonCols = new Set();
    if (schema) {
        for (const col of schema.columns) {
            nameMap.set(col.name, col.originalName);
            if (col.duckdbType === 'JSON')
                jsonCols.add(col.name);
        }
    }
    const value = {};
    const envelope = {};
    for (const [key, rawVal] of Object.entries(row)) {
        const val = normalizeValue(rawVal);
        if (INTERNAL_KEYS.has(key)) {
            continue;
        }
        else if (ENVELOPE_KEYS.has(key)) {
            envelope[key] = val;
        }
        else {
            // Unknown columns (no schema entry) keep their stored name as-is.
            const originalKey = nameMap.get(key) || key;
            if (jsonCols.has(key) && typeof val === 'string') {
                try {
                    value[originalKey] = JSON.parse(val);
                }
                catch {
                    // Unparseable JSON column: surface the raw string rather than dropping it.
                    value[originalKey] = val;
                }
            }
            else {
                value[originalKey] = val;
            }
        }
    }
    // Reconstruct decomposed array fields from child data
    if (schema && childData) {
        for (const child of schema.children) {
            const childMap = childData.get(child.fieldName);
            // No child rows for this URI → field becomes an empty array, not undefined.
            const childRows = childMap?.get(row.uri) || [];
            value[child.fieldName] = childRows.map((cr) => {
                const item = {};
                for (const col of child.columns) {
                    const raw = cr[col.name];
                    item[col.originalName] = normalizeValue(raw);
                }
                return item;
            });
        }
    }
    // Reconstruct union fields from branch data
    const uData = unionData || row.__unionData;
    if (schema && uData) {
        for (const union of schema.unions) {
            const branchDataMap = uData.get(union.fieldName);
            if (!branchDataMap)
                continue;
            // Find which branch has rows for this URI (implicit discrimination)
            for (const branch of union.branches) {
                const branchMap = branchDataMap.get(branch.branchName);
                const branchRows = branchMap?.get(row.uri);
                if (!branchRows || branchRows.length === 0)
                    continue;
                if (branch.isArray && branch.arrayField) {
                    // Array branch: reconstruct { $type, arrayField: [...items] }
                    const items = branchRows.map((br) => {
                        const item = {};
                        for (const col of branch.columns) {
                            item[col.originalName] = normalizeValue(br[col.name]);
                        }
                        return item;
                    });
                    value[union.fieldName] = { $type: branch.type, [branch.arrayField]: items };
                }
                else {
                    // Single-value branch: reconstruct { $type, ...properties }
                    // If branchName matches a wrapper property pattern, nest under it
                    const br = branchRows[0];
                    const props = {};
                    for (const col of branch.columns) {
                        props[col.originalName] = normalizeValue(br[col.name]);
                    }
                    if (branch.wrapperField) {
                        value[union.fieldName] = { $type: branch.type, [branch.wrapperField]: props };
                    }
                    else {
                        value[union.fieldName] = { $type: branch.type, ...props };
                    }
                }
                break; // Only one branch should match
            }
        }
    }
    return { ...envelope, value };
}
1259
/**
 * Encode a keyset-pagination cursor as base64url("<primary>::<cid>").
 * Date sort values serialize as ISO-8601; everything else via String().
 */
export function packCursor(sortVal, cid) {
    const key = sortVal instanceof Date ? sortVal.toISOString() : String(sortVal);
    const raw = `${key}::${cid}`;
    return Buffer.from(raw).toString('base64url');
}
1263
/**
 * Decode a packCursor value back into { primary, cid }. Splits on the LAST
 * "::" so primary values containing "::" survive the round trip. Returns
 * null for malformed input instead of throwing.
 */
export function unpackCursor(cursor) {
    try {
        const text = Buffer.from(cursor, 'base64url').toString();
        const sep = text.lastIndexOf('::');
        if (sep === -1)
            return null;
        return { primary: text.slice(0, sep), cid: text.slice(sep + 2) };
    }
    catch {
        return null;
    }
}
1275
/** Fetch active labels (not negated, not expired) on any record under a DID. */
export async function queryLabelsByDid(did) {
    const uriPrefix = `at://${did}/%`;
    return all(`SELECT * FROM _labels WHERE uri LIKE $1 AND neg = false AND (exp IS NULL OR exp > CURRENT_TIMESTAMP)`, uriPrefix);
}
1278
/** Case-insensitive substring search over account DIDs and handles. */
export async function searchAccounts(query, limit = 20) {
    const pattern = `%${query}%`;
    return all(`SELECT did, handle, status FROM _repos WHERE did ILIKE $1 OR handle ILIKE $1 ORDER BY handle LIMIT $2`, pattern, limit);
}
1281
/** Total number of records a DID has across every registered collection. */
export async function getAccountRecordCount(did) {
    let total = 0;
    for (const schema of schemas.values()) {
        const [row] = await all(`SELECT COUNT(*) as count FROM ${schema.tableName} WHERE did = $1`, did);
        total += Number(row?.count || 0);
    }
    return total;
}
1289
/** Collect the URIs of every record a DID owns, across all collections. */
export async function getAllRecordUrisForDid(did) {
    const uris = [];
    for (const schema of schemas.values()) {
        const rows = await all(`SELECT uri FROM ${schema.tableName} WHERE did = $1`, did);
        for (const row of rows) {
            uris.push(row.uri);
        }
    }
    return uris;
}
1297
/** True when the DID's repo row is marked 'takendown'. */
export async function isTakendownDid(did) {
    const hits = await all(`SELECT 1 FROM _repos WHERE did = $1 AND status = 'takendown' LIMIT 1`, did);
    return hits.length > 0;
}
1301
/**
 * Load all stored preferences for a DID as a plain object. String values are
 * JSON-parsed; on parse failure (or non-string values) the raw value is kept.
 */
export async function getPreferences(did) {
    const prefs = {};
    const rows = await all(`SELECT key, value FROM _preferences WHERE did = $1`, did);
    for (const { key, value } of rows) {
        if (typeof value !== 'string') {
            prefs[key] = value;
            continue;
        }
        try {
            prefs[key] = JSON.parse(value);
        }
        catch {
            prefs[key] = value;
        }
    }
    return prefs;
}
1314
/** Upsert one preference (stored as JSON text) for a DID, stamping updated_at. */
export async function putPreference(did, key, value) {
    const updatedAt = new Date().toISOString();
    const serialized = JSON.stringify(value);
    await run(`INSERT OR REPLACE INTO _preferences (did, key, value, updated_at) VALUES ($1, $2, $3, $4)`, did, key, serialized, updatedAt);
}
1317
/** Given a batch of DIDs, return the subset whose repos are taken down, as a Set. */
export async function filterTakendownDids(dids) {
    const takendown = new Set();
    if (dids.length === 0)
        return takendown;
    const placeholders = dids.map((_, i) => `$${i + 1}`).join(',');
    const rows = await all(`SELECT did FROM _repos WHERE did IN (${placeholders}) AND status = 'takendown'`, ...dids);
    for (const row of rows) {
        takendown.add(row.did);
    }
    return takendown;
}
1324
/**
 * Rebuild decomposed child tables from their parents' stored JSON arrays.
 *
 * For each schema child table whose distinct parent_uri count has fallen
 * below 90% of the parent row count, wipe and repopulate it by unnesting the
 * parent's JSON array column. Errors are logged and skipped per table so one
 * bad table cannot abort the whole backfill.
 *
 * Fix: the parent COUNT(*) query is loop-invariant per schema but was
 * re-executed for every child table; it is now computed once per schema.
 */
export async function backfillChildTables() {
    for (const [, schema] of schemas) {
        if (schema.children.length === 0)
            continue;
        // Parent row count is the same for every child of this schema — query once.
        const mainCount = (await all(`SELECT COUNT(*)::INTEGER as n FROM ${schema.tableName}`))[0]?.n || 0;
        if (mainCount === 0)
            continue;
        for (const child of schema.children) {
            // Check if child table needs backfill (significantly fewer rows than parent)
            const childCount = (await all(`SELECT COUNT(DISTINCT parent_uri)::INTEGER as n FROM ${child.tableName}`))[0]?.n || 0;
            if (childCount >= mainCount * 0.9)
                continue;
            console.log(`[db] Backfilling ${child.tableName} from ${schema.tableName}...`);
            const snakeField = toSnakeCase(child.fieldName);
            const childColSelects = child.columns
                .map((c) => `json_extract_string(item.val, '$.${c.originalName}')`)
                .join(', ');
            const childColNames = ['parent_uri', 'parent_did', ...child.columns.map((c) => c.name)];
            // NOT NULL child columns must be present in the JSON item or the row is skipped.
            const notNullFilters = child.columns
                .filter((c) => c.notNull)
                .map((c) => `json_extract_string(item.val, '$.${c.originalName}') IS NOT NULL`);
            const whereClause = [`p.${snakeField} IS NOT NULL`, ...notNullFilters].join(' AND ');
            try {
                await run(`DELETE FROM ${child.tableName}`);
                await run(`
        INSERT INTO ${child.tableName} (${childColNames.join(', ')})
        SELECT p.uri, p.did, ${childColSelects}
        FROM ${schema.tableName} p,
             unnest(from_json(p.${snakeField}::JSON, '["json"]')) AS item(val)
        WHERE ${whereClause}
      `);
                const result = await all(`SELECT COUNT(*)::INTEGER as n FROM ${child.tableName}`);
                console.log(`[db] Backfilled ${child.tableName}: ${result[0]?.n || 0} rows`);
            }
            catch (err) {
                // Best-effort: log and move on to the next child table.
                console.warn(`[db] Backfill skipped for ${child.tableName}: ${err.message}`);
            }
        }
    }
}