@hatk/hatk 0.0.1-alpha.4 → 0.0.1-alpha.40

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. package/dist/adapter.d.ts +19 -0
  2. package/dist/adapter.d.ts.map +1 -0
  3. package/dist/adapter.js +107 -0
  4. package/dist/backfill.d.ts +60 -1
  5. package/dist/backfill.d.ts.map +1 -1
  6. package/dist/backfill.js +167 -33
  7. package/dist/car.d.ts +59 -1
  8. package/dist/car.d.ts.map +1 -1
  9. package/dist/car.js +179 -7
  10. package/dist/cbor.d.ts +37 -0
  11. package/dist/cbor.d.ts.map +1 -1
  12. package/dist/cbor.js +36 -3
  13. package/dist/cid.d.ts +37 -0
  14. package/dist/cid.d.ts.map +1 -1
  15. package/dist/cid.js +38 -3
  16. package/dist/cli.js +417 -133
  17. package/dist/cloudflare/container.d.ts +73 -0
  18. package/dist/cloudflare/container.d.ts.map +1 -0
  19. package/dist/cloudflare/container.js +232 -0
  20. package/dist/cloudflare/hooks.d.ts +33 -0
  21. package/dist/cloudflare/hooks.d.ts.map +1 -0
  22. package/dist/cloudflare/hooks.js +40 -0
  23. package/dist/cloudflare/init.d.ts +27 -0
  24. package/dist/cloudflare/init.d.ts.map +1 -0
  25. package/dist/cloudflare/init.js +103 -0
  26. package/dist/cloudflare/worker.d.ts +27 -0
  27. package/dist/cloudflare/worker.d.ts.map +1 -0
  28. package/dist/cloudflare/worker.js +54 -0
  29. package/dist/config.d.ts +12 -1
  30. package/dist/config.d.ts.map +1 -1
  31. package/dist/config.js +36 -9
  32. package/dist/database/adapter-factory.d.ts +6 -0
  33. package/dist/database/adapter-factory.d.ts.map +1 -0
  34. package/dist/database/adapter-factory.js +20 -0
  35. package/dist/database/adapters/d1.d.ts +56 -0
  36. package/dist/database/adapters/d1.d.ts.map +1 -0
  37. package/dist/database/adapters/d1.js +108 -0
  38. package/dist/database/adapters/duckdb-search.d.ts +12 -0
  39. package/dist/database/adapters/duckdb-search.d.ts.map +1 -0
  40. package/dist/database/adapters/duckdb-search.js +27 -0
  41. package/dist/database/adapters/duckdb.d.ts +25 -0
  42. package/dist/database/adapters/duckdb.d.ts.map +1 -0
  43. package/dist/database/adapters/duckdb.js +161 -0
  44. package/dist/database/adapters/sqlite-search.d.ts +23 -0
  45. package/dist/database/adapters/sqlite-search.d.ts.map +1 -0
  46. package/dist/database/adapters/sqlite-search.js +74 -0
  47. package/dist/database/adapters/sqlite.d.ts +18 -0
  48. package/dist/database/adapters/sqlite.d.ts.map +1 -0
  49. package/dist/database/adapters/sqlite.js +87 -0
  50. package/dist/database/db.d.ts +159 -0
  51. package/dist/database/db.d.ts.map +1 -0
  52. package/dist/database/db.js +1445 -0
  53. package/dist/database/dialect.d.ts +45 -0
  54. package/dist/database/dialect.d.ts.map +1 -0
  55. package/dist/database/dialect.js +72 -0
  56. package/dist/database/fts.d.ts +27 -0
  57. package/dist/database/fts.d.ts.map +1 -0
  58. package/dist/database/fts.js +846 -0
  59. package/dist/database/index.d.ts +7 -0
  60. package/dist/database/index.d.ts.map +1 -0
  61. package/dist/database/index.js +6 -0
  62. package/dist/database/ports.d.ts +50 -0
  63. package/dist/database/ports.d.ts.map +1 -0
  64. package/dist/database/ports.js +1 -0
  65. package/dist/database/schema.d.ts +61 -0
  66. package/dist/database/schema.d.ts.map +1 -0
  67. package/dist/database/schema.js +394 -0
  68. package/dist/db.d.ts +1 -1
  69. package/dist/db.d.ts.map +1 -1
  70. package/dist/db.js +4 -38
  71. package/dist/dev-entry.d.ts +8 -0
  72. package/dist/dev-entry.d.ts.map +1 -0
  73. package/dist/dev-entry.js +110 -0
  74. package/dist/feeds.d.ts +12 -8
  75. package/dist/feeds.d.ts.map +1 -1
  76. package/dist/feeds.js +45 -6
  77. package/dist/fts.d.ts.map +1 -1
  78. package/dist/fts.js +5 -0
  79. package/dist/hooks.d.ts +22 -0
  80. package/dist/hooks.d.ts.map +1 -0
  81. package/dist/hooks.js +75 -0
  82. package/dist/hydrate.d.ts +6 -5
  83. package/dist/hydrate.d.ts.map +1 -1
  84. package/dist/hydrate.js +4 -16
  85. package/dist/indexer.d.ts +20 -0
  86. package/dist/indexer.d.ts.map +1 -1
  87. package/dist/indexer.js +53 -7
  88. package/dist/labels.d.ts +34 -0
  89. package/dist/labels.d.ts.map +1 -1
  90. package/dist/labels.js +66 -6
  91. package/dist/logger.d.ts +29 -0
  92. package/dist/logger.d.ts.map +1 -1
  93. package/dist/logger.js +29 -0
  94. package/dist/main.js +134 -67
  95. package/dist/mst.d.ts +18 -1
  96. package/dist/mst.d.ts.map +1 -1
  97. package/dist/mst.js +19 -8
  98. package/dist/oauth/db.d.ts.map +1 -1
  99. package/dist/oauth/db.js +43 -17
  100. package/dist/oauth/server.d.ts +2 -0
  101. package/dist/oauth/server.d.ts.map +1 -1
  102. package/dist/oauth/server.js +102 -7
  103. package/dist/oauth/session.d.ts +11 -0
  104. package/dist/oauth/session.d.ts.map +1 -0
  105. package/dist/oauth/session.js +65 -0
  106. package/dist/opengraph.d.ts +10 -0
  107. package/dist/opengraph.d.ts.map +1 -1
  108. package/dist/opengraph.js +73 -39
  109. package/dist/pds-proxy.d.ts +42 -0
  110. package/dist/pds-proxy.d.ts.map +1 -0
  111. package/dist/pds-proxy.js +189 -0
  112. package/dist/renderer.d.ts +27 -0
  113. package/dist/renderer.d.ts.map +1 -0
  114. package/dist/renderer.js +46 -0
  115. package/dist/resolve-hatk.d.ts +6 -0
  116. package/dist/resolve-hatk.d.ts.map +1 -0
  117. package/dist/resolve-hatk.js +20 -0
  118. package/dist/response.d.ts +16 -0
  119. package/dist/response.d.ts.map +1 -0
  120. package/dist/response.js +69 -0
  121. package/dist/scanner.d.ts +21 -0
  122. package/dist/scanner.d.ts.map +1 -0
  123. package/dist/scanner.js +88 -0
  124. package/dist/schema.d.ts +8 -0
  125. package/dist/schema.d.ts.map +1 -1
  126. package/dist/schema.js +29 -0
  127. package/dist/seed.d.ts +19 -0
  128. package/dist/seed.d.ts.map +1 -1
  129. package/dist/seed.js +43 -4
  130. package/dist/server-init.d.ts +8 -0
  131. package/dist/server-init.d.ts.map +1 -0
  132. package/dist/server-init.js +61 -0
  133. package/dist/server.d.ts +26 -3
  134. package/dist/server.d.ts.map +1 -1
  135. package/dist/server.js +528 -635
  136. package/dist/setup.d.ts +28 -1
  137. package/dist/setup.d.ts.map +1 -1
  138. package/dist/setup.js +50 -3
  139. package/dist/test.d.ts +1 -1
  140. package/dist/test.d.ts.map +1 -1
  141. package/dist/test.js +38 -32
  142. package/dist/views.js +1 -1
  143. package/dist/vite-plugin.d.ts +1 -1
  144. package/dist/vite-plugin.d.ts.map +1 -1
  145. package/dist/vite-plugin.js +254 -66
  146. package/dist/xrpc.d.ts +46 -10
  147. package/dist/xrpc.d.ts.map +1 -1
  148. package/dist/xrpc.js +128 -39
  149. package/package.json +13 -6
  150. package/public/admin.html +0 -54
@@ -0,0 +1,846 @@
1
+ import { getSchema, runSQL, getSqlDialect, querySQL } from "./db.js";
2
+ import { getLexicon } from "./schema.js";
3
+ import { emit, timer } from "../logger.js";
4
/**
 * Look up a local lexicon reference (one written as "#name") in the
 * lexicon's own `defs` table.
 *
 * @param {string} ref - Reference string, e.g. "#artist".
 * @param {object} lexicon - Lexicon document whose `defs` are searched.
 * @returns {object|null} The referenced definition, or null when the ref is
 *   not local (does not start with "#") or no such definition exists.
 */
function resolveRef(ref, lexicon) {
    const isLocal = ref.startsWith('#');
    if (!isLocal) {
        return null;
    }
    const target = lexicon.defs?.[ref.slice(1)];
    return target ? target : null;
}
14
/**
 * Derive searchable-text column expressions from a JSON column, guided by the
 * column's lexicon property definition.
 *
 * - array of strings  → one column (`<base>_text`) aggregating every element
 * - array of objects  → one column per string property (`<base>_<field>`)
 * - plain object      → one column per string property (`<base>_<field>`)
 * - anything else (blob, union, unknown, unresolved ref) → no columns
 *
 * @param {string} colName - Column expression, optionally table-qualified ("t.artists").
 * @param {object} prop - Lexicon property definition for the column.
 * @param {object} lexicon - Lexicon used to resolve local item refs.
 * @param {object} dialect - SQL dialect providing `jsonArrayStringAgg` and `jsonExtractString`.
 * @returns {{expr: string, alias: string}[]} Expressions and their aliases, in property order.
 */
function jsonSearchColumns(colName, prop, lexicon, dialect) {
    // Aliases must not carry the table qualifier: "t.artists" → "artists"
    const aliasBase = colName.split('.').pop();
    // Names of the string-typed properties of an object definition, in declaration order
    const stringFields = (properties) => Object.entries(properties)
        .filter(([, fieldProp]) => fieldProp.type === 'string')
        .map(([field]) => field);

    if (prop.type === 'array' && prop.items) {
        const { items } = prop;
        const itemDef = items.type === 'ref' && items.ref ? resolveRef(items.ref, lexicon) : items;
        if (!itemDef) {
            return [];
        }
        if (itemDef.type === 'string') {
            // All elements are joined into a single text column
            return [
                {
                    expr: dialect.jsonArrayStringAgg(colName, '$[*]'),
                    alias: `${aliasBase}_text`,
                },
            ];
        }
        if (itemDef.type === 'object' && itemDef.properties) {
            return stringFields(itemDef.properties).map((field) => ({
                expr: dialect.jsonArrayStringAgg(colName, `$[*].${field}`),
                alias: `${aliasBase}_${field}`,
            }));
        }
        return [];
    }

    if (prop.type === 'object' && prop.properties) {
        return stringFields(prop.properties).map((field) => ({
            expr: dialect.jsonExtractString(colName, `$.${field}`),
            alias: `${aliasBase}_${field}`,
        }));
    }

    // Nothing useful to extract from other property types
    return [];
}
59
// Currently registered search port; null until setSearchPort is called with one
let searchPort = null;
/**
 * Register the search port used by the FTS functions in this module.
 * @param {object|null} port - Port to use; passing null clears the registration.
 */
export function setSearchPort(port) {
    searchPort = port;
}
/**
 * @returns {boolean} True unless the registered port is exactly null.
 */
export function hasSearchPort() {
    return !(searchPort === null);
}
/**
 * @returns {object|null} The registered search port, or null when none is set.
 */
export function getSearchPort() {
    return searchPort;
}
69
// ISO timestamp of the latest buildFtsIndex pass, keyed by collection
const lastRebuiltAt = new Map();
// Search column names per collection, filled in by buildFtsIndex
const searchColumnCache = new Map();
// Memoized FTS schemas per collection (deterministic, so each is computed once)
const ftsSchemaCache = new Map();
/**
 * @param {string} collection - Collection name.
 * @returns {string[]} Cached search column names, or an empty array when the
 *   collection has no cached entry.
 */
export function getSearchColumns(collection) {
    const cols = searchColumnCache.get(collection);
    return cols ? cols : [];
}
/**
 * @param {string} collection - Collection name.
 * @returns {string|null} Timestamp recorded for the collection, or null when none exists.
 */
export function getLastRebuiltAt(collection) {
    const stamp = lastRebuiltAt.get(collection);
    return stamp === undefined || stamp === null ? null : stamp;
}
81
/**
 * Name of the FTS shadow table for a collection.
 *
 * DuckDB FTS treats dots in a table name as catalog.schema.table separators,
 * so the shadow table is named "_fts_" followed by the collection name with
 * every "." replaced by "_".
 *
 * @param {string} collection - Collection name, e.g. "app.example.post".
 * @returns {string} Shadow table name, e.g. "_fts_app_example_post".
 */
export function ftsTableName(collection) {
    const underscored = collection.split('.').join('_');
    return `_fts_${underscored}`;
}
88
/**
 * Work out, once per collection, what the FTS index for it looks like.
 *
 * The result is memoized in ftsSchemaCache and contains:
 * - `searchColNames`: names of the searchable columns, in SELECT order
 * - `sourceQuery`: SELECT producing uri/cid/did/indexed_at plus those columns
 * - `safeName`: the shadow table name from ftsTableName
 *
 * @param {string} collection - Collection name.
 * @returns {{searchColNames: string[], sourceQuery: string, safeName: string}}
 * @throws {Error} When getSchema has no schema for the collection.
 */
function computeFtsSchema(collection) {
    const memo = ftsSchemaCache.get(collection);
    if (memo) {
        return memo;
    }
    const schema = getSchema(collection);
    if (!schema) {
        throw new Error(`Unknown collection: ${collection}`);
    }
    const lexicon = getLexicon(collection);
    const recordProps = lexicon?.defs?.main?.record?.properties;
    const dialect = getSqlDialect();

    const projections = ['t.uri', 't.cid', 't.did', 't.indexed_at'];
    const names = [];
    // Register one searchable column: its SELECT expression and the name it is exposed under
    const addColumn = (expr, name) => {
        projections.push(expr);
        names.push(name);
    };
    // Correlated subquery that space-joins a TEXT column over the related rows of the current record
    const relatedText = (tableName, colName) => {
        const agg = dialect.stringAgg(`c.${colName}`, "' '");
        return `(SELECT ${agg} FROM ${tableName} c WHERE c.parent_uri = t.uri)`;
    };

    // Columns of the main table
    for (const col of schema.columns) {
        if (col.sqlType === 'TEXT') {
            addColumn(`t.${col.name}`, col.name);
            continue;
        }
        if (!col.isJson || !recordProps) {
            continue;
        }
        const prop = recordProps[col.originalName];
        if (prop?.type === 'blob') {
            continue; // blobs carry no searchable text
        }
        const derived = prop && lexicon ? jsonSearchColumns(`t.${col.name}`, prop, lexicon, dialect) : [];
        if (derived.length === 0) {
            // Nothing structured to extract: index the raw JSON as text
            addColumn(`CAST(t.${col.name} AS TEXT) AS ${col.name}`, col.name);
            continue;
        }
        for (const { expr, alias } of derived) {
            addColumn(`${expr} AS ${alias}`, alias);
        }
    }

    // TEXT columns of child tables (decomposed array fields)
    for (const child of schema.children) {
        for (const col of child.columns) {
            if (col.sqlType !== 'TEXT') {
                continue;
            }
            const alias = `${child.fieldName}_${col.name}`;
            addColumn(`${relatedText(child.tableName, col.name)} AS ${alias}`, alias);
        }
    }

    // TEXT columns of union branch tables
    for (const unionDef of schema.unions) {
        for (const branch of unionDef.branches) {
            for (const col of branch.columns) {
                if (col.sqlType !== 'TEXT') {
                    continue;
                }
                const alias = `${unionDef.fieldName}_${branch.branchName}_${col.name}`;
                addColumn(`${relatedText(branch.tableName, col.name)} AS ${alias}`, alias);
            }
        }
    }

    // The repo handle (joined from _repos) is searchable too, for people search
    addColumn('r.handle', 'handle');

    const safeName = ftsTableName(collection);
    const sourceQuery = `SELECT ${projections.join(', ')} FROM ${schema.tableName} t LEFT JOIN _repos r ON t.did = r.did`;
    const result = { searchColNames: names, sourceQuery, safeName };
    ftsSchemaCache.set(collection, result);
    return result;
}
161
/**
 * Ensure the FTS index for a collection exists.
 *
 * Does nothing when no search port is registered or the collection has no
 * searchable columns. When the port offers `indexExists` and reports the index
 * as present, the build is skipped; otherwise the port's `buildIndex` is called
 * with the shadow table name, source query and search column names. How the
 * index is actually built is up to the port.
 *
 * In both the skipped and the built case the collection's search columns are
 * cached and its last-rebuilt timestamp is set to the current time.
 *
 * @param {string} collection - Collection name.
 * @returns {Promise<void>}
 */
export async function buildFtsIndex(collection) {
    if (!searchPort) {
        return; // this adapter has no FTS support
    }
    const { searchColNames, sourceQuery, safeName } = computeFtsSchema(collection);
    if (searchColNames.length === 0) {
        return;
    }
    // Incremental ports expose indexExists; an existing index is left untouched
    const alreadyIndexed = searchPort.indexExists ? await searchPort.indexExists(safeName) : false;
    if (!alreadyIndexed) {
        await searchPort.buildIndex(safeName, sourceQuery, searchColNames);
    }
    searchColumnCache.set(collection, searchColNames);
    lastRebuiltAt.set(collection, new Date().toISOString());
}
185
/**
 * Fetch the searchable values of a single record.
 *
 * Runs the collection's FTS source query restricted to the given uri and
 * returns an object keyed by search column name. Non-null values are converted
 * to strings; null/undefined values become null.
 *
 * @param {string} collection - Collection name.
 * @param {string} uri - Record uri, bound as the query's `$1` parameter.
 * @returns {Promise<Object<string, string|null>|null>} The row, or null when the
 *   collection has no search columns or no record matches.
 */
export async function buildFtsRow(collection, uri) {
    const { searchColNames, sourceQuery } = computeFtsSchema(collection);
    if (searchColNames.length === 0) {
        return null;
    }
    // Narrow the full-collection source query down to the one record
    const matches = await querySQL(`${sourceQuery} WHERE t.uri = $1`, [uri]);
    if (!matches || matches.length === 0) {
        return null;
    }
    const source = matches[0];
    const values = {};
    for (const name of searchColNames) {
        const raw = source[name];
        values[name] = raw == null ? null : String(raw);
    }
    return values;
}
201
/**
 * Incrementally refresh one record in a collection's FTS index.
 *
 * Best-effort: does nothing when the search port is missing or has no
 * `updateIndex`, when the collection has no cached search columns, or when the
 * record cannot be found. Failures are reported through
 * `emit('fts', 'update_error', …)` and never thrown to the caller.
 *
 * @param {string} collection - Collection name.
 * @param {string} uri - Uri of the record to re-index.
 * @returns {Promise<void>}
 */
export async function updateFtsRecord(collection, uri) {
    if (!searchPort?.updateIndex) {
        return;
    }
    const searchCols = searchColumnCache.get(collection);
    if (!searchCols || searchCols.length === 0) {
        return;
    }
    try {
        const row = await buildFtsRow(collection, uri);
        if (!row) {
            return;
        }
        const safeName = ftsTableName(collection);
        await searchPort.updateIndex(safeName, uri, row, searchCols);
    }
    catch (err) {
        // Anything can be thrown (strings, null, …): never let the handler itself
        // throw, and never log `undefined` as the reason.
        const error = err?.message ?? String(err);
        emit('fts', 'update_error', { collection, uri, error });
    }
}
218
/**
 * Remove one record from a collection's FTS index.
 *
 * Best-effort: does nothing when the search port is missing or has no
 * `deleteFromIndex`, or when the collection has no cached search columns.
 * Failures are reported through `emit('fts', 'delete_error', …)` and never
 * thrown to the caller.
 *
 * @param {string} collection - Collection name.
 * @param {string} uri - Uri of the record to remove.
 * @returns {Promise<void>}
 */
export async function deleteFtsRecord(collection, uri) {
    if (!searchPort?.deleteFromIndex) {
        return;
    }
    const searchCols = searchColumnCache.get(collection);
    if (!searchCols || searchCols.length === 0) {
        return;
    }
    try {
        const safeName = ftsTableName(collection);
        await searchPort.deleteFromIndex(safeName, uri, searchCols);
    }
    catch (err) {
        // Anything can be thrown (strings, null, …): never let the handler itself
        // throw, and never log `undefined` as the reason.
        const error = err?.message ?? String(err);
        emit('fts', 'delete_error', { collection, uri, error });
    }
}
232
+ /**
233
+ * English stop-word list used by stripStopWords to filter search queries.
234
+ */
235
// DuckDB's built-in English stop words (571 words) — must match stopwords='english' in create_fts_index.
// The list is kept entry-for-entry, one initial letter per group; 'would' is listed
// twice and the Set collapses the duplicate.
const ENGLISH_STOP_WORDS = new Set([
    // a
    'a', "a's", 'able', 'about', 'above', 'according', 'accordingly', 'across', 'actually', 'after',
    'afterwards', 'again', 'against', "ain't", 'all', 'allow', 'allows', 'almost', 'alone', 'along',
    'already', 'also', 'although', 'always', 'am', 'among', 'amongst', 'an', 'and', 'another',
    'any', 'anybody', 'anyhow', 'anyone', 'anything', 'anyway', 'anyways', 'anywhere', 'apart', 'appear',
    'appreciate', 'appropriate', 'are', "aren't", 'around', 'as', 'aside', 'ask', 'asking', 'associated',
    'at', 'available', 'away', 'awfully',
    // b
    'b', 'be', 'became', 'because', 'become', 'becomes', 'becoming', 'been', 'before', 'beforehand',
    'behind', 'being', 'believe', 'below', 'beside', 'besides', 'best', 'better', 'between', 'beyond',
    'both', 'brief', 'but', 'by',
    // c
    'c', "c'mon", "c's", 'came', 'can', "can't", 'cannot', 'cant', 'cause', 'causes',
    'certain', 'certainly', 'changes', 'clearly', 'co', 'com', 'come', 'comes', 'concerning', 'consequently',
    'consider', 'considering', 'contain', 'containing', 'contains', 'corresponding', 'could', "couldn't", 'course', 'currently',
    // d
    'd', 'definitely', 'described', 'despite', 'did', "didn't", 'different', 'do', 'does', "doesn't",
    'doing', "don't", 'done', 'down', 'downwards', 'during',
    // e
    'e', 'each', 'edu', 'eg', 'eight', 'either', 'else', 'elsewhere', 'enough', 'entirely',
    'especially', 'et', 'etc', 'even', 'ever', 'every', 'everybody', 'everyone', 'everything', 'everywhere',
    'ex', 'exactly', 'example', 'except',
    // f
    'f', 'far', 'few', 'fifth', 'first', 'five', 'followed', 'following', 'follows', 'for',
    'former', 'formerly', 'forth', 'four', 'from', 'further', 'furthermore',
    // g
    'g', 'get', 'gets', 'getting', 'given', 'gives', 'go', 'goes', 'going', 'gone',
    'got', 'gotten', 'greetings',
    // h
    'h', 'had', "hadn't", 'happens', 'hardly', 'has', "hasn't", 'have', "haven't", 'having',
    'he', "he's", 'hello', 'help', 'hence', 'her', 'here', "here's", 'hereafter', 'hereby',
    'herein', 'hereupon', 'hers', 'herself', 'hi', 'him', 'himself', 'his', 'hither', 'hopefully',
    'how', 'howbeit', 'however',
    // i
    'i', "i'd", "i'll", "i'm", "i've", 'ie', 'if', 'ignored', 'immediate', 'in',
    'inasmuch', 'inc', 'indeed', 'indicate', 'indicated', 'indicates', 'inner', 'insofar', 'instead', 'into',
    'inward', 'is', "isn't", 'it', "it'd", "it'll", "it's", 'its', 'itself',
    // j
    'j', 'just',
    // k
    'k', 'keep', 'keeps', 'kept', 'know', 'known', 'knows',
    // l
    'l', 'last', 'lately', 'later', 'latter', 'latterly', 'least', 'less', 'lest', 'let',
    "let's", 'like', 'liked', 'likely', 'little', 'look', 'looking', 'looks', 'ltd',
    // m
    'm', 'mainly', 'many', 'may', 'maybe', 'me', 'mean', 'meanwhile', 'merely', 'might',
    'more', 'moreover', 'most', 'mostly', 'much', 'must', 'my', 'myself',
    // n
    'n', 'name', 'namely', 'nd', 'near', 'nearly', 'necessary', 'need', 'needs', 'neither',
    'never', 'nevertheless', 'new', 'next', 'nine', 'no', 'nobody', 'non', 'none', 'noone',
    'nor', 'normally', 'not', 'nothing', 'novel', 'now', 'nowhere',
    // o
    'o', 'obviously', 'of', 'off', 'often', 'oh', 'ok', 'okay', 'old', 'on',
    'once', 'one', 'ones', 'only', 'onto', 'or', 'other', 'others', 'otherwise', 'ought',
    'our', 'ours', 'ourselves', 'out', 'outside', 'over', 'overall', 'own',
    // p
    'p', 'particular', 'particularly', 'per', 'perhaps', 'placed', 'please', 'plus', 'possible', 'presumably',
    'probably', 'provides',
    // q
    'q', 'que', 'quite', 'qv',
    // r
    'r', 'rather', 'rd', 're', 'really', 'reasonably', 'regarding', 'regardless', 'regards', 'relatively',
    'respectively', 'right',
    // s
    's', 'said', 'same', 'saw', 'say', 'saying', 'says', 'second', 'secondly', 'see',
    'seeing', 'seem', 'seemed', 'seeming', 'seems', 'seen', 'self', 'selves', 'sensible', 'sent',
    'serious', 'seriously', 'seven', 'several', 'shall', 'she', 'should', "shouldn't", 'since', 'six',
    'so', 'some', 'somebody', 'somehow', 'someone', 'something', 'sometime', 'sometimes', 'somewhat', 'somewhere',
    'soon', 'sorry', 'specified', 'specify', 'specifying', 'still', 'sub', 'such', 'sup', 'sure',
    // t
    't', "t's", 'take', 'taken', 'tell', 'tends', 'th', 'than', 'thank', 'thanks',
    'thanx', 'that', "that's", 'thats', 'the', 'their', 'theirs', 'them', 'themselves', 'then',
    'thence', 'there', "there's", 'thereafter', 'thereby', 'therefore', 'therein', 'theres', 'thereupon', 'these',
    'they', "they'd", "they'll", "they're", "they've", 'think', 'third', 'this', 'thorough', 'thoroughly',
    'those', 'though', 'three', 'through', 'throughout', 'thru', 'thus', 'to', 'together', 'too',
    'took', 'toward', 'towards', 'tried', 'tries', 'truly', 'try', 'trying', 'twice', 'two',
    // u
    'u', 'un', 'under', 'unfortunately', 'unless', 'unlikely', 'until', 'unto', 'up', 'upon',
    'us', 'use', 'used', 'useful', 'uses', 'using', 'usually', 'uucp',
    // v
    'v', 'value', 'various', 'very', 'via', 'viz', 'vs',
    // w
    'w', 'want', 'wants', 'was', "wasn't", 'way', 'we', "we'd", "we'll", "we're",
    "we've", 'welcome', 'well', 'went', 'were', "weren't", 'what', "what's", 'whatever', 'when',
    'whence', 'whenever', 'where', "where's", 'whereafter', 'whereas', 'whereby', 'wherein', 'whereupon', 'wherever',
    'whether', 'which', 'while', 'whither', 'who', "who's", 'whoever', 'whole', 'whom', 'whose',
    'why', 'will', 'willing', 'wish', 'with', 'within', 'without', "won't", 'wonder', 'would',
    'would', "wouldn't",
    // x, y, z
    'x',
    'y', 'yes', 'yet', 'you', "you'd", "you'll", "you're", "you've", 'your', 'yours',
    'yourself', 'yourselves',
    'z', 'zero',
]);
/**
 * Remove English stop words from a search query.
 *
 * The query is trimmed and split on whitespace; terms whose lowercase form is
 * in ENGLISH_STOP_WORDS are dropped and the rest are re-joined with single
 * spaces, keeping their original casing.
 *
 * @param {string} query - Raw search query.
 * @returns {string} The cleaned query, or the original query unchanged when
 *   every term is a stop word.
 */
export function stripStopWords(query) {
    const kept = [];
    for (const term of query.trim().split(/\s+/)) {
        if (!ENGLISH_STOP_WORDS.has(term.toLowerCase())) {
            kept.push(term);
        }
    }
    return kept.length === 0 ? query : kept.join(' ');
}
818
/**
 * Rebuild FTS indexes for the given collections, one after another.
 *
 * A failure for one collection is recorded and does not stop the others.
 * Afterwards, if the SQL dialect defines `checkpointSQL`, it is run as a
 * best-effort cleanup step. A single `emit('fts', 'rebuild', …)` event
 * summarizes the run (totals, error messages, duration).
 *
 * @param {string[]} collections - Collection names to (re)index.
 * @returns {Promise<void>} Never rejects because of an individual index or
 *   checkpoint failure.
 */
export async function rebuildAllIndexes(collections) {
    const elapsed = timer();
    let rebuilt = 0;
    const errors = [];
    for (const collection of collections) {
        try {
            await buildFtsIndex(collection);
            rebuilt++;
        }
        catch (err) {
            // Thrown values are not guaranteed to be Error objects
            errors.push(`${collection}: ${err?.message ?? String(err)}`);
        }
    }
    // Compact WAL to free memory after heavy FTS operations (DuckDB only)
    try {
        // getSqlDialect is already imported at module level; no dynamic import needed
        const { checkpointSQL } = getSqlDialect();
        if (checkpointSQL) {
            await runSQL(checkpointSQL);
        }
    }
    catch (err) {
        // Best-effort: a failed checkpoint must not fail the rebuild, but it
        // should be visible rather than silently discarded.
        emit('fts', 'checkpoint_error', { error: err?.message ?? String(err) });
    }
    emit('fts', 'rebuild', {
        collections_total: collections.length,
        collections_rebuilt: rebuilt,
        error_count: errors.length,
        duration_ms: elapsed(),
        errors: errors.length > 0 ? errors : undefined,
    });
}