@hatk/hatk 0.0.1-alpha.3 → 0.0.1-alpha.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. package/dist/adapter.d.ts +19 -0
  2. package/dist/adapter.d.ts.map +1 -0
  3. package/dist/adapter.js +94 -0
  4. package/dist/backfill.d.ts +60 -1
  5. package/dist/backfill.d.ts.map +1 -1
  6. package/dist/backfill.js +166 -32
  7. package/dist/car.d.ts +59 -1
  8. package/dist/car.d.ts.map +1 -1
  9. package/dist/car.js +179 -7
  10. package/dist/cbor.d.ts +37 -0
  11. package/dist/cbor.d.ts.map +1 -1
  12. package/dist/cbor.js +36 -3
  13. package/dist/cid.d.ts +37 -0
  14. package/dist/cid.d.ts.map +1 -1
  15. package/dist/cid.js +38 -3
  16. package/dist/cli.js +356 -123
  17. package/dist/config.d.ts +12 -1
  18. package/dist/config.d.ts.map +1 -1
  19. package/dist/config.js +36 -9
  20. package/dist/database/adapter-factory.d.ts +6 -0
  21. package/dist/database/adapter-factory.d.ts.map +1 -0
  22. package/dist/database/adapter-factory.js +20 -0
  23. package/dist/database/adapters/duckdb-search.d.ts +12 -0
  24. package/dist/database/adapters/duckdb-search.d.ts.map +1 -0
  25. package/dist/database/adapters/duckdb-search.js +27 -0
  26. package/dist/database/adapters/duckdb.d.ts +25 -0
  27. package/dist/database/adapters/duckdb.d.ts.map +1 -0
  28. package/dist/database/adapters/duckdb.js +161 -0
  29. package/dist/database/adapters/sqlite-search.d.ts +18 -0
  30. package/dist/database/adapters/sqlite-search.d.ts.map +1 -0
  31. package/dist/database/adapters/sqlite-search.js +38 -0
  32. package/dist/database/adapters/sqlite.d.ts +18 -0
  33. package/dist/database/adapters/sqlite.d.ts.map +1 -0
  34. package/dist/database/adapters/sqlite.js +87 -0
  35. package/dist/database/db.d.ts +149 -0
  36. package/dist/database/db.d.ts.map +1 -0
  37. package/dist/database/db.js +1460 -0
  38. package/dist/database/dialect.d.ts +45 -0
  39. package/dist/database/dialect.d.ts.map +1 -0
  40. package/dist/database/dialect.js +72 -0
  41. package/dist/database/fts.d.ts +24 -0
  42. package/dist/database/fts.d.ts.map +1 -0
  43. package/dist/database/fts.js +777 -0
  44. package/dist/database/index.d.ts +7 -0
  45. package/dist/database/index.d.ts.map +1 -0
  46. package/dist/database/index.js +6 -0
  47. package/dist/database/ports.d.ts +44 -0
  48. package/dist/database/ports.d.ts.map +1 -0
  49. package/dist/database/ports.js +1 -0
  50. package/dist/database/schema.d.ts +60 -0
  51. package/dist/database/schema.d.ts.map +1 -0
  52. package/dist/database/schema.js +388 -0
  53. package/dist/db.d.ts +1 -1
  54. package/dist/db.d.ts.map +1 -1
  55. package/dist/db.js +4 -38
  56. package/dist/dev-entry.d.ts +8 -0
  57. package/dist/dev-entry.d.ts.map +1 -0
  58. package/dist/dev-entry.js +109 -0
  59. package/dist/feeds.d.ts +4 -0
  60. package/dist/feeds.d.ts.map +1 -1
  61. package/dist/feeds.js +42 -3
  62. package/dist/fts.d.ts.map +1 -1
  63. package/dist/fts.js +5 -0
  64. package/dist/hooks.d.ts +22 -0
  65. package/dist/hooks.d.ts.map +1 -0
  66. package/dist/hooks.js +75 -0
  67. package/dist/hydrate.js +1 -1
  68. package/dist/indexer.d.ts +20 -0
  69. package/dist/indexer.d.ts.map +1 -1
  70. package/dist/indexer.js +48 -6
  71. package/dist/labels.d.ts +34 -0
  72. package/dist/labels.d.ts.map +1 -1
  73. package/dist/labels.js +63 -3
  74. package/dist/logger.d.ts +29 -0
  75. package/dist/logger.d.ts.map +1 -1
  76. package/dist/logger.js +29 -0
  77. package/dist/main.js +131 -67
  78. package/dist/mst.d.ts +18 -1
  79. package/dist/mst.d.ts.map +1 -1
  80. package/dist/mst.js +19 -8
  81. package/dist/oauth/db.d.ts.map +1 -1
  82. package/dist/oauth/db.js +41 -15
  83. package/dist/oauth/server.d.ts +2 -0
  84. package/dist/oauth/server.d.ts.map +1 -1
  85. package/dist/oauth/server.js +102 -7
  86. package/dist/oauth/session.d.ts +9 -0
  87. package/dist/oauth/session.d.ts.map +1 -0
  88. package/dist/oauth/session.js +65 -0
  89. package/dist/opengraph.d.ts +10 -0
  90. package/dist/opengraph.d.ts.map +1 -1
  91. package/dist/opengraph.js +103 -5
  92. package/dist/pds-proxy.d.ts +39 -0
  93. package/dist/pds-proxy.d.ts.map +1 -0
  94. package/dist/pds-proxy.js +173 -0
  95. package/dist/renderer.d.ts +27 -0
  96. package/dist/renderer.d.ts.map +1 -0
  97. package/dist/renderer.js +46 -0
  98. package/dist/resolve-hatk.d.ts +6 -0
  99. package/dist/resolve-hatk.d.ts.map +1 -0
  100. package/dist/resolve-hatk.js +20 -0
  101. package/dist/response.d.ts +16 -0
  102. package/dist/response.d.ts.map +1 -0
  103. package/dist/response.js +69 -0
  104. package/dist/scanner.d.ts +21 -0
  105. package/dist/scanner.d.ts.map +1 -0
  106. package/dist/scanner.js +88 -0
  107. package/dist/schema.d.ts +8 -0
  108. package/dist/schema.d.ts.map +1 -1
  109. package/dist/schema.js +29 -0
  110. package/dist/seed.d.ts +19 -0
  111. package/dist/seed.d.ts.map +1 -1
  112. package/dist/seed.js +43 -4
  113. package/dist/server-init.d.ts +8 -0
  114. package/dist/server-init.d.ts.map +1 -0
  115. package/dist/server-init.js +59 -0
  116. package/dist/server.d.ts +26 -3
  117. package/dist/server.d.ts.map +1 -1
  118. package/dist/server.js +487 -616
  119. package/dist/setup.d.ts +28 -1
  120. package/dist/setup.d.ts.map +1 -1
  121. package/dist/setup.js +50 -3
  122. package/dist/test.d.ts +1 -1
  123. package/dist/test.d.ts.map +1 -1
  124. package/dist/test.js +38 -32
  125. package/dist/views.js +1 -1
  126. package/dist/vite-plugin.d.ts +1 -1
  127. package/dist/vite-plugin.d.ts.map +1 -1
  128. package/dist/vite-plugin.js +252 -66
  129. package/dist/xrpc.d.ts +36 -0
  130. package/dist/xrpc.d.ts.map +1 -1
  131. package/dist/xrpc.js +124 -3
  132. package/package.json +12 -5
@@ -0,0 +1,777 @@
1
+ import { getSchema, runSQL, getSqlDialect } from "./db.js";
2
+ import { getLexicon } from "./schema.js";
3
+ import { emit, timer } from "../logger.js";
4
/**
 * Look up a same-lexicon reference such as "#artist".
 *
 * @param {string} ref - Reference string; only refs beginning with '#' are handled.
 * @param {object} lexicon - Lexicon document whose optional `defs` map is searched.
 * @returns {object|null} The matching definition, or null when the ref is not
 *   local or no (truthy) definition exists under that name.
 */
function resolveRef(ref, lexicon) {
    const isLocal = ref.startsWith('#');
    // Non-local refs are rejected before the lexicon is touched.
    return isLocal ? (lexicon.defs?.[ref.slice(1)] || null) : null;
}
14
/**
 * Derive searchable text expressions for a JSON column from its lexicon
 * property definition.
 *
 * - array of strings  -> one aggregated column aliased `<col>_text`
 * - array of objects  -> one aggregated column per string property
 * - plain object      -> one extracted column per string property
 * - anything else (blob, union, unknown, ...) -> no columns
 *
 * @param {string} colName - Column reference, optionally table-qualified (e.g. "t.artists").
 * @param {object} prop - Lexicon property definition for the column.
 * @param {object} lexicon - Lexicon used to resolve local `ref` array items.
 * @param {object} dialect - SQL dialect providing `jsonArrayStringAgg` and `jsonExtractString`.
 * @returns {{expr: string, alias: string}[]} Select expressions and their aliases.
 */
function jsonSearchColumns(colName, prop, lexicon, dialect) {
    // Drop any table qualifier ("t.artists" -> "artists") so aliases stay unqualified.
    const aliasBase = colName.slice(colName.lastIndexOf('.') + 1);
    // Names of the properties declared as strings, in declaration order.
    const stringFields = (properties) => Object.keys(properties).filter((key) => properties[key].type === 'string');
    if (prop.type === 'array' && prop.items) {
        const { items } = prop;
        const itemDef = items.type === 'ref' && items.ref ? resolveRef(items.ref, lexicon) : items;
        if (!itemDef) {
            return [];
        }
        if (itemDef.type === 'string') {
            return [{ expr: dialect.jsonArrayStringAgg(colName, '$[*]'), alias: `${aliasBase}_text` }];
        }
        if (itemDef.type === 'object' && itemDef.properties) {
            return stringFields(itemDef.properties).map((field) => ({
                expr: dialect.jsonArrayStringAgg(colName, `$[*].${field}`),
                alias: `${aliasBase}_${field}`,
            }));
        }
        return [];
    }
    if (prop.type === 'object' && prop.properties) {
        return stringFields(prop.properties).map((field) => ({
            expr: dialect.jsonExtractString(colName, `$.${field}`),
            alias: `${aliasBase}_${field}`,
        }));
    }
    return [];
}
59
// Active search port for this module; null means full-text search is unavailable.
let searchPort = null;
/**
 * Install (or clear) the search port used for FTS indexing.
 *
 * Nullish values are normalized to null so that `hasSearchPort()` and the
 * truthiness check in `buildFtsIndex` always agree about whether a port exists.
 *
 * @param {object|null|undefined} port - Search port, or null/undefined to clear it.
 */
export function setSearchPort(port) {
    searchPort = port ?? null;
}
/**
 * @returns {boolean} True when a search port has been installed.
 */
export function hasSearchPort() {
    return searchPort !== null;
}
/**
 * @returns {object|null} The installed search port, or null if none.
 */
export function getSearchPort() {
    return searchPort;
}
69
// Timestamp of the most recent FTS rebuild, keyed by collection
const lastRebuiltAt = new Map();
// Search column names per collection; filled in by buildFtsIndex
const searchColumnCache = new Map();
/**
 * @param {string} collection - Collection name.
 * @returns {string[]} Cached search column names, or an empty array when
 *   nothing has been cached for the collection.
 */
export function getSearchColumns(collection) {
    const cached = searchColumnCache.get(collection);
    return cached ? cached : [];
}
/**
 * @param {string} collection - Collection name.
 * @returns {string|null} Value recorded at the collection's last rebuild, or
 *   null when no rebuild has been recorded.
 */
export function getLastRebuiltAt(collection) {
    const stamp = lastRebuiltAt.get(collection);
    if (stamp === undefined || stamp === null) {
        return null;
    }
    return stamp;
}
79
/**
 * Name of the FTS shadow table for a collection.
 *
 * DuckDB FTS reads dots in a table name as catalog.schema.table separators,
 * so every dot is replaced with an underscore and the result is prefixed
 * with "_fts_".
 *
 * @param {string} collection - Collection name, e.g. "app.example.post".
 * @returns {string} Shadow table name, e.g. "_fts_app_example_post".
 */
export function ftsTableName(collection) {
    const underscored = collection.split('.').join('_');
    return `_fts_${underscored}`;
}
86
/**
 * Build the FTS index for a collection.
 *
 * Assembles a SELECT over the collection's table (aliased `t`, LEFT JOINed to
 * `_repos` as `r`) and passes it, together with the shadow table name and the
 * list of searchable column names, to the active search port. How the index
 * itself is built (tokenizer, stemming, stop words) is up to the search port.
 *
 * Searchable columns are gathered from:
 *  - TEXT columns of the main table;
 *  - JSON columns, expanded via the lexicon definition where possible and
 *    otherwise cast to TEXT (JSON columns are skipped when the lexicon has no
 *    record properties, and blob properties are always skipped);
 *  - TEXT columns of child tables and union branch tables, aggregated per parent row;
 *  - `handle` from `_repos`.
 *
 * On success the column list and rebuild time are recorded in the module caches.
 *
 * @param {string} collection - Collection name.
 * @returns {Promise<void>} Resolves after the search port has built the index;
 *   resolves immediately when no search port is installed.
 * @throws {Error} When no schema is known for the collection.
 */
export async function buildFtsIndex(collection) {
    if (!searchPort)
        return; // No FTS support for this adapter
    const schema = getSchema(collection);
    if (!schema)
        throw new Error(`Unknown collection: ${collection}`);
    const lexicon = getLexicon(collection);
    const record = lexicon?.defs?.main?.record;
    const dialect = getSqlDialect();
    // Build column list for shadow table
    const selectExprs = ['t.uri', 't.cid', 't.did', 't.indexed_at'];
    const searchColNames = [];
    for (const col of schema.columns) {
        if (col.sqlType === 'TEXT') {
            selectExprs.push(`t.${col.name}`);
            searchColNames.push(col.name);
            continue;
        }
        // Beyond TEXT, only JSON columns are searchable, and only when the
        // lexicon describes the record's properties.
        if (col.sqlType !== 'JSON' || !record?.properties)
            continue;
        const prop = record.properties[col.originalName];
        if (prop?.type === 'blob')
            continue; // skip blobs
        // `record` is derived from `lexicon`, so the lexicon is non-null here.
        const derived = prop ? jsonSearchColumns(`t.${col.name}`, prop, lexicon, dialect) : [];
        if (derived.length > 0) {
            for (const d of derived) {
                selectExprs.push(`${d.expr} AS ${d.alias}`);
                searchColNames.push(d.alias);
            }
            continue;
        }
        // Fallback: cast JSON to TEXT
        selectExprs.push(`CAST(t.${col.name} AS TEXT) AS ${col.name}`);
        searchColNames.push(col.name);
    }
    // Adds one correlated subquery that space-joins a related table's text
    // column across all rows belonging to the current parent record.
    const pushRelatedText = (tableName, colName, alias) => {
        const agg = dialect.stringAgg(`c.${colName}`, "' '");
        selectExprs.push(`(SELECT ${agg} FROM ${tableName} c WHERE c.parent_uri = t.uri) AS ${alias}`);
        searchColNames.push(alias);
    };
    // Include searchable text from child tables (decomposed array fields)
    for (const child of schema.children) {
        for (const col of child.columns) {
            if (col.sqlType === 'TEXT') {
                pushRelatedText(child.tableName, col.name, `${child.fieldName}_${col.name}`);
            }
        }
    }
    // Include searchable text from union branch tables
    for (const union of schema.unions) {
        for (const branch of union.branches) {
            for (const col of branch.columns) {
                if (col.sqlType === 'TEXT') {
                    pushRelatedText(branch.tableName, col.name, `${union.fieldName}_${branch.branchName}_${col.name}`);
                }
            }
        }
    }
    // Include handle from _repos for people search. Because this column is
    // always added, the search column list is never empty.
    selectExprs.push('r.handle');
    searchColNames.push('handle');
    const safeName = ftsTableName(collection);
    const sourceQuery = `SELECT ${selectExprs.join(', ')} FROM ${schema.tableName} t LEFT JOIN _repos r ON t.did = r.did`;
    await searchPort.buildIndex(safeName, sourceQuery, searchColNames);
    searchColumnCache.set(collection, searchColNames);
    lastRebuiltAt.set(collection, new Date().toISOString());
}
163
/**
 * English stop words used when cleaning search queries.
 *
 * DuckDB's built-in English stop words — must match stopwords='english' in
 * create_fts_index. The list is written as 571 whitespace-separated entries;
 * `would` appears twice, so the resulting Set holds 570 distinct words.
 * All entries are lower-case; callers should lower-case terms before lookup.
 */
const ENGLISH_STOP_WORDS = new Set(`
a a's able about above according accordingly across actually after afterwards again against ain't
all allow allows almost alone along already also although always am among amongst an and another
any anybody anyhow anyone anything anyway anyways anywhere apart appear appreciate appropriate are
aren't around as aside ask asking associated at available away awfully
b be became because become becomes becoming been before beforehand behind being believe below
beside besides best better between beyond both brief but by
c c'mon c's came can can't cannot cant cause causes certain certainly changes clearly co com come
comes concerning consequently consider considering contain containing contains corresponding could
couldn't course currently
d definitely described despite did didn't different do does doesn't doing don't done down downwards
during
e each edu eg eight either else elsewhere enough entirely especially et etc even ever every
everybody everyone everything everywhere ex exactly example except
f far few fifth first five followed following follows for former formerly forth four from further
furthermore
g get gets getting given gives go goes going gone got gotten greetings
h had hadn't happens hardly has hasn't have haven't having he he's hello help hence her here here's
hereafter hereby herein hereupon hers herself hi him himself his hither hopefully how howbeit
however
i i'd i'll i'm i've ie if ignored immediate in inasmuch inc indeed indicate indicated indicates
inner insofar instead into inward is isn't it it'd it'll it's its itself
j just
k keep keeps kept know known knows
l last lately later latter latterly least less lest let let's like liked likely little look looking
looks ltd
m mainly many may maybe me mean meanwhile merely might more moreover most mostly much must my
myself
n name namely nd near nearly necessary need needs neither never nevertheless new next nine no
nobody non none noone nor normally not nothing novel now nowhere
o obviously of off often oh ok okay old on once one ones only onto or other others otherwise ought
our ours ourselves out outside over overall own
p particular particularly per perhaps placed please plus possible presumably probably provides
q que quite qv
r rather rd re really reasonably regarding regardless regards relatively respectively right
s said same saw say saying says second secondly see seeing seem seemed seeming seems seen self
selves sensible sent serious seriously seven several shall she should shouldn't since six so some
somebody somehow someone something sometime sometimes somewhat somewhere soon sorry specified
specify specifying still sub such sup sure
t t's take taken tell tends th than thank thanks thanx that that's thats the their theirs them
themselves then thence there there's thereafter thereby therefore therein theres thereupon these
they they'd they'll they're they've think third this thorough thoroughly those though three through
throughout thru thus to together too took toward towards tried tries truly try trying twice two
u un under unfortunately unless unlikely until unto up upon us use used useful uses using usually
uucp
v value various very via viz vs
w want wants was wasn't way we we'd we'll we're we've welcome well went were weren't what what's
whatever when whence whenever where where's whereafter whereas whereby wherein whereupon wherever
whether which while whither who who's whoever whole whom whose why will willing wish with within
without won't wonder would would wouldn't
x
y yes yet you you'd you'll you're you've your yours yourself yourselves
z zero
`.trim().split(/\s+/));
740
/**
 * Remove English stop words from a search query.
 *
 * The query is trimmed and split on runs of whitespace; each term is compared
 * case-insensitively against ENGLISH_STOP_WORDS. Surviving terms keep their
 * original casing and are re-joined with single spaces.
 *
 * @param {string} query - Raw search query.
 * @returns {string} The query without stop words, or the original query
 *   unchanged when every term is a stop word.
 */
export function stripStopWords(query) {
    const kept = [];
    for (const term of query.trim().split(/\s+/)) {
        if (ENGLISH_STOP_WORDS.has(term.toLowerCase())) {
            continue;
        }
        kept.push(term);
    }
    // Fall back to the untouched input rather than returning an empty query.
    return kept.length === 0 ? query : kept.join(' ');
}
749
/**
 * Rebuild the FTS index of each given collection, one at a time.
 *
 * A failure in one collection is recorded and does not stop the remaining
 * collections. Afterwards, if the SQL dialect defines `checkpointSQL`, it is
 * run on a best-effort basis (to compact the WAL after heavy FTS work; DuckDB
 * only). A single `fts` / `rebuild` event summarizing the run is then emitted.
 *
 * @param {string[]} collections - Collection names to rebuild.
 * @returns {Promise<void>} Never rejects because of an individual index or
 *   checkpoint failure; those are reported in the emitted event instead.
 */
export async function rebuildAllIndexes(collections) {
    const elapsed = timer();
    let rebuilt = 0;
    const errors = [];
    for (const collection of collections) {
        try {
            await buildFtsIndex(collection);
            rebuilt++;
        }
        catch (err) {
            // Tolerate non-Error throwables, which have no `message`.
            errors.push(`${collection}: ${err?.message ?? String(err)}`);
        }
    }
    // Best-effort checkpoint: a failure here must not fail the rebuild, but it
    // is surfaced in the event rather than silently dropped.
    let checkpointError;
    try {
        const { checkpointSQL } = getSqlDialect();
        if (checkpointSQL)
            await runSQL(checkpointSQL);
    }
    catch (err) {
        checkpointError = err?.message ?? String(err);
    }
    emit('fts', 'rebuild', {
        collections_total: collections.length,
        collections_rebuilt: rebuilt,
        error_count: errors.length,
        duration_ms: elapsed(),
        errors: errors.length > 0 ? errors : undefined,
        checkpoint_error: checkpointError,
    });
}