@strav/search 0.4.31 → 1.0.0-alpha.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. package/package.json +20 -22
  2. package/src/console/index.ts +5 -0
  3. package/src/console/search_console_provider.ts +20 -0
  4. package/src/console/search_flush.ts +49 -0
  5. package/src/console/search_import.ts +103 -0
  6. package/src/console/search_list.ts +46 -0
  7. package/src/console/search_reindex.ts +94 -0
  8. package/src/drivers/meilisearch/meilisearch_driver.ts +304 -0
  9. package/src/drivers/memory/memory_driver.ts +344 -0
  10. package/src/drivers/postgres/apply_search_migration.ts +74 -0
  11. package/src/drivers/postgres/postgres_fts_driver.ts +493 -135
  12. package/src/drivers/typesense/typesense_driver.ts +345 -0
  13. package/src/index.ts +50 -39
  14. package/src/search_engine.ts +40 -25
  15. package/src/search_error.ts +86 -0
  16. package/src/search_manager.ts +112 -94
  17. package/src/search_provider.ts +68 -6
  18. package/src/searchable.ts +173 -160
  19. package/src/searchable_registry.ts +61 -0
  20. package/src/types.ts +59 -49
  21. package/README.md +0 -191
  22. package/src/commands/search_flush.ts +0 -41
  23. package/src/commands/search_import.ts +0 -43
  24. package/src/commands/search_optimize.ts +0 -52
  25. package/src/commands/search_rebuild.ts +0 -73
  26. package/src/drivers/algolia_driver.ts +0 -170
  27. package/src/drivers/embedded/embedded_driver.ts +0 -136
  28. package/src/drivers/embedded/engine/field_registry.ts +0 -97
  29. package/src/drivers/embedded/engine/fts_query_builder.ts +0 -184
  30. package/src/drivers/embedded/engine/query_compiler.ts +0 -134
  31. package/src/drivers/embedded/engine/schema.ts +0 -99
  32. package/src/drivers/embedded/engine/snippet_formatter.ts +0 -29
  33. package/src/drivers/embedded/engine/sqlite_engine.ts +0 -255
  34. package/src/drivers/embedded/engine/typo_expander.ts +0 -138
  35. package/src/drivers/embedded/errors.ts +0 -15
  36. package/src/drivers/embedded/filters/filter_compiler.ts +0 -136
  37. package/src/drivers/embedded/index.ts +0 -3
  38. package/src/drivers/embedded/storage/paths.ts +0 -23
  39. package/src/drivers/embedded/types.ts +0 -34
  40. package/src/drivers/meilisearch_driver.ts +0 -150
  41. package/src/drivers/null_driver.ts +0 -27
  42. package/src/drivers/postgres/engine/field_registry.ts +0 -116
  43. package/src/drivers/postgres/engine/fts_query_builder.ts +0 -105
  44. package/src/drivers/postgres/engine/pg_engine.ts +0 -300
  45. package/src/drivers/postgres/engine/query_compiler.ts +0 -165
  46. package/src/drivers/postgres/engine/schema.ts +0 -187
  47. package/src/drivers/postgres/engine/snippet_formatter.ts +0 -31
  48. package/src/drivers/postgres/engine/typo_expander.ts +0 -131
  49. package/src/drivers/postgres/errors.ts +0 -33
  50. package/src/drivers/postgres/filters/filter_compiler.ts +0 -138
  51. package/src/drivers/postgres/index.ts +0 -14
  52. package/src/drivers/postgres/rebuild/rebuild_inplace.ts +0 -113
  53. package/src/drivers/postgres/storage/identifiers.ts +0 -46
  54. package/src/drivers/postgres/types.ts +0 -53
  55. package/src/drivers/typesense_driver.ts +0 -229
  56. package/src/errors.ts +0 -18
  57. package/src/helpers.ts +0 -120
  58. package/stubs/config/search.ts +0 -57
  59. package/tsconfig.json +0 -5
@@ -1,184 +1,542 @@
1
- import type { SQL } from 'bun'
1
+ /**
2
+ * `PostgresFtsDriver` — `SearchEngine` backed by Postgres
3
+ * `tsvector` + GIN, one table per index inside a dedicated
4
+ * schema (`strav_search` by default).
5
+ *
6
+ * What the driver provisions for each `createIndex(name, …)`:
7
+ *
8
+ * ```
9
+ * CREATE TABLE "strav_search"."<name>" (
10
+ * id text PRIMARY KEY,
11
+ * tenant_id text, -- nullable for single-tenant apps
12
+ * document jsonb NOT NULL,
13
+ * fts tsvector NOT NULL
14
+ * );
15
+ * CREATE INDEX "<name>_fts" ON … USING GIN(fts);
16
+ * CREATE INDEX "<name>_tenant" ON … (tenant_id) WHERE tenant_id IS NOT NULL;
17
+ * -- Per filterable / sortable attribute:
18
+ * CREATE INDEX "<name>_<attr>" ON … ((document->>'<attr>'));
19
+ * -- RLS on `tenant_id`:
20
+ * ALTER TABLE … ENABLE ROW LEVEL SECURITY;
21
+ * ALTER TABLE … FORCE ROW LEVEL SECURITY;
22
+ * CREATE POLICY "<name>_isolate" ON …
23
+ * USING (tenant_id IS NULL OR tenant_id = current_setting('app.tenant_id', true));
24
+ * ```
25
+ *
26
+ * Settings (search/filter/sort attributes, primary key,
27
+ * language) live in `strav_search._meta` so multi-process
28
+ * deployments stay coherent. The driver caches the resolved
29
+ * SQL fragments per index in memory.
30
+ *
31
+ * Search uses `websearch_to_tsquery` + `ts_rank_cd` for
32
+ * ranking, plus `ts_headline` snippets for any field in
33
+ * `attributesToHighlight`. The total hit count comes from a
34
+ * `COUNT(*) OVER ()` window so pagination metadata is correct
35
+ * without a separate query.
36
+ *
37
+ * What V1 does **not** do:
38
+ *
39
+ * - **Typo tolerance / `pg_trgm`.** The 0.x driver shipped a
40
+ * trigram-based fuzzy expander; the surface area is large
41
+ * and the trade-offs index-specific. Deferred to V1.1 —
42
+ * apps that need it today drop down to the Memory or
43
+ * Meilisearch drivers.
44
+ * - **In-place rebuild tiers.** Apps re-index via the mixin's
45
+ * `Repository.importAll()` (or the future `search:reindex`
46
+ * console command) — no per-driver size heuristic.
47
+ * - **Adding a `filterableAttribute` to a large existing
48
+ * index** is fine — the index is built lazily via
49
+ * `CREATE INDEX IF NOT EXISTS` on `(document->>'<attr>')`,
50
+ * no `ALTER TABLE ADD COLUMN`.
51
+ */
52
+
53
+ import {
54
+ currentTransactionalContext,
55
+ type DatabaseExecutor,
56
+ type PostgresDatabase,
57
+ } from '@strav/database'
58
+ import { IndexNotFoundError, SearchError, SearchQueryError } from '../../search_error.ts'
2
59
  import type { SearchEngine } from '../../search_engine.ts'
3
60
  import type {
61
+ DriverConfig,
62
+ IndexSettings,
4
63
  SearchDocument,
64
+ SearchHit,
5
65
  SearchOptions,
6
66
  SearchResult,
7
- IndexSettings,
8
- DriverConfig,
9
67
  } from '../../types.ts'
10
- import { PgEngine } from './engine/pg_engine.ts'
11
- import { ensureSchemaAndExtensions } from './engine/schema.ts'
12
- import { resolveTypoTolerance } from './engine/typo_expander.ts'
13
- import type { PostgresFtsConfig, PgIndexSettings, ResolvedTypoTolerance } from './types.ts'
14
- import { MissingConnectionError } from './errors.ts'
68
+ import { DEFAULT_SEARCH_SCHEMA, validateIdentifier } from './apply_search_migration.ts'
69
+
70
+ interface ResolvedIndex {
71
+ settings: IndexSettings
72
+ language: string
73
+ /** Tokenized + validated attribute names — safe to splice into SQL. */
74
+ searchable: string[]
75
+ filterable: Set<string>
76
+ sortable: Set<string>
77
+ /** SQL fragment that produces the tsvector for the document JSONB. */
78
+ ftsExpression: string
79
+ }
15
80
 
16
- const DEFAULT_SCHEMA = 'strav_search'
17
81
  const DEFAULT_LANGUAGE = 'english'
18
- const DEFAULT_WORK_MEM = '64MB'
82
+ const TENANT_SETTING = 'app.tenant_id'
83
+
84
+ export interface PostgresFtsDriverOptions {
85
+ db: PostgresDatabase
86
+ /** Schema for index tables. Defaults to `strav_search`. */
87
+ schema?: string
88
+ /** Default text-search configuration (`english`, `french`, ...). */
89
+ language?: string
90
+ }
19
91
 
20
- /**
21
- * Postgres-backed full-text search driver. Implements the same `SearchEngine`
22
- * interface as the embedded SQLite driver — drop-in swap by config.
23
- *
24
- * Sized for higher-volume workloads (1M-100M docs per index) using `tsvector`
25
- * + GIN + `pg_trgm` for typo tolerance + `ts_headline` for snippets.
26
- *
27
- * Connection: pass `connection` (a Bun `SQL` instance) in the driver config,
28
- * or rely on `Database.raw` from `@strav/database` (must be bootstrapped).
29
- */
30
92
  export class PostgresFtsDriver implements SearchEngine {
31
93
  readonly name = 'postgres-fts'
32
94
 
33
- private readonly config: PostgresFtsConfig
34
- private readonly schemaName: string
95
+ private readonly db: PostgresDatabase
96
+ private readonly schema: string
35
97
  private readonly defaultLanguage: string
36
- private readonly typo: ResolvedTypoTolerance
37
- private readonly ginFastUpdate: boolean
38
- private readonly workMem: string | null
39
- private readonly engines = new Map<string, PgEngine>()
40
- private readonly pendingSettings = new Map<string, PgIndexSettings>()
41
- private bootstrapped: Promise<void> | null = null
42
- private resolvedSql: SQL | null = null
43
-
44
- constructor(config: DriverConfig) {
45
- this.config = (config ?? {}) as PostgresFtsConfig
46
- this.schemaName = this.config.schema ?? DEFAULT_SCHEMA
47
- this.defaultLanguage = this.config.language ?? DEFAULT_LANGUAGE
48
- this.typo = resolveTypoTolerance(this.config.typoTolerance)
49
- this.ginFastUpdate = this.config.gin?.fastupdate ?? false
50
- this.workMem =
51
- this.config.workMem === null
52
- ? null
53
- : (this.config.workMem ?? DEFAULT_WORK_MEM)
54
- }
55
-
56
- // ── Document operations ──────────────────────────────────────────────────
98
+ private readonly indexes = new Map<string, ResolvedIndex>()
99
+
100
+ constructor(options: PostgresFtsDriverOptions) {
101
+ this.db = options.db
102
+ this.schema = validateIdentifier(options.schema ?? DEFAULT_SEARCH_SCHEMA, 'schema')
103
+ this.defaultLanguage = options.language ?? DEFAULT_LANGUAGE
104
+ }
105
+
106
+ static fromConfig(db: PostgresDatabase, config: DriverConfig): PostgresFtsDriver {
107
+ return new PostgresFtsDriver({
108
+ db,
109
+ ...(typeof config.schema === 'string' ? { schema: config.schema } : {}),
110
+ ...(typeof config.language === 'string' ? { language: config.language } : {}),
111
+ })
112
+ }
113
+
114
+ /**
115
+ * Route reads + writes through the ambient `UnitOfWork`
116
+ * transaction when one is active (e.g., inside
117
+ * `tenants.withTenant(...)`), so RLS scoping applies
118
+ * uniformly with the rest of the app's database calls.
119
+ */
120
+ private exec(): DatabaseExecutor {
121
+ const ambient = currentTransactionalContext()
122
+ if (ambient) return ambient.tx
123
+ return this.db as unknown as DatabaseExecutor
124
+ }
125
+
126
+ // ─── Index lifecycle ────────────────────────────────────────────────────
127
+
128
+ async createIndex(index: string, settings: IndexSettings = {}): Promise<void> {
129
+ const indexName = validateIdentifier(index, 'index')
130
+ const language = sanitizeLanguage(this.defaultLanguage)
131
+ const resolved = resolveIndexSettings(settings, language)
132
+
133
+ const ex = this.exec()
134
+ const table = this.qualify(indexName)
135
+
136
+ // Table + columns.
137
+ await ex.execute(
138
+ `CREATE TABLE IF NOT EXISTS ${table} (
139
+ "id" text PRIMARY KEY,
140
+ "tenant_id" text,
141
+ "document" jsonb NOT NULL,
142
+ "fts" tsvector NOT NULL
143
+ )`,
144
+ )
145
+
146
+ // Indexes — GIN over fts + B-tree over tenant_id + expression
147
+ // index per filterable / sortable attribute.
148
+ await ex.execute(
149
+ `CREATE INDEX IF NOT EXISTS "${indexName}_fts"
150
+ ON ${table} USING GIN ("fts")`,
151
+ )
152
+ await ex.execute(
153
+ `CREATE INDEX IF NOT EXISTS "${indexName}_tenant"
154
+ ON ${table} ("tenant_id") WHERE "tenant_id" IS NOT NULL`,
155
+ )
156
+ for (const attr of [...resolved.filterable, ...resolved.sortable]) {
157
+ await ex.execute(
158
+ `CREATE INDEX IF NOT EXISTS "${indexName}_${attr}"
159
+ ON ${table} (("document"->>'${attr}'))`,
160
+ )
161
+ }
162
+
163
+ // RLS — single policy keyed off `app.tenant_id`. Rows with
164
+ // `tenant_id IS NULL` are visible to every tenant (the
165
+ // single-tenant default).
166
+ await ex.execute(`ALTER TABLE ${table} ENABLE ROW LEVEL SECURITY`)
167
+ await ex.execute(`ALTER TABLE ${table} FORCE ROW LEVEL SECURITY`)
168
+ await ex.execute(`DROP POLICY IF EXISTS "${indexName}_isolate" ON ${table}`)
169
+ await ex.execute(
170
+ `CREATE POLICY "${indexName}_isolate" ON ${table}
171
+ USING (
172
+ "tenant_id" IS NULL
173
+ OR "tenant_id" = current_setting('${TENANT_SETTING}', true)
174
+ )
175
+ WITH CHECK (
176
+ "tenant_id" IS NULL
177
+ OR "tenant_id" = current_setting('${TENANT_SETTING}', true)
178
+ )`,
179
+ )
180
+
181
+ // Persist the settings + language for cross-process reads.
182
+ await ex.execute(
183
+ `INSERT INTO "${this.schema}"."_meta" ("index_name", "settings", "language", "updated_at")
184
+ VALUES ($1, $2::jsonb, $3, now())
185
+ ON CONFLICT ("index_name") DO UPDATE
186
+ SET "settings" = EXCLUDED."settings",
187
+ "language" = EXCLUDED."language",
188
+ "updated_at" = now()`,
189
+ [indexName, JSON.stringify(settings), language],
190
+ )
191
+
192
+ this.indexes.set(indexName, resolved)
193
+ }
194
+
195
+ async deleteIndex(index: string): Promise<void> {
196
+ const indexName = validateIdentifier(index, 'index')
197
+ const ex = this.exec()
198
+ await ex.execute(`DROP TABLE IF EXISTS ${this.qualify(indexName)} CASCADE`)
199
+ await ex.execute(`DELETE FROM "${this.schema}"."_meta" WHERE "index_name" = $1`, [indexName])
200
+ this.indexes.delete(indexName)
201
+ }
202
+
203
+ async flush(index: string): Promise<void> {
204
+ const indexName = validateIdentifier(index, 'index')
205
+ const exists = await this.indexExists(indexName)
206
+ if (!exists) return
207
+ await this.exec().execute(`DELETE FROM ${this.qualify(indexName)}`)
208
+ }
209
+
210
+ // ─── Writes ─────────────────────────────────────────────────────────────
57
211
 
58
212
  async upsert(
59
213
  index: string,
60
214
  id: string | number,
61
- document: Record<string, unknown>
215
+ document: Record<string, unknown>,
62
216
  ): Promise<void> {
63
- await (await this.engineFor(index)).upsert(id, document)
217
+ await this.upsertMany(index, [{ id, ...document }])
64
218
  }
65
219
 
66
- async upsertMany(index: string, documents: SearchDocument[]): Promise<void> {
67
- await (await this.engineFor(index)).upsertMany(documents)
220
+ async upsertMany(index: string, documents: readonly SearchDocument[]): Promise<void> {
221
+ if (documents.length === 0) return
222
+ const indexName = validateIdentifier(index, 'index')
223
+ const resolved = await this.resolveIndex(indexName)
224
+ const table = this.qualify(indexName)
225
+ const ex = this.exec()
226
+
227
+ // Insert one row at a time so each row's fts expression can
228
+ // bind its own JSONB parameter cleanly. Bulk INSERT with
229
+ // VALUES + per-row CTE was tried and complicates the
230
+ // parameter math far more than it saves in latency for
231
+ // typical batches of <500 rows.
232
+ for (const doc of documents) {
233
+ await ex.execute(
234
+ `INSERT INTO ${table} ("id", "tenant_id", "document", "fts")
235
+ VALUES (
236
+ $1,
237
+ current_setting('${TENANT_SETTING}', true),
238
+ $2,
239
+ ${resolved.ftsExpression}
240
+ )
241
+ ON CONFLICT ("id") DO UPDATE
242
+ SET "document" = EXCLUDED."document",
243
+ "fts" = EXCLUDED."fts"`,
244
+ // Pass the document as a JS object — `bun:sql` encodes JSONB
245
+ // params directly. Pre-`JSON.stringify`-ing causes Postgres
246
+ // to store a JSON string scalar instead of an object, which
247
+ // breaks every `->>` projection downstream.
248
+ [String(doc.id), { ...(doc as Record<string, unknown>) }],
249
+ )
250
+ }
68
251
  }
69
252
 
70
253
  async delete(index: string, id: string | number): Promise<void> {
71
- await (await this.engineFor(index)).delete(id)
254
+ await this.deleteMany(index, [id])
72
255
  }
73
256
 
74
- async deleteMany(index: string, ids: Array<string | number>): Promise<void> {
75
- await (await this.engineFor(index)).deleteMany(ids)
257
+ async deleteMany(index: string, ids: readonly (string | number)[]): Promise<void> {
258
+ if (ids.length === 0) return
259
+ const indexName = validateIdentifier(index, 'index')
260
+ const placeholders = ids.map((_, i) => `$${i + 1}`).join(', ')
261
+ const params = ids.map((id) => String(id))
262
+ await this.exec().execute(
263
+ `DELETE FROM ${this.qualify(indexName)} WHERE "id" IN (${placeholders})`,
264
+ params,
265
+ )
76
266
  }
77
267
 
78
- // ── Index operations ─────────────────────────────────────────────────────
268
+ // ─── Reads ──────────────────────────────────────────────────────────────
79
269
 
80
- async flush(index: string): Promise<void> {
81
- await (await this.engineFor(index)).flush()
82
- }
270
+ async search(index: string, query: string, options: SearchOptions = {}): Promise<SearchResult> {
271
+ const indexName = validateIdentifier(index, 'index')
272
+ const resolved = await this.resolveIndex(indexName)
83
273
 
84
- async deleteIndex(index: string): Promise<void> {
85
- const engine = this.engines.get(index)
86
- if (engine) {
87
- await engine.drop()
88
- this.engines.delete(index)
274
+ if (options.filter !== undefined && (typeof options.filter !== 'object' || Array.isArray(options.filter))) {
275
+ throw new SearchQueryError(
276
+ 'PostgresFtsDriver: `filter` must be a flat key/value object. Engine-native strings are not portable.',
277
+ )
278
+ }
279
+
280
+ const page = Math.max(1, options.page ?? 1)
281
+ const perPage = Math.max(1, options.perPage ?? 20)
282
+ const limit = perPage
283
+ const offset = (page - 1) * perPage
284
+
285
+ const table = this.qualify(indexName)
286
+ const params: unknown[] = []
287
+ const select: string[] = ['"id"', '"document"']
288
+ const whereClauses: string[] = []
289
+ const trimmedQuery = query.trim()
290
+
291
+ if (trimmedQuery.length > 0) {
292
+ params.push(trimmedQuery)
293
+ const qIdx = params.length
294
+ select.push(`ts_rank_cd("fts", websearch_to_tsquery('${resolved.language}', $${qIdx})) AS rank`)
295
+ whereClauses.push(`"fts" @@ websearch_to_tsquery('${resolved.language}', $${qIdx})`)
89
296
  } else {
90
- // Drop directly without instantiating an engine.
91
- const sql = this.resolveSql()
92
- const { dropIndex } = await import('./engine/schema.ts')
93
- await dropIndex(sql, this.schemaName, index)
297
+ select.push('0.0 AS rank')
94
298
  }
95
- this.pendingSettings.delete(index)
96
- }
97
299
 
98
- async createIndex(index: string, options?: IndexSettings): Promise<void> {
99
- if (options) this.pendingSettings.set(index, options as PgIndexSettings)
100
- const engine = await this.engineFor(index)
101
- await engine.ensure()
102
- }
300
+ if (options.filter) {
301
+ for (const [key, value] of Object.entries(options.filter)) {
302
+ if (!resolved.filterable.has(key)) {
303
+ throw new SearchQueryError(
304
+ `PostgresFtsDriver: filter key "${key}" is not in this index's filterableAttributes.`,
305
+ { context: { index: indexName, key, filterable: [...resolved.filterable] } },
306
+ )
307
+ }
308
+ params.push(typeof value === 'number' || typeof value === 'boolean' ? String(value) : value)
309
+ whereClauses.push(`("document"->>'${key}') = $${params.length}`)
310
+ }
311
+ }
312
+
313
+ // Highlights — one ts_headline column per requested attribute.
314
+ if (options.attributesToHighlight) {
315
+ params.push(trimmedQuery.length > 0 ? trimmedQuery : '')
316
+ const qIdx = params.length
317
+ for (const attr of options.attributesToHighlight) {
318
+ if (!isSafeAttribute(attr)) {
319
+ throw new SearchQueryError(
320
+ `PostgresFtsDriver: highlight attribute "${attr}" contains illegal characters.`,
321
+ )
322
+ }
323
+ select.push(
324
+ `ts_headline(
325
+ '${resolved.language}',
326
+ coalesce("document"->>'${attr}', ''),
327
+ websearch_to_tsquery('${resolved.language}', $${qIdx}),
328
+ 'StartSel=<mark>,StopSel=</mark>,MaxFragments=2,MinWords=3,MaxWords=15'
329
+ ) AS "hl_${attr}"`,
330
+ )
331
+ }
332
+ }
103
333
 
104
- // ── Search ───────────────────────────────────────────────────────────────
334
+ // Sort — explicit user sort takes precedence; otherwise rank for queried,
335
+ // id for empty queries.
336
+ const orderClauses: string[] = []
337
+ if (options.sort) {
338
+ for (const directive of options.sort) {
339
+ const [field, dirRaw] = directive.split(':') as [string, string | undefined]
340
+ const dir = dirRaw?.toLowerCase() === 'desc' ? 'DESC' : 'ASC'
341
+ if (!resolved.sortable.has(field)) {
342
+ throw new SearchQueryError(
343
+ `PostgresFtsDriver: sort attribute "${field}" is not in this index's sortableAttributes.`,
344
+ { context: { index: indexName, field, sortable: [...resolved.sortable] } },
345
+ )
346
+ }
347
+ orderClauses.push(`("document"->>'${field}') ${dir}`)
348
+ }
349
+ } else if (trimmedQuery.length > 0) {
350
+ orderClauses.push('rank DESC')
351
+ } else {
352
+ orderClauses.push('"id" ASC')
353
+ }
105
354
 
106
- async search(index: string, query: string, options?: SearchOptions): Promise<SearchResult> {
107
- return (await this.engineFor(index)).search(query, options)
108
- }
355
+ select.push('COUNT(*) OVER () AS total')
356
+ params.push(limit, offset)
357
+ const sql =
358
+ `SELECT ${select.join(', ')}
359
+ FROM ${table}
360
+ ${whereClauses.length > 0 ? `WHERE ${whereClauses.join(' AND ')}` : ''}
361
+ ORDER BY ${orderClauses.join(', ')}
362
+ LIMIT $${params.length - 1} OFFSET $${params.length}`
109
363
 
110
- // ── Lifecycle ────────────────────────────────────────────────────────────
364
+ const start = performance.now()
365
+ let rows: Array<Record<string, unknown>>
366
+ try {
367
+ rows = await this.exec().query<Record<string, unknown>>(sql, params)
368
+ } catch (cause) {
369
+ const msg = (cause as Error).message ?? ''
370
+ if (msg.includes('does not exist')) {
371
+ throw new IndexNotFoundError(indexName, this.name)
372
+ }
373
+ throw new SearchQueryError(`PostgresFtsDriver: search query failed: ${msg}`, { cause })
374
+ }
111
375
 
112
- /** Run REINDEX on every open index, or just one if specified. */
113
- async optimize(index?: string): Promise<void> {
114
- if (index) {
115
- await (await this.engineFor(index)).optimize()
116
- return
376
+ const totalHits = rows.length === 0 ? 0 : Number(rows[0]!.total ?? 0)
377
+ const hits: SearchHit[] = rows.map((row) => {
378
+ // `bun:sql` returns jsonb columns as text — parse so apps see
379
+ // the original document shape.
380
+ const document =
381
+ typeof row.document === 'string'
382
+ ? (JSON.parse(row.document) as Record<string, unknown>)
383
+ : (row.document as Record<string, unknown>)
384
+ const projected = projectAttributes(document, options.attributesToRetrieve)
385
+ const hit: SearchHit = { document: projected }
386
+ if (options.attributesToHighlight) {
387
+ const highlights: Record<string, string> = {}
388
+ for (const attr of options.attributesToHighlight) {
389
+ const value = row[`hl_${attr}`]
390
+ if (typeof value === 'string') highlights[attr] = value
391
+ }
392
+ if (Object.keys(highlights).length > 0) hit.highlights = highlights
393
+ }
394
+ return hit
395
+ })
396
+
397
+ return {
398
+ hits,
399
+ totalHits,
400
+ page,
401
+ perPage,
402
+ processingTimeMs: performance.now() - start,
117
403
  }
118
- for (const engine of this.engines.values()) await engine.optimize()
404
+ }
405
+
406
+ // ─── Internals ──────────────────────────────────────────────────────────
407
+
408
+ private qualify(index: string): string {
409
+ return `"${this.schema}"."${index}"`
410
+ }
411
+
412
+ private async indexExists(index: string): Promise<boolean> {
413
+ const row = await this.exec().queryOne<{ exists: boolean }>(
414
+ `SELECT EXISTS (
415
+ SELECT 1 FROM pg_tables WHERE schemaname = $1 AND tablename = $2
416
+ ) AS "exists"`,
417
+ [this.schema, index],
418
+ )
419
+ return row?.exists === true
119
420
  }
120
421
 
121
422
  /**
122
- * Rebuild a single index's `fts` column in place. Use after changing
123
- * `searchableAttributes` or weights — without it, existing rows keep the
124
- * old fts values.
423
+ * Load `_meta` into the in-memory cache. Called lazily so apps
424
+ * that only ever read from indexes created by another process
425
+ * still get the right `ftsExpression` shape.
125
426
  */
126
- async rebuild(
127
- index: string,
128
- options?: { reindex?: boolean; pauseMs?: number; onProgress?: (done: number, total: number) => void }
129
- ): Promise<{ tier: 1 | 2; rows: number; elapsedMs: number }> {
130
- return (await this.engineFor(index)).rebuild(options)
131
- }
132
-
133
- // ── Internals ────────────────────────────────────────────────────────────
134
-
135
- private async engineFor(index: string): Promise<PgEngine> {
136
- let engine = this.engines.get(index)
137
- if (engine) return engine
138
-
139
- await this.bootstrap()
140
- const settings = this.pendingSettings.get(index)
141
- engine = new PgEngine({
142
- sql: this.resolveSql(),
143
- schema: this.schemaName,
144
- index,
145
- language: settings?.language ?? this.defaultLanguage,
146
- typoTolerance: this.typo,
147
- ginFastUpdate: this.ginFastUpdate,
148
- workMem: this.workMem,
149
- settings,
150
- })
151
- this.engines.set(index, engine)
152
- this.pendingSettings.delete(index)
153
- return engine
154
- }
427
+ private async resolveIndex(index: string): Promise<ResolvedIndex> {
428
+ const cached = this.indexes.get(index)
429
+ if (cached) return cached
155
430
 
156
- /** Resolve the SQL connection (config.connection or Database.raw fallback). */
157
- private resolveSql(): SQL {
158
- if (this.resolvedSql) return this.resolvedSql
159
- if (this.config.connection) {
160
- this.resolvedSql = this.config.connection
161
- return this.resolvedSql
431
+ const row = await this.exec().queryOne<{ settings: unknown; language: string }>(
432
+ `SELECT "settings", "language" FROM "${this.schema}"."_meta" WHERE "index_name" = $1`,
433
+ [index],
434
+ )
435
+ if (!row) {
436
+ throw new IndexNotFoundError(index, this.name)
162
437
  }
163
- try {
164
- // Lazy require to avoid a hard dep at import time.
165
- const databaseModule = require('@strav/database')
166
- const Database = databaseModule.default ?? databaseModule.Database
167
- this.resolvedSql = Database.raw as SQL
168
- return this.resolvedSql
169
- } catch {
170
- throw new MissingConnectionError()
438
+ const settings = (row.settings ?? {}) as IndexSettings
439
+ const resolved = resolveIndexSettings(settings, sanitizeLanguage(row.language))
440
+ this.indexes.set(index, resolved)
441
+ return resolved
442
+ }
443
+ }
444
+
445
+ // ─── Helpers ────────────────────────────────────────────────────────────
446
+
447
+ const FTS_WEIGHTS: readonly ('A' | 'B' | 'C' | 'D')[] = ['A', 'B', 'C', 'D']
448
+
449
+ function resolveIndexSettings(settings: IndexSettings, language: string): ResolvedIndex {
450
+ const searchable = (settings.searchableAttributes ?? []).map((attr) => {
451
+ if (!isSafeAttribute(attr)) {
452
+ throw new SearchError(
453
+ `PostgresFtsDriver: searchable attribute "${attr}" contains illegal characters. ` +
454
+ `Must match /^[a-z_][a-z0-9_]*$/.`,
455
+ { code: 'search.config' },
456
+ )
171
457
  }
458
+ return attr
459
+ })
460
+ const filterable = new Set(
461
+ (settings.filterableAttributes ?? []).filter((a) => {
462
+ if (!isSafeAttribute(a)) {
463
+ throw new SearchError(
464
+ `PostgresFtsDriver: filterable attribute "${a}" contains illegal characters.`,
465
+ { code: 'search.config' },
466
+ )
467
+ }
468
+ return true
469
+ }),
470
+ )
471
+ const sortable = new Set(
472
+ (settings.sortableAttributes ?? []).filter((a) => {
473
+ if (!isSafeAttribute(a)) {
474
+ throw new SearchError(
475
+ `PostgresFtsDriver: sortable attribute "${a}" contains illegal characters.`,
476
+ { code: 'search.config' },
477
+ )
478
+ }
479
+ return true
480
+ }),
481
+ )
482
+
483
+ const ftsExpression = buildFtsExpression(searchable, language)
484
+
485
+ return { settings, language, searchable, filterable, sortable, ftsExpression }
486
+ }
487
+
488
+ /**
489
+ * Compose a SQL expression that produces a tsvector for the row
490
+ * being inserted. The JSONB document is bound as `$2::jsonb`.
491
+ *
492
+ * When `searchableAttributes` is set, each one becomes a weighted
493
+ * `setweight(to_tsvector(lang, coalesce(doc->>'<attr>', '')), 'A|B|C|D')`
494
+ * pegged to its position. When the list is empty we fall back to
495
+ * indexing every top-level value of the JSONB via `jsonb_each_text`
496
+ * → `string_agg`, weighted 'A'.
497
+ */
498
+ function buildFtsExpression(searchable: readonly string[], language: string): string {
499
+ if (searchable.length === 0) {
500
+ return (
501
+ `setweight(to_tsvector('${language}', ` +
502
+ `coalesce((SELECT string_agg(value::text, ' ') FROM jsonb_each_text($2::jsonb)), '')),` +
503
+ ` 'A')`
504
+ )
172
505
  }
506
+ return searchable
507
+ .map((attr, i) => {
508
+ const weight = FTS_WEIGHTS[Math.min(i, FTS_WEIGHTS.length - 1)]
509
+ return `setweight(to_tsvector('${language}', coalesce($2::jsonb->>'${attr}', '')), '${weight}')`
510
+ })
511
+ .join(' || ')
512
+ }
513
+
514
+ function projectAttributes(
515
+ document: Record<string, unknown>,
516
+ attributes: string[] | undefined,
517
+ ): Record<string, unknown> {
518
+ if (!attributes || attributes.length === 0) return { ...document }
519
+ const out: Record<string, unknown> = {}
520
+ for (const attr of attributes) {
521
+ if (attr in document) out[attr] = document[attr]
522
+ }
523
+ if ('id' in document && !('id' in out)) out.id = document.id
524
+ return out
525
+ }
526
+
527
+ function isSafeAttribute(name: string): boolean {
528
+ return /^[a-z_][a-z0-9_]*$/.test(name)
529
+ }
173
530
 
174
- /** Idempotent: ensure schema + extensions exist, once per driver. */
175
- private bootstrap(): Promise<void> {
176
- if (this.bootstrapped) return this.bootstrapped
177
- this.bootstrapped = ensureSchemaAndExtensions(
178
- this.resolveSql(),
179
- this.schemaName,
180
- this.typo
531
+ function sanitizeLanguage(language: string): string {
532
+ // Postgres text-search configuration names are user-defined,
533
+ // but the built-ins ship with lowercase ASCII identifiers; we
534
+ // refuse anything else so the value can be spliced directly.
535
+ if (!/^[a-z_][a-z0-9_]*$/.test(language)) {
536
+ throw new SearchError(
537
+ `PostgresFtsDriver: invalid language ${JSON.stringify(language)} — must match /^[a-z_][a-z0-9_]*$/.`,
538
+ { code: 'search.config' },
181
539
  )
182
- return this.bootstrapped
183
540
  }
541
+ return language
184
542
  }