@strav/search 0.3.20 → 0.3.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. package/README.md +122 -3
  2. package/package.json +4 -4
  3. package/src/commands/search_optimize.ts +52 -0
  4. package/src/commands/search_rebuild.ts +73 -0
  5. package/src/drivers/embedded/embedded_driver.ts +136 -0
  6. package/src/drivers/embedded/engine/field_registry.ts +97 -0
  7. package/src/drivers/embedded/engine/fts_query_builder.ts +184 -0
  8. package/src/drivers/embedded/engine/query_compiler.ts +134 -0
  9. package/src/drivers/embedded/engine/schema.ts +99 -0
  10. package/src/drivers/embedded/engine/snippet_formatter.ts +29 -0
  11. package/src/drivers/embedded/engine/sqlite_engine.ts +255 -0
  12. package/src/drivers/embedded/engine/typo_expander.ts +138 -0
  13. package/src/drivers/embedded/errors.ts +15 -0
  14. package/src/drivers/embedded/filters/filter_compiler.ts +136 -0
  15. package/src/drivers/embedded/index.ts +3 -0
  16. package/src/drivers/embedded/storage/paths.ts +23 -0
  17. package/src/drivers/embedded/types.ts +34 -0
  18. package/src/drivers/postgres/engine/field_registry.ts +116 -0
  19. package/src/drivers/postgres/engine/fts_query_builder.ts +105 -0
  20. package/src/drivers/postgres/engine/pg_engine.ts +300 -0
  21. package/src/drivers/postgres/engine/query_compiler.ts +165 -0
  22. package/src/drivers/postgres/engine/schema.ts +187 -0
  23. package/src/drivers/postgres/engine/snippet_formatter.ts +31 -0
  24. package/src/drivers/postgres/engine/typo_expander.ts +131 -0
  25. package/src/drivers/postgres/errors.ts +33 -0
  26. package/src/drivers/postgres/filters/filter_compiler.ts +138 -0
  27. package/src/drivers/postgres/index.ts +14 -0
  28. package/src/drivers/postgres/postgres_fts_driver.ts +184 -0
  29. package/src/drivers/postgres/rebuild/rebuild_inplace.ts +113 -0
  30. package/src/drivers/postgres/storage/identifiers.ts +46 -0
  31. package/src/drivers/postgres/types.ts +53 -0
  32. package/src/index.ts +11 -0
  33. package/src/search_manager.ts +7 -0
  34. package/stubs/config/search.ts +25 -0
@@ -0,0 +1,131 @@
1
+ import type { SQL } from 'bun'
2
+ import { termsTableName } from '../storage/identifiers.ts'
3
+ import type { ResolvedTypoTolerance } from '../types.ts'
4
+
5
/**
 * Split text into the lowercase tokens tracked by the terms dictionary.
 *
 * The text is lowercased, cut on every run of characters that are neither
 * Unicode letters nor Unicode numbers, and pieces shorter than two characters
 * are discarded. The original comment notes this mirrors the embedded driver.
 *
 * @param text Raw text to tokenise; a falsy value yields an empty list.
 * @returns Tokens in order of appearance; duplicates are kept.
 */
export function tokenize(text: string): string[] {
  return text
    ? text
        .toLowerCase()
        .split(/[^\p{L}\p{N}]+/u)
        .filter(piece => piece.length > 1)
    : []
}
14
+
15
/**
 * Register the distinct tokens of one document in the index's terms table.
 *
 * Each unique token is inserted once; when the term already exists its
 * `doc_freq` is incremented by one, so a token repeated inside the same text
 * still counts as a single document occurrence. Nothing is sent to the
 * database when the text produces no tokens.
 *
 * @param sql    Connection used to run the statement.
 * @param schema Schema holding the search tables.
 * @param index  Search index whose terms table is updated.
 * @param text   Document text to tokenise.
 */
export async function recordTerms(
  sql: SQL,
  schema: string,
  index: string,
  text: string
): Promise<void> {
  const terms = [...new Set(tokenize(text))]
  if (terms.length === 0) return

  const table = termsTableName(schema, index)
  // One "($n)" row per term, numbered from 1 to match the bound array.
  const valueRows = terms.map((_, position) => `($${position + 1})`)
  const statement =
    `INSERT INTO ${table} (term) VALUES ${valueRows.join(', ')} ` +
    `ON CONFLICT (term) DO UPDATE SET doc_freq = ${table}.doc_freq + 1`

  await sql.unsafe(statement, terms)
}
32
+
33
/**
 * Remove one document's contribution from the index's terms table.
 *
 * Every distinct token of `text` has its `doc_freq` decremented by one, then
 * any of those terms whose count reached zero (or below) is deleted. The purge
 * is limited to the terms just decremented, so it is a keyed lookup rather than
 * a scan of the whole dictionary on every document removal.
 *
 * The two statements are issued separately; wrap the call in a transaction if
 * the decrement and the purge must be atomic.
 *
 * @param sql    Connection used to run the statements.
 * @param schema Schema holding the search tables.
 * @param index  Search index whose terms table is updated.
 * @param text   Text of the document being removed.
 */
export async function unrecordTerms(
  sql: SQL,
  schema: string,
  index: string,
  text: string
): Promise<void> {
  const unique = Array.from(new Set(tokenize(text)))
  if (unique.length === 0) return

  const table = termsTableName(schema, index)
  const placeholders = unique.map((_, i) => `$${i + 1}`).join(', ')

  await sql.unsafe(
    `UPDATE ${table} SET doc_freq = doc_freq - 1 WHERE term IN (${placeholders})`,
    unique
  )
  // Only the rows touched above can have dropped to zero.
  await sql.unsafe(
    `DELETE FROM ${table} WHERE term IN (${placeholders}) AND doc_freq <= 0`,
    unique
  )
}
50
+
51
/**
 * Look up near-miss spellings of query tokens in the index's terms table.
 *
 * Candidates are selected with `similarity()` (pg_trgm). When fuzzystrmatch is
 * available the best 32 trigram candidates are additionally filtered by
 * Levenshtein distance, because trigram similarity on short tokens is noisy.
 *
 * `levenshtein()` rejects arguments longer than 255 characters, so the
 * Levenshtein refinement is skipped for longer tokens (they fall back to the
 * trigram-only query) and longer stored terms are excluded from its candidate
 * set; otherwise a single oversized string would fail the whole search.
 *
 * @param sql              Connection used to run the lookups.
 * @param schema           Schema holding the search tables.
 * @param index            Search index whose terms table is queried.
 * @param tokens           Query tokens; duplicates are looked up only once.
 * @param settings         Resolved typo-tolerance settings.
 * @param hasFuzzystrmatch Whether `levenshtein()` can be called.
 * @param maxCandidates    Maximum alternatives returned per token (default 8).
 * @returns Map from token to its alternatives; tokens with none are omitted.
 */
export async function expandTokens(
  sql: SQL,
  schema: string,
  index: string,
  tokens: string[],
  settings: ResolvedTypoTolerance,
  hasFuzzystrmatch: boolean,
  maxCandidates = 8
): Promise<Map<string, string[]>> {
  const out = new Map<string, string[]>()
  if (!settings.enabled || tokens.length === 0) return out

  // Documented argument-length ceiling of fuzzystrmatch's levenshtein().
  const levenshteinMaxLength = 255
  const table = termsTableName(schema, index)

  // The threshold is compared explicitly in the WHERE clause instead of going
  // through pg_trgm's per-session similarity threshold, so the caller's
  // session settings are neither read nor modified.
  const levenshteinQuery = `WITH cands AS (
      SELECT term FROM ${table}
      WHERE similarity(term, $1) >= $2 AND term <> $1
        AND char_length(term) <= ${levenshteinMaxLength}
      ORDER BY similarity(term, $1) DESC
      LIMIT 32
    )
    SELECT term FROM cands
    WHERE levenshtein(term, $1) <= $3
    LIMIT $4`
  const trigramQuery = `SELECT term FROM ${table}
    WHERE similarity(term, $1) >= $2 AND term <> $1
    ORDER BY similarity(term, $1) DESC
    LIMIT $3`

  // Set iteration keeps first-occurrence order and skips repeated tokens.
  for (const token of new Set(tokens)) {
    if (token.length < settings.minTokenLength) continue

    // token.length counts UTF-16 units, which is never less than the
    // character count Postgres checks, so this guard is conservative.
    const refine = hasFuzzystrmatch && token.length <= levenshteinMaxLength

    const rows = (await sql.unsafe(
      refine ? levenshteinQuery : trigramQuery,
      refine
        ? [token, settings.similarity, settings.maxDistance, maxCandidates]
        : [token, settings.similarity, maxCandidates]
    )) as Array<{ term: string }>

    if (rows.length > 0) out.set(token, rows.map(r => r.term))
  }

  return out
}
98
+
99
/**
 * Turn the user-facing typo-tolerance setting into concrete numbers.
 *
 * - `'off'` disables expansion (the numeric fields still carry the defaults).
 * - `undefined` and `'auto'` enable expansion with the defaults.
 * - An object enables expansion; each missing (or null) field falls back to
 *   its default.
 *
 * Defaults: minTokenLength 4, maxDistance 1, similarity 0.4.
 *
 * @param setting The raw setting from the driver configuration.
 * @returns A fully populated settings object.
 */
export function resolveTypoTolerance(
  setting:
    | 'off'
    | 'auto'
    | { minTokenLength?: number; maxDistance?: number; similarity?: number }
    | undefined
): ResolvedTypoTolerance {
  const defaults = { minTokenLength: 4, maxDistance: 1, similarity: 0.4 }

  if (setting === 'off') return { enabled: false, ...defaults }
  if (setting === undefined || setting === 'auto') return { enabled: true, ...defaults }

  const { minTokenLength, maxDistance, similarity } = setting
  return {
    enabled: true,
    minTokenLength: minTokenLength ?? defaults.minTokenLength,
    maxDistance: maxDistance ?? defaults.maxDistance,
    similarity: similarity ?? defaults.similarity,
  }
}
120
+
121
/**
 * Detect whether `levenshtein(text, text)` can be called by unqualified name.
 *
 * The check resolves the function through `to_regprocedure`, which honours the
 * connection's `search_path`. A plain name lookup in `pg_proc` would also
 * report functions living in schemas the unqualified call cannot reach, and
 * the later `levenshtein(...)` call would then fail.
 *
 * Best effort: any error while probing is treated as "not available".
 *
 * @param sql Connection to probe.
 * @returns true when the function resolves, false otherwise.
 */
export async function hasFuzzystrmatch(sql: SQL): Promise<boolean> {
  try {
    const rows = (await sql.unsafe(
      `SELECT 1 WHERE to_regprocedure('levenshtein(text,text)') IS NOT NULL`
    )) as Array<Record<string, unknown>>
    return rows.length > 0
  } catch {
    // Deliberate fallback: callers simply skip the Levenshtein refinement.
    return false
  }
}
@@ -0,0 +1,33 @@
1
+ import { SearchError } from '../../errors.ts'
2
+
3
+ export class PostgresFtsError extends SearchError {} // Base class of every error defined for the postgres-fts driver; catch it to handle them all at once.
4
+
5
/**
 * Signals that a Postgres extension the driver depends on is not installed.
 * The message names the extension and tells the operator how to install it
 * or how to turn typo tolerance off instead.
 */
export class MissingExtensionError extends PostgresFtsError {
  /** @param extension Name of the missing extension, interpolated into the message. */
  constructor(extension: string) {
    super(
      [
        `Postgres extension "${extension}" is required by the postgres-fts driver.`,
        `Run \`CREATE EXTENSION ${extension}\` as a superuser, or set typoTolerance: 'off' if you can't.`,
      ].join(' ')
    )
  }
}
13
+
14
+ export class RebuildRequiredError extends PostgresFtsError { // Raised when an index cannot be rebuilt by the available strategy (rebuildInPlace throws it above its row ceiling).
15
+ constructor(message: string) { // message: full explanation, passed to the base class unchanged
16
+ super(message)
17
+ }
18
+ }
19
+
20
+ export class UnsupportedFilterError extends PostgresFtsError { // Raised by the filter compiler for filters it cannot translate into SQL.
21
+ constructor(message: string) { // message: the specific reason; a fixed driver prefix is prepended below
22
+ super(`Postgres-fts driver filter is unsupported: ${message}`)
23
+ }
24
+ }
25
+
26
/**
 * Signals that the driver could not obtain a Postgres connection, neither
 * from its own configuration nor from `@strav/database`.
 */
export class MissingConnectionError extends PostgresFtsError {
  constructor() {
    super(
      [
        'PostgresFtsDriver has no Postgres connection.',
        'Pass `connection` in the driver config, or bootstrap @strav/database first so Database.raw is available.',
      ].join(' ')
    )
  }
}
@@ -0,0 +1,138 @@
1
+ import { UnsupportedFilterError } from '../errors.ts'
2
+ import { quoteIdent } from '../storage/identifiers.ts'
3
+
4
+ export interface CompiledFilter { // Result of compiling a filter object into a parameterized WHERE fragment.
5
+ /** SQL fragment to splice into a WHERE clause (no leading 'WHERE'). Empty if no filter. Individual conditions are joined with ' AND '. */
6
+ sql: string
7
+ /** Bound parameters in the order their `$N` placeholders appear. */
8
+ params: unknown[]
9
+ /** Number of placeholders this filter consumed, so the caller can offset its own later placeholders. */
10
+ paramCount: number
11
+ }
12
+
13
+ const OPERATORS = new Set(['eq', 'neq', 'gt', 'gte', 'lt', 'lte', 'in', 'nin']) // Keys recognised inside an operator object, e.g. { price: { gte: 10 } }.
14
+
15
/**
 * Translate a filter object into a parameterized SQL WHERE fragment.
 * The original notes the contract mirrors the embedded driver's (same
 * operator set, same shape).
 *
 * Per key, the value decides the condition:
 * - `undefined` → key skipped
 * - `null` → `col IS NULL`
 * - array → `col IN (...)`, or the always-false `1 = 0` when empty
 * - operator object → one condition per operator
 * - string / number / boolean → `col = $n`
 *
 * All conditions are joined with `AND`. Placeholders are numbered from
 * `startAt + 1` so callers can compose the fragment with bindings of their own.
 *
 * @param filter               Filter object; falsy means "no filter".
 * @param filterableAttributes Keys that may be filtered on.
 * @param startAt              Number of placeholders already used by the caller.
 * @returns The fragment, its bound values and the number of placeholders used.
 * @throws UnsupportedFilterError for raw string filters, keys outside
 *         `filterableAttributes`, and values of an unsupported shape.
 */
export function compileFilter(
  filter: Record<string, unknown> | string | undefined,
  filterableAttributes: ReadonlySet<string>,
  startAt = 0
): CompiledFilter {
  if (!filter) return { sql: '', params: [], paramCount: 0 }

  if (typeof filter === 'string') {
    throw new UnsupportedFilterError(
      'Raw string filters are not supported by the postgres-fts driver. ' +
        'Pass an object like `{ status: "published" }` instead.'
    )
  }

  const conditions: string[] = []
  const bindings: unknown[] = []
  let lastPlaceholder = startAt

  // Hands out "$n" markers, continuing after the caller's own placeholders.
  const nextPlaceholder = (): string => {
    lastPlaceholder += 1
    return `$${lastPlaceholder}`
  }

  for (const [key, value] of Object.entries(filter)) {
    if (value === undefined) continue

    if (!filterableAttributes.has(key)) {
      throw new UnsupportedFilterError(
        `Field "${key}" is not in filterableAttributes. Add it to the index settings before filtering on it.`
      )
    }

    const column = quoteIdent(key)

    if (value === null) {
      conditions.push(`${column} IS NULL`)
    } else if (Array.isArray(value)) {
      if (value.length > 0) {
        const slots = value.map(() => nextPlaceholder())
        conditions.push(`${column} IN (${slots.join(', ')})`)
        bindings.push(...value.map(coerce))
      } else {
        // An empty IN list can never match anything.
        conditions.push('1 = 0')
      }
    } else if (isOperatorObject(value)) {
      for (const [op, operand] of Object.entries(value)) {
        const part = compileOperator(column, op, operand, nextPlaceholder)
        conditions.push(part.sql)
        bindings.push(...part.params)
      }
    } else if (isPrimitive(value)) {
      conditions.push(`${column} = ${nextPlaceholder()}`)
      bindings.push(coerce(value))
    } else {
      throw new UnsupportedFilterError(
        `Unsupported filter value for key "${key}": ${JSON.stringify(value)}`
      )
    }
  }

  return {
    sql: conditions.join(' AND '),
    params: bindings,
    paramCount: lastPlaceholder - startAt,
  }
}
90
+
91
/**
 * Compile a single `{ operator: value }` pair into a SQL condition.
 *
 * `eq` / `neq` against `null` (or `undefined`) compile to `IS NULL` /
 * `IS NOT NULL` without binding a parameter: `col = NULL` and `col <> NULL`
 * are never true in SQL, so binding the null would silently match no rows.
 *
 * `in` with a non-array or empty list yields the always-false `1 = 0`;
 * `nin` with a non-array or empty list yields the always-true `1 = 1`.
 *
 * @param col   Already-quoted column identifier.
 * @param op    Operator name (`eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `in`, `nin`).
 * @param value Operand; list operators expect an array.
 * @param ph    Returns the next `$n` placeholder each time it is called.
 * @returns The condition and the values bound to the placeholders it consumed.
 * @throws UnsupportedFilterError when `op` is not a known operator.
 */
function compileOperator(
  col: string,
  op: string,
  value: unknown,
  ph: () => string
): { sql: string; params: unknown[] } {
  const isNullish = value === null || value === undefined

  switch (op) {
    case 'eq':
      if (isNullish) return { sql: `${col} IS NULL`, params: [] }
      return { sql: `${col} = ${ph()}`, params: [coerce(value)] }
    case 'neq':
      if (isNullish) return { sql: `${col} IS NOT NULL`, params: [] }
      return { sql: `${col} <> ${ph()}`, params: [coerce(value)] }
    case 'gt':
      return { sql: `${col} > ${ph()}`, params: [coerce(value)] }
    case 'gte':
      return { sql: `${col} >= ${ph()}`, params: [coerce(value)] }
    case 'lt':
      return { sql: `${col} < ${ph()}`, params: [coerce(value)] }
    case 'lte':
      return { sql: `${col} <= ${ph()}`, params: [coerce(value)] }
    case 'in': {
      if (!Array.isArray(value) || value.length === 0) return { sql: '1 = 0', params: [] }
      const placeholders = value.map(() => ph()).join(', ')
      return { sql: `${col} IN (${placeholders})`, params: value.map(coerce) }
    }
    case 'nin': {
      if (!Array.isArray(value) || value.length === 0) return { sql: '1 = 1', params: [] }
      const placeholders = value.map(() => ph()).join(', ')
      return { sql: `${col} NOT IN (${placeholders})`, params: value.map(coerce) }
    }
    default:
      throw new UnsupportedFilterError(`Unknown operator "${op}"`)
  }
}
124
+
125
/**
 * Decide whether a filter value is an operator object such as `{ gte: 10 }`.
 *
 * Only plain objects qualify (prototype `Object.prototype` or `null`), and
 * every own key must be a known operator. Without the plain-object check,
 * values like `Date`, `Map` or class instances — which have no own enumerable
 * keys — would pass vacuously, compile to zero conditions, and the filter on
 * that field would be silently ignored. An empty plain object is still
 * accepted and contributes no condition.
 *
 * @param value Candidate filter value.
 * @returns true when `value` is a plain object whose keys are all operators.
 */
function isOperatorObject(value: unknown): value is Record<string, unknown> {
  if (value === null || typeof value !== 'object' || Array.isArray(value)) return false

  const proto: unknown = Object.getPrototypeOf(value)
  if (proto !== Object.prototype && proto !== null) return false

  return Object.keys(value).every(k => OPERATORS.has(k))
}
129
+
130
/**
 * Report whether a filter value can be bound directly as an equality operand:
 * a string, a number or a boolean.
 */
function isPrimitive(value: unknown): boolean {
  switch (typeof value) {
    case 'string':
    case 'number':
    case 'boolean':
      return true
    default:
      return false
  }
}
133
+
134
/**
 * Normalise a filter operand before it is bound as a query parameter:
 * `null` / `undefined` become `null`, booleans become `1` / `0`, and every
 * other value is passed through untouched.
 *
 * NOTE(review): the boolean → integer mapping presumably mirrors the embedded
 * (SQLite) driver; confirm the filtered Postgres columns compare against
 * integers rather than booleans.
 */
function coerce(value: unknown): unknown {
  if (typeof value === 'boolean') return Number(value)
  return value ?? null
}
@@ -0,0 +1,14 @@
1
+ export { PostgresFtsDriver } from './postgres_fts_driver.ts'
2
+ export type {
3
+ PostgresFtsConfig,
4
+ TypoToleranceMode,
5
+ TypoToleranceSettings,
6
+ PgIndexSettings,
7
+ } from './types.ts'
8
+ export {
9
+ PostgresFtsError,
10
+ MissingExtensionError,
11
+ RebuildRequiredError,
12
+ UnsupportedFilterError,
13
+ MissingConnectionError,
14
+ } from './errors.ts'
@@ -0,0 +1,184 @@
1
+ import type { SQL } from 'bun'
2
+ import type { SearchEngine } from '../../search_engine.ts'
3
+ import type {
4
+ SearchDocument,
5
+ SearchOptions,
6
+ SearchResult,
7
+ IndexSettings,
8
+ DriverConfig,
9
+ } from '../../types.ts'
10
+ import { PgEngine } from './engine/pg_engine.ts'
11
+ import { ensureSchemaAndExtensions } from './engine/schema.ts'
12
+ import { resolveTypoTolerance } from './engine/typo_expander.ts'
13
+ import type { PostgresFtsConfig, PgIndexSettings, ResolvedTypoTolerance } from './types.ts'
14
+ import { MissingConnectionError } from './errors.ts'
15
+
16
+ const DEFAULT_SCHEMA = 'strav_search' // Schema name used when config.schema is not set.
17
+ const DEFAULT_LANGUAGE = 'english' // Language used when neither the index settings nor config.language provide one.
18
+ const DEFAULT_WORK_MEM = '64MB' // workMem used when config.workMem is undefined; an explicit null is kept as null.
19
+
20
/**
 * Postgres-backed full-text search driver. Implements the same `SearchEngine`
 * interface as the embedded SQLite driver — drop-in swap by config.
 *
 * Sized for higher-volume workloads (1M-100M docs per index) using `tsvector`
 * + GIN + `pg_trgm` for typo tolerance + `ts_headline` for snippets.
 *
 * Connection: pass `connection` (a Bun `SQL` instance) in the driver config,
 * or rely on `Database.raw` from `@strav/database` (must be bootstrapped).
 *
 * One `PgEngine` is created lazily per index and cached for the lifetime of
 * the driver. Schema/extension bootstrap runs once per driver and is retried
 * on the next call if it fails.
 */
export class PostgresFtsDriver implements SearchEngine {
  readonly name = 'postgres-fts'

  private readonly config: PostgresFtsConfig
  private readonly schemaName: string
  private readonly defaultLanguage: string
  private readonly typo: ResolvedTypoTolerance
  private readonly ginFastUpdate: boolean
  private readonly workMem: string | null
  /** Engines already created, keyed by index name. */
  private readonly engines = new Map<string, PgEngine>()
  /** Settings given to createIndex() that the next new engine for that index will consume. */
  private readonly pendingSettings = new Map<string, PgIndexSettings>()
  /** In-flight or completed bootstrap; null until first use and after a failure. */
  private bootstrapped: Promise<void> | null = null
  private resolvedSql: SQL | null = null

  /**
   * @param config Driver configuration; missing fields fall back to the
   *               module defaults. `workMem: null` is kept as null, while
   *               `undefined` selects the default.
   */
  constructor(config: DriverConfig) {
    this.config = (config ?? {}) as PostgresFtsConfig
    this.schemaName = this.config.schema ?? DEFAULT_SCHEMA
    this.defaultLanguage = this.config.language ?? DEFAULT_LANGUAGE
    this.typo = resolveTypoTolerance(this.config.typoTolerance)
    this.ginFastUpdate = this.config.gin?.fastupdate ?? false
    this.workMem =
      this.config.workMem === null
        ? null
        : (this.config.workMem ?? DEFAULT_WORK_MEM)
  }

  // ── Document operations ──────────────────────────────────────────────────

  /** Insert or replace one document in `index`. */
  async upsert(
    index: string,
    id: string | number,
    document: Record<string, unknown>
  ): Promise<void> {
    await (await this.engineFor(index)).upsert(id, document)
  }

  /** Insert or replace several documents in `index`. */
  async upsertMany(index: string, documents: SearchDocument[]): Promise<void> {
    await (await this.engineFor(index)).upsertMany(documents)
  }

  /** Remove one document from `index`. */
  async delete(index: string, id: string | number): Promise<void> {
    await (await this.engineFor(index)).delete(id)
  }

  /** Remove several documents from `index`. */
  async deleteMany(index: string, ids: Array<string | number>): Promise<void> {
    await (await this.engineFor(index)).deleteMany(ids)
  }

  // ── Index operations ─────────────────────────────────────────────────────

  /** Delegate to the engine's `flush()` for `index`. */
  async flush(index: string): Promise<void> {
    await (await this.engineFor(index)).flush()
  }

  /**
   * Drop an index. Uses the cached engine when there is one; otherwise drops
   * the tables directly so no engine has to be created just to delete it.
   * Any settings still pending for the index are discarded.
   */
  async deleteIndex(index: string): Promise<void> {
    const engine = this.engines.get(index)
    if (engine) {
      await engine.drop()
      this.engines.delete(index)
    } else {
      // Drop directly without instantiating an engine.
      const sql = this.resolveSql()
      const { dropIndex } = await import('./engine/schema.ts')
      await dropIndex(sql, this.schemaName, index)
    }
    this.pendingSettings.delete(index)
  }

  /**
   * Create (or ensure) an index. `options` are stored and consumed when the
   * engine for this index is first created.
   *
   * NOTE: if an engine for `index` is already cached, it is reused as-is and
   * the stored options stay pending until a new engine is created or the
   * index is deleted.
   */
  async createIndex(index: string, options?: IndexSettings): Promise<void> {
    if (options) this.pendingSettings.set(index, options as PgIndexSettings)
    const engine = await this.engineFor(index)
    await engine.ensure()
  }

  // ── Search ───────────────────────────────────────────────────────────────

  /** Run `query` against `index`. */
  async search(index: string, query: string, options?: SearchOptions): Promise<SearchResult> {
    return (await this.engineFor(index)).search(query, options)
  }

  // ── Lifecycle ────────────────────────────────────────────────────────────

  /** Run REINDEX on every open index, or just one if specified. */
  async optimize(index?: string): Promise<void> {
    if (index) {
      await (await this.engineFor(index)).optimize()
      return
    }
    for (const engine of this.engines.values()) await engine.optimize()
  }

  /**
   * Rebuild a single index's `fts` column in place. Use after changing
   * `searchableAttributes` or weights — without it, existing rows keep the
   * old fts values.
   */
  async rebuild(
    index: string,
    options?: { reindex?: boolean; pauseMs?: number; onProgress?: (done: number, total: number) => void }
  ): Promise<{ tier: 1 | 2; rows: number; elapsedMs: number }> {
    return (await this.engineFor(index)).rebuild(options)
  }

  // ── Internals ────────────────────────────────────────────────────────────

  /**
   * Return the cached engine for `index`, creating it on first use.
   *
   * The cache is checked again after the bootstrap await: concurrent first
   * calls for the same index all pause there, and without the second check
   * each would build its own engine — the later ones without the pending
   * settings, which the first one already consumed — and overwrite the cache.
   */
  private async engineFor(index: string): Promise<PgEngine> {
    const cached = this.engines.get(index)
    if (cached) return cached

    await this.bootstrap()

    const createdMeanwhile = this.engines.get(index)
    if (createdMeanwhile) return createdMeanwhile

    const settings = this.pendingSettings.get(index)
    const engine = new PgEngine({
      sql: this.resolveSql(),
      schema: this.schemaName,
      index,
      language: settings?.language ?? this.defaultLanguage,
      typoTolerance: this.typo,
      ginFastUpdate: this.ginFastUpdate,
      workMem: this.workMem,
      settings,
    })
    this.engines.set(index, engine)
    this.pendingSettings.delete(index)
    return engine
  }

  /**
   * Resolve the SQL connection (config.connection or Database.raw fallback).
   *
   * @throws MissingConnectionError when no connection is configured and
   *         `@strav/database` is unavailable or exposes no `Database.raw`.
   */
  private resolveSql(): SQL {
    if (this.resolvedSql) return this.resolvedSql
    if (this.config.connection) {
      this.resolvedSql = this.config.connection
      return this.resolvedSql
    }

    let raw: SQL | null | undefined
    try {
      // Lazy require to avoid a hard dep at import time.
      const databaseModule = require('@strav/database')
      const Database = databaseModule.default ?? databaseModule.Database
      raw = Database.raw
    } catch {
      throw new MissingConnectionError()
    }
    // A loaded module with no `raw` must not be cached or returned as a
    // connection; surface it as the same actionable error.
    if (!raw) throw new MissingConnectionError()

    this.resolvedSql = raw
    return raw
  }

  /**
   * Idempotent: ensure schema + extensions exist, once per driver.
   *
   * Concurrent callers share the same in-flight promise. If the attempt
   * rejects, the cached promise is cleared so a later call retries instead of
   * replaying the old failure forever.
   */
  private bootstrap(): Promise<void> {
    if (this.bootstrapped) return this.bootstrapped

    const attempt = ensureSchemaAndExtensions(
      this.resolveSql(),
      this.schemaName,
      this.typo
    )
    this.bootstrapped = attempt
    // The handler only resets the cache; the rejection itself still reaches
    // whoever awaits `attempt`.
    attempt.catch(() => {
      if (this.bootstrapped === attempt) this.bootstrapped = null
    })
    return attempt
  }
}
@@ -0,0 +1,113 @@
1
+ import type { SQL } from 'bun'
2
+ import type { FieldRegistry } from '../engine/field_registry.ts'
3
+ import { indexTableName, quoteLiteral, quoteIdent } from '../storage/identifiers.ts'
4
+ import { RebuildRequiredError } from '../errors.ts'
5
+
6
+ /** Tier boundaries for rebuild strategy selection (row counts; both are inclusive upper bounds). */
7
+ const TIER1_MAX = 100_000 // at or below this: one single UPDATE
8
+ const TIER2_MAX = 10_000_000 // above this: rebuildInPlace throws RebuildRequiredError
9
+
10
+ /** Batch size for tier-2 batched UPDATE: rows selected and updated per round trip. */
11
+ const BATCH_SIZE = 5_000
12
+
13
+ export interface RebuildOptions { // Tuning knobs for rebuildInPlace; every field is optional.
14
+ /** If true, run REINDEX on the GIN index after the rebuild. Default true. */
15
+ reindex?: boolean
16
+ /** Per-batch sleep in milliseconds (tier 2 only). Default 50. A value of 0 or less disables the sleep. */
17
+ pauseMs?: number
18
+ /** Optional progress callback fired after each batch (tier 2 only) with the rows processed so far and the row count taken at the start. */
19
+ onProgress?: (done: number, total: number) => void
20
+ }
21
+
22
/**
 * Rebuild an index's `fts` column in place using the current registry's
 * language + weight scheme. Picks tier by row count:
 * - ≤ 100k   → single UPDATE
 * - ≤ 10M    → batched UPDATE, keyed by id, with pauses between batches
 * - > 10M    → RebuildRequiredError (defer to v1.1 swap strategy)
 *
 * In tier 2 the loop stops as soon as a batch comes back short, so no extra
 * empty probe query and no trailing pause are spent after the last batch.
 *
 * @param sql      Connection used for all statements.
 * @param schema   Schema holding the search tables.
 * @param index    Index to rebuild.
 * @param registry Supplies the language, searchable attributes and weights.
 * @param options  See {@link RebuildOptions}.
 * @returns The tier used, the row count taken at the start, and elapsed
 *          wall-clock time in milliseconds.
 * @throws RebuildRequiredError when the table holds more than 10M rows.
 */
export async function rebuildInPlace(
  sql: SQL,
  schema: string,
  index: string,
  registry: FieldRegistry,
  options: RebuildOptions = {}
): Promise<{ tier: 1 | 2; rows: number; elapsedMs: number }> {
  const reindex = options.reindex ?? true
  const pauseMs = options.pauseMs ?? 50
  const table = indexTableName(schema, index)
  const start = performance.now()

  const countRows = (await sql.unsafe(
    `SELECT COUNT(*)::bigint AS n FROM ${table}`
  )) as Array<{ n: string | number }>
  // bigint may arrive as a string; normalise to a number.
  const total = Number(countRows[0]?.n ?? 0)

  if (total > TIER2_MAX) {
    throw new RebuildRequiredError(
      `Index "${index}" has ${total} rows (>${TIER2_MAX}). ` +
        `In-place / batched rebuild is unsafe at this scale. ` +
        `Use the v1.1 dual-table swap strategy (not yet shipped).`
    )
  }

  const ftsExpr = buildSetFtsExpression(registry)

  if (total <= TIER1_MAX) {
    await sql.unsafe(`UPDATE ${table} SET fts = ${ftsExpr}`)
    if (reindex) await reindexGin(sql, schema, index)
    return { tier: 1, rows: total, elapsedMs: Math.round(performance.now() - start) }
  }

  // Tier 2: batched update keyed by id, with pauses for autovacuum.
  let cursor: string | null = null
  let done = 0

  while (true) {
    const where = cursor === null ? '' : `WHERE id > $1`
    const params = cursor === null ? [] : [cursor]
    const batch = (await sql.unsafe(
      `SELECT id FROM ${table} ${where} ORDER BY id LIMIT ${BATCH_SIZE}`,
      params
    )) as Array<{ id: string }>
    if (batch.length === 0) break

    const ids = batch.map(r => r.id)
    const placeholders = ids.map((_, i) => `$${i + 1}`).join(', ')
    await sql.unsafe(
      `UPDATE ${table} SET fts = ${ftsExpr} WHERE id IN (${placeholders})`,
      ids
    )

    done += batch.length
    cursor = ids[ids.length - 1]!
    options.onProgress?.(done, total)

    // A short batch means the keyset scan reached the end of the table.
    if (batch.length < BATCH_SIZE) break
    if (pauseMs > 0) await new Promise(r => setTimeout(r, pauseMs))
  }

  if (reindex) await reindexGin(sql, schema, index)
  return { tier: 2, rows: total, elapsedMs: Math.round(performance.now() - start) }
}
91
+
92
/**
 * Build the SQL expression assigned to the `fts` column during a rebuild.
 *
 * When the registry uses the default text column, every text value of the
 * `doc` JSONB is concatenated and indexed with weight 'A'. Otherwise one
 * weighted `to_tsvector` is produced per searchable attribute and the parts
 * are concatenated with `||`.
 *
 * @param registry Provides the language, the searchable attributes and the
 *                 weight registered for each of them.
 * @returns A SQL expression suitable for `SET fts = ...`.
 */
function buildSetFtsExpression(registry: FieldRegistry): string {
  const config = quoteLiteral(registry.language) + '::regconfig'

  if (!registry.usesDefaultTextColumn) {
    const weighted = registry.searchable.map(attr => {
      // A weight is expected to be registered for every searchable attribute.
      const weight = registry.weights.get(attr)!
      const text = `coalesce(doc->>${quoteLiteral(attr)}, '')`
      return `setweight(to_tsvector(${config}, ${text}), '${weight}')`
    })
    return weighted.join(' || ')
  }

  const allText = `(SELECT coalesce(string_agg(value, ' '), '') FROM jsonb_each_text(doc))`
  return `setweight(to_tsvector(${config}, ${allText}), 'A')`
}
109
+
110
/**
 * Run `REINDEX INDEX` on the index named `search_<index>_fts_gin` inside
 * `schema`.
 */
async function reindexGin(sql: SQL, schema: string, index: string): Promise<void> {
  const qualifiedName = [schema, `search_${index}_fts_gin`]
    .map(part => quoteIdent(part))
    .join('.')
  await sql.unsafe(`REINDEX INDEX ${qualifiedName}`)
}
@@ -0,0 +1,46 @@
1
+ import { PostgresFtsError } from '../errors.ts'
2
+
3
/** Maximum identifier length, in bytes, accepted by quoteIdent. */
const PG_IDENT_MAX = 63

/**
 * Quote a Postgres identifier (schema, table, column).
 *
 * The name is wrapped in double quotes and embedded double quotes are
 * doubled. Names are validated first so a bad identifier fails here with a
 * clear message instead of producing SQL the server rejects:
 * - empty names (a zero-length quoted identifier `""` is invalid SQL),
 * - names containing a NUL character,
 * - names longer than 63 bytes in UTF-8.
 *
 * @param name Raw identifier.
 * @returns The double-quoted identifier, safe to splice into SQL.
 * @throws PostgresFtsError when the name is empty, contains NUL, or is too long.
 */
export function quoteIdent(name: string): string {
  if (name.length === 0) throw new PostgresFtsError(`Invalid identifier: must not be empty.`)
  if (name.includes('\0')) throw new PostgresFtsError(`Invalid identifier: contains NUL byte.`)
  if (Buffer.byteLength(name, 'utf8') > PG_IDENT_MAX) {
    throw new PostgresFtsError(
      `Identifier "${name}" exceeds Postgres' ${PG_IDENT_MAX}-byte limit.`
    )
  }
  return `"${name.replace(/"/g, '""')}"`
}
18
+
19
/**
 * Wrap a value in single quotes as a SQL string literal, doubling every
 * embedded single quote (used inside DDL options).
 */
export function quoteLiteral(value: string): string {
  const escaped = value.split("'").join("''")
  return "'" + escaped + "'"
}
23
+
24
/**
 * Schema-qualified, quoted name of the table backing a search index:
 * `"<schema>"."search_<index>"`.
 */
export function indexTableName(schema: string, index: string): string {
  const parts = [quoteIdent(schema), quoteIdent(`search_${index}`)]
  return parts.join('.')
}
28
+
29
/**
 * Schema-qualified, quoted name of an index's terms-dictionary table:
 * `"<schema>"."search_<index>_terms"`.
 */
export function termsTableName(schema: string, index: string): string {
  const parts = [quoteIdent(schema), quoteIdent(`search_${index}_terms`)]
  return parts.join('.')
}
33
+
34
/**
 * Schema-qualified, quoted name of the meta table — a single table shared by
 * all indexes, with rows keyed by (index_name, key).
 */
export function metaTableName(schema: string): string {
  const parts = [quoteIdent(schema), quoteIdent('_meta')]
  return parts.join('.')
}
38
+
39
/**
 * Unquoted, unqualified table name of a search index — useful for catalog
 * lookups such as pg_class, where names are compared as plain strings.
 */
export function bareIndexTable(index: string): string {
  return 'search_' + index
}
43
+
44
/** Unquoted, unqualified name of an index's terms-dictionary table. */
export function bareTermsTable(index: string): string {
  return 'search_' + index + '_terms'
}