@strav/search 0.3.21 → 0.3.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,14 @@
1
+ export { PostgresFtsDriver } from './postgres_fts_driver.ts'
2
+ export type {
3
+ PostgresFtsConfig,
4
+ TypoToleranceMode,
5
+ TypoToleranceSettings,
6
+ PgIndexSettings,
7
+ } from './types.ts'
8
+ export {
9
+ PostgresFtsError,
10
+ MissingExtensionError,
11
+ RebuildRequiredError,
12
+ UnsupportedFilterError,
13
+ MissingConnectionError,
14
+ } from './errors.ts'
@@ -0,0 +1,184 @@
1
+ import type { SQL } from 'bun'
2
+ import type { SearchEngine } from '../../search_engine.ts'
3
+ import type {
4
+ SearchDocument,
5
+ SearchOptions,
6
+ SearchResult,
7
+ IndexSettings,
8
+ DriverConfig,
9
+ } from '../../types.ts'
10
+ import { PgEngine } from './engine/pg_engine.ts'
11
+ import { ensureSchemaAndExtensions } from './engine/schema.ts'
12
+ import { resolveTypoTolerance } from './engine/typo_expander.ts'
13
+ import type { PostgresFtsConfig, PgIndexSettings, ResolvedTypoTolerance } from './types.ts'
14
+ import { MissingConnectionError } from './errors.ts'
15
+
16
/** Schema used for index tables when `config.schema` is not set. */
const DEFAULT_SCHEMA = 'strav_search'
/** Text-search configuration used when neither the index nor the driver sets one. */
const DEFAULT_LANGUAGE = 'english'
/** `work_mem` hint used when `config.workMem` is left undefined. */
const DEFAULT_WORK_MEM = '64MB'

/**
 * Postgres-backed full-text search driver. Implements the same `SearchEngine`
 * interface as the embedded SQLite driver — drop-in swap by config.
 *
 * Sized for higher-volume workloads (1M-100M docs per index) using `tsvector`
 * + GIN + `pg_trgm` for typo tolerance + `ts_headline` for snippets.
 *
 * Connection: pass `connection` (a Bun `SQL` instance) in the driver config,
 * or rely on `Database.raw` from `@strav/database` (must be bootstrapped).
 *
 * Internally the driver keeps one lazily created `PgEngine` per index name and
 * forwards every document/search call to it.
 */
export class PostgresFtsDriver implements SearchEngine {
  readonly name = 'postgres-fts'

  private readonly config: PostgresFtsConfig
  private readonly schemaName: string
  private readonly defaultLanguage: string
  private readonly typo: ResolvedTypoTolerance
  private readonly ginFastUpdate: boolean
  /** `null` means "do not apply a work_mem hint". */
  private readonly workMem: string | null
  /** Engines already created, keyed by index name. */
  private readonly engines = new Map<string, PgEngine>()
  /** Settings passed to `createIndex` and not yet consumed by an engine. */
  private readonly pendingSettings = new Map<string, PgIndexSettings>()
  /** In-flight or completed bootstrap; reset to `null` when an attempt fails. */
  private bootstrapped: Promise<void> | null = null
  private resolvedSql: SQL | null = null

  /**
   * @param config Driver configuration; read as a `PostgresFtsConfig`. Missing
   *   values fall back to the module defaults.
   */
  constructor(config: DriverConfig) {
    this.config = (config ?? {}) as PostgresFtsConfig
    this.schemaName = this.config.schema ?? DEFAULT_SCHEMA
    this.defaultLanguage = this.config.language ?? DEFAULT_LANGUAGE
    this.typo = resolveTypoTolerance(this.config.typoTolerance)
    this.ginFastUpdate = this.config.gin?.fastupdate ?? false

    // The config contract is "null/empty to skip": an empty (or blank) string
    // must disable the hint just like `null`, not be forwarded as a value.
    const workMem = this.config.workMem
    if (workMem === undefined) {
      this.workMem = DEFAULT_WORK_MEM
    } else if (workMem === null || workMem.trim() === '') {
      this.workMem = null
    } else {
      this.workMem = workMem
    }
  }

  // ── Document operations ──────────────────────────────────────────────────

  /** Insert or replace one document in `index`. */
  async upsert(
    index: string,
    id: string | number,
    document: Record<string, unknown>
  ): Promise<void> {
    await (await this.engineFor(index)).upsert(id, document)
  }

  /** Insert or replace several documents in `index`. */
  async upsertMany(index: string, documents: SearchDocument[]): Promise<void> {
    await (await this.engineFor(index)).upsertMany(documents)
  }

  /** Remove one document from `index`. */
  async delete(index: string, id: string | number): Promise<void> {
    await (await this.engineFor(index)).delete(id)
  }

  /** Remove several documents from `index`. */
  async deleteMany(index: string, ids: Array<string | number>): Promise<void> {
    await (await this.engineFor(index)).deleteMany(ids)
  }

  // ── Index operations ─────────────────────────────────────────────────────

  /** Delegate to the engine's `flush()` for `index`. */
  async flush(index: string): Promise<void> {
    await (await this.engineFor(index)).flush()
  }

  /**
   * Drop `index`. Uses the cached engine when there is one; otherwise drops
   * the tables directly so no engine has to be instantiated just to delete.
   * Any settings still pending for the index are discarded as well.
   */
  async deleteIndex(index: string): Promise<void> {
    const engine = this.engines.get(index)
    if (engine) {
      await engine.drop()
      this.engines.delete(index)
    } else {
      // Drop directly without instantiating an engine.
      const sql = this.resolveSql()
      const { dropIndex } = await import('./engine/schema.ts')
      await dropIndex(sql, this.schemaName, index)
    }
    this.pendingSettings.delete(index)
  }

  /**
   * Create `index`. When `options` are given they are stashed so that the
   * engine created for this index picks them up.
   */
  async createIndex(index: string, options?: IndexSettings): Promise<void> {
    if (options) this.pendingSettings.set(index, options as PgIndexSettings)
    const engine = await this.engineFor(index)
    await engine.ensure()
  }

  // ── Search ───────────────────────────────────────────────────────────────

  /** Run `query` against `index` and return the engine's result. */
  async search(index: string, query: string, options?: SearchOptions): Promise<SearchResult> {
    return (await this.engineFor(index)).search(query, options)
  }

  // ── Lifecycle ────────────────────────────────────────────────────────────

  /**
   * Run REINDEX on every open index, or just one if specified. "Open" means
   * an engine has already been created for it by this driver instance;
   * engines are optimized one after another.
   */
  async optimize(index?: string): Promise<void> {
    if (index) {
      await (await this.engineFor(index)).optimize()
      return
    }
    for (const engine of this.engines.values()) await engine.optimize()
  }

  /**
   * Rebuild a single index's `fts` column in place. Use after changing
   * `searchableAttributes` or weights — without it, existing rows keep the
   * old fts values.
   *
   * @returns What the engine's `rebuild()` reports: tier used, row count and
   *   elapsed milliseconds.
   */
  async rebuild(
    index: string,
    options?: { reindex?: boolean; pauseMs?: number; onProgress?: (done: number, total: number) => void }
  ): Promise<{ tier: 1 | 2; rows: number; elapsedMs: number }> {
    return (await this.engineFor(index)).rebuild(options)
  }

  // ── Internals ────────────────────────────────────────────────────────────

  /**
   * Return the engine for `index`, creating and caching it on first use.
   * The first call also triggers the one-time schema/extension bootstrap.
   */
  private async engineFor(index: string): Promise<PgEngine> {
    const cached = this.engines.get(index)
    if (cached) return cached

    await this.bootstrap()

    // Another caller may have created the engine while we were awaiting the
    // bootstrap. Without this re-check the later caller would build a second
    // engine — after the pending settings were already consumed — and
    // overwrite the correctly configured one.
    const createdMeanwhile = this.engines.get(index)
    if (createdMeanwhile) return createdMeanwhile

    const settings = this.pendingSettings.get(index)
    const engine = new PgEngine({
      sql: this.resolveSql(),
      schema: this.schemaName,
      index,
      language: settings?.language ?? this.defaultLanguage,
      typoTolerance: this.typo,
      ginFastUpdate: this.ginFastUpdate,
      workMem: this.workMem,
      settings,
    })
    this.engines.set(index, engine)
    this.pendingSettings.delete(index)
    return engine
  }

  /**
   * Resolve the SQL connection (config.connection or Database.raw fallback).
   *
   * @throws MissingConnectionError when no connection is configured and
   *   `@strav/database` cannot be loaded or exposes no usable `raw` handle.
   */
  private resolveSql(): SQL {
    if (this.resolvedSql) return this.resolvedSql
    if (this.config.connection) {
      this.resolvedSql = this.config.connection
      return this.resolvedSql
    }

    let raw: SQL | null | undefined
    try {
      // Lazy require to avoid a hard dep at import time.
      const databaseModule = require('@strav/database')
      const Database = databaseModule.default ?? databaseModule.Database
      raw = Database.raw as SQL | null | undefined
    } catch {
      throw new MissingConnectionError()
    }
    // A missing/unset `raw` must not be handed out as if it were a connection.
    if (!raw) throw new MissingConnectionError()

    this.resolvedSql = raw
    return raw
  }

  /**
   * Idempotent: ensure schema + extensions exist, once per driver. A failed
   * attempt is not cached, so the next call retries instead of replaying the
   * same rejection forever.
   */
  private bootstrap(): Promise<void> {
    if (this.bootstrapped) return this.bootstrapped

    const attempt = ensureSchemaAndExtensions(this.resolveSql(), this.schemaName, this.typo)
    this.bootstrapped = attempt
    // Side branch only clears the cache; callers still observe the rejection
    // through `attempt` itself.
    attempt.catch(() => {
      if (this.bootstrapped === attempt) this.bootstrapped = null
    })
    return attempt
  }
}
@@ -0,0 +1,113 @@
1
+ import type { SQL } from 'bun'
2
+ import type { FieldRegistry } from '../engine/field_registry.ts'
3
+ import { indexTableName, quoteLiteral, quoteIdent } from '../storage/identifiers.ts'
4
+ import { RebuildRequiredError } from '../errors.ts'
5
+
6
/**
 * Row-count thresholds that select the rebuild strategy:
 * counts up to TIER1_MAX (inclusive) use tier 1, counts up to TIER2_MAX
 * (inclusive) use tier 2, anything larger is refused.
 */
const TIER1_MAX = 100_000
const TIER2_MAX = 10_000_000

/** Number of ids fetched and updated per round in the tier-2 batched UPDATE. */
const BATCH_SIZE = 5_000
12
+
13
/** Tuning knobs accepted by the in-place rebuild. Every field is optional. */
export interface RebuildOptions {
  /** Run REINDEX on the GIN index once the rebuild is done. Defaults to true. */
  reindex?: boolean
  /** Milliseconds to sleep between batches; only used by tier 2. Defaults to 50. */
  pauseMs?: number
  /** Invoked after each batch with the rows processed so far and the total row count. */
  onProgress?: (done: number, total: number) => void
}
21
+
22
/**
 * Rebuild an index's `fts` column in place using the current registry's
 * language + weight scheme. Picks tier by row count:
 *   - up to 100k rows (inclusive)  → tier 1: single UPDATE
 *   - up to 10M rows (inclusive)   → tier 2: batched UPDATE with pauses
 *   - more than 10M rows           → RebuildRequiredError (defer to v1.1 swap strategy)
 *
 * @param sql      Connection used for every statement.
 * @param schema   Schema holding the index table.
 * @param index    Index name.
 * @param registry Source of the language, searchable attributes and weights.
 * @param options  See `RebuildOptions`; `reindex` defaults to true, `pauseMs` to 50.
 * @returns The tier that ran, the row count measured before the rebuild, and
 *   the elapsed wall time in whole milliseconds.
 * @throws RebuildRequiredError when the table holds more rows than tier 2 allows.
 */
export async function rebuildInPlace(
  sql: SQL,
  schema: string,
  index: string,
  registry: FieldRegistry,
  options: RebuildOptions = {}
): Promise<{ tier: 1 | 2; rows: number; elapsedMs: number }> {
  const reindex = options.reindex ?? true
  const pauseMs = options.pauseMs ?? 50
  const table = indexTableName(schema, index)
  const start = performance.now()

  const countRows = (await sql.unsafe(
    `SELECT COUNT(*)::bigint AS n FROM ${table}`
  )) as Array<{ n: string | number }>
  // bigint may come back as a string; normalise to a number.
  const total = Number(countRows[0]?.n ?? 0)

  if (total > TIER2_MAX) {
    throw new RebuildRequiredError(
      `Index "${index}" has ${total} rows (>${TIER2_MAX}). ` +
        `In-place / batched rebuild is unsafe at this scale. ` +
        `Use the v1.1 dual-table swap strategy (not yet shipped).`
    )
  }

  const ftsExpr = buildSetFtsExpression(registry)

  if (total <= TIER1_MAX) {
    await sql.unsafe(`UPDATE ${table} SET fts = ${ftsExpr}`)
    if (reindex) await reindexGin(sql, schema, index)
    return { tier: 1, rows: total, elapsedMs: Math.round(performance.now() - start) }
  }

  // Tier 2: batched update keyed by id, with pauses for autovacuum.
  let cursor: string | null = null
  let done = 0

  while (true) {
    // Keyset pagination: continue strictly after the last id of the previous batch.
    const where = cursor === null ? '' : `WHERE id > $1`
    const params = cursor === null ? [] : [cursor]
    const batch = (await sql.unsafe(
      `SELECT id FROM ${table} ${where} ORDER BY id LIMIT ${BATCH_SIZE}`,
      params
    )) as Array<{ id: string }>
    if (batch.length === 0) break

    const ids = batch.map(r => r.id)
    const placeholders = ids.map((_, i) => `$${i + 1}`).join(', ')
    await sql.unsafe(
      `UPDATE ${table} SET fts = ${ftsExpr} WHERE id IN (${placeholders})`,
      ids
    )

    done += batch.length
    cursor = ids[ids.length - 1]!
    options.onProgress?.(done, total)

    // A short batch means the scan reached the end of the table: stop here
    // rather than sleeping and issuing one more SELECT that returns nothing.
    if (batch.length < BATCH_SIZE) break
    if (pauseMs > 0) await new Promise(r => setTimeout(r, pauseMs))
  }

  if (reindex) await reindexGin(sql, schema, index)
  return { tier: 2, rows: total, elapsedMs: Math.round(performance.now() - start) }
}
91
+
92
/**
 * Build the SQL expression assigned to the `fts` column.
 *
 * - With the default text column, every value of the `doc` JSON object is
 *   concatenated and indexed with weight 'A'.
 * - Otherwise each searchable attribute becomes its own weighted `tsvector`,
 *   and the parts are concatenated with `||`.
 * - With no searchable attributes at all, the result is an empty `tsvector`
 *   so the surrounding `UPDATE … SET fts = …` stays valid SQL.
 *
 * @param registry Supplies `language`, `usesDefaultTextColumn`, `searchable`
 *   and the per-attribute `weights` map.
 * @returns A SQL expression (no trailing semicolon) evaluating to a `tsvector`.
 */
function buildSetFtsExpression(registry: FieldRegistry): string {
  const lang = `${quoteLiteral(registry.language)}::regconfig`
  if (registry.usesDefaultTextColumn) {
    return (
      `setweight(to_tsvector(${lang}, ` +
      `(SELECT coalesce(string_agg(value, ' '), '') FROM jsonb_each_text(doc))), 'A')`
    )
  }

  // Joining zero parts would yield '' and therefore `SET fts = ` (a syntax error).
  if (registry.searchable.length === 0) return `''::tsvector`

  return registry.searchable
    .map((attr, position) => {
      // Fall back to the positional default (1st=A, 2nd=B, ...) when the
      // registry has no explicit entry, instead of emitting 'undefined'.
      // NOTE(review): positions past the 4th are clamped to 'D' — confirm this
      // matches how the registry assigns defaults.
      const weight = registry.weights.get(attr) ?? 'ABCD'.charAt(Math.min(position, 3))
      return (
        `setweight(to_tsvector(${lang}, coalesce(doc->>${quoteLiteral(attr)}, '')), ` +
        `${quoteLiteral(weight)})`
      )
    })
    .join(' || ')
}
109
+
110
/**
 * Issue `REINDEX INDEX` for the GIN index of `index`, addressed as
 * `<schema>.search_<index>_fts_gin` with both parts quoted as identifiers.
 */
async function reindexGin(sql: SQL, schema: string, index: string): Promise<void> {
  const qualifiedGin = [schema, `search_${index}_fts_gin`]
    .map(part => quoteIdent(part))
    .join('.')
  await sql.unsafe(`REINDEX INDEX ${qualifiedGin}`)
}
@@ -0,0 +1,46 @@
1
+ import { PostgresFtsError } from '../errors.ts'
2
+
3
/** Maximum identifier length, in bytes, accepted by `quoteIdent`. */
const PG_IDENT_MAX = 63

/**
 * Quote a Postgres identifier (schema, table, column) by wrapping it in
 * double quotes and doubling any embedded double quote.
 *
 * @param name Raw, unquoted identifier.
 * @returns The identifier ready to be spliced into SQL text.
 * @throws PostgresFtsError when `name` is empty, contains a NUL byte, or is
 *   longer than 63 bytes when encoded as UTF-8.
 */
export function quoteIdent(name: string): string {
  // `""` is a zero-length delimited identifier, which Postgres rejects; fail
  // here with a clear message instead of producing broken SQL.
  if (name.length === 0) throw new PostgresFtsError(`Invalid identifier: must not be empty.`)
  if (name.includes('\0')) throw new PostgresFtsError(`Invalid identifier: contains NUL byte.`)
  // The limit is in bytes, not characters, so measure the UTF-8 encoding.
  if (Buffer.byteLength(name, 'utf8') > PG_IDENT_MAX) {
    throw new PostgresFtsError(
      `Identifier "${name}" exceeds Postgres' ${PG_IDENT_MAX}-byte limit.`
    )
  }
  return `"${name.replace(/"/g, '""')}"`
}
18
+
19
/**
 * Render `value` as a single-quoted SQL string literal (used inside DDL
 * options). Embedded single quotes are doubled; no other character is escaped.
 */
export function quoteLiteral(value: string): string {
  const escaped = value.split("'").join("''")
  return "'" + escaped + "'"
}
23
+
24
/**
 * Schema-qualified, quoted name of the table backing a search index:
 * `<schema>.search_<index>`.
 */
export function indexTableName(schema: string, index: string): string {
  const quotedSchema = quoteIdent(schema)
  const quotedTable = quoteIdent('search_'.concat(index))
  return [quotedSchema, quotedTable].join('.')
}
28
+
29
/**
 * Schema-qualified, quoted name of the terms-dictionary table of an index:
 * `<schema>.search_<index>_terms`.
 */
export function termsTableName(schema: string, index: string): string {
  const quotedSchema = quoteIdent(schema)
  const quotedTable = quoteIdent('search_'.concat(index, '_terms'))
  return [quotedSchema, quotedTable].join('.')
}
33
+
34
/**
 * Quoted name of the meta table in `schema`. There is a single shared table;
 * its rows are keyed by (index_name, key).
 */
export function metaTableName(schema: string): string {
  const quotedSchema = quoteIdent(schema)
  const quotedTable = quoteIdent('_meta')
  return [quotedSchema, quotedTable].join('.')
}
38
+
39
/**
 * Unquoted, unqualified table name of an index (`search_<index>`) — useful
 * for pg_class lookups, where the name is compared as a plain value.
 */
export function bareIndexTable(index: string): string {
  return 'search_'.concat(index)
}
43
+
44
/** Unquoted, unqualified name of an index's terms table: `search_<index>_terms`. */
export function bareTermsTable(index: string): string {
  return 'search_'.concat(index, '_terms')
}
@@ -0,0 +1,53 @@
1
+ import type { SQL } from 'bun'
2
+ import type { DriverConfig, IndexSettings } from '../../types.ts'
3
+
4
/** Shorthand typo-tolerance switch: 'auto' uses the defaults, 'off' disables it. */
export type TypoToleranceMode = 'auto' | 'off'
5
+
6
/** Fine-grained typo-tolerance overrides; any field left out keeps its default. */
export interface TypoToleranceSettings {
  /** Shortest token that is considered for fuzzy expansion. Default 4. */
  minTokenLength?: number
  /** Largest Levenshtein distance tolerated. Default 1; 2 is supported but slower. */
  maxDistance?: number
  /** pg_trgm similarity threshold; higher values are stricter. Default 0.4. */
  similarity?: number
}
14
+
15
/** Configuration accepted by the Postgres full-text search driver. */
export interface PostgresFtsConfig extends DriverConfig {
  driver: string
  /**
   * Bun SQL connection to use. When left out, the driver falls back to
   * `Database.raw` from `@strav/database`, which must be bootstrapped first.
   */
  connection?: SQL
  /** Postgres schema that holds the index tables. Default 'strav_search'. */
  schema?: string
  /** Default text-search configuration, e.g. 'english' or 'french'. */
  language?: string
  /**
   * Typo tolerance: 'off' disables it, 'auto' uses the defaults, and an
   * object gives fine-grained control.
   */
  typoTolerance?: TypoToleranceMode | TypoToleranceSettings
  /** GIN index tuning. */
  gin?: {
    /** Default false — better tail latency for read-heavy search. */
    fastupdate?: boolean
  }
  /** Per-search-transaction work_mem hint, e.g. '64MB'. Set to null/empty to skip. */
  workMem?: string | null
}
36
+
37
/**
 * Typo-tolerance settings after defaults have been applied: every field is
 * present, plus an explicit on/off flag.
 */
export interface ResolvedTypoTolerance {
  /** Whether typo tolerance is active. */
  enabled: boolean
  /** Minimum token length considered for fuzzy expansion. */
  minTokenLength: number
  /** Maximum Levenshtein distance tolerated. */
  maxDistance: number
  /** pg_trgm similarity threshold. */
  similarity: number
}
44
+
45
/** Extra, Postgres-specific settings kept per index in `_meta`. */
export interface PgIndexSettings extends IndexSettings {
  /** Text-search configuration for this index; takes precedence over the driver default. */
  language?: string
  /**
   * Weight tier per attribute. Every key must be listed in
   * `searchableAttributes`; each value is one of 'A' | 'B' | 'C' | 'D'.
   * Attributes without an entry get a positional default (1st=A, 2nd=B, ...).
   */
  weights?: Record<string, 'A' | 'B' | 'C' | 'D'>
}
package/src/index.ts CHANGED
@@ -18,6 +18,11 @@ export type {
18
18
  TypoToleranceMode,
19
19
  TypoToleranceSettings,
20
20
  } from './drivers/embedded/index.ts'
21
+ export { PostgresFtsDriver } from './drivers/postgres/index.ts'
22
+ export type {
23
+ PostgresFtsConfig,
24
+ PgIndexSettings,
25
+ } from './drivers/postgres/index.ts'
21
26
 
22
27
  // Mixin
23
28
  export { searchable } from './searchable.ts'
@@ -6,6 +6,7 @@ import { TypesenseDriver } from './drivers/typesense_driver.ts'
6
6
  import { AlgoliaDriver } from './drivers/algolia_driver.ts'
7
7
  import { NullDriver } from './drivers/null_driver.ts'
8
8
  import { EmbeddedDriver } from './drivers/embedded/index.ts'
9
+ import { PostgresFtsDriver } from './drivers/postgres/index.ts'
9
10
 
10
11
  @inject
11
12
  export default class SearchManager {
@@ -89,6 +90,9 @@ export default class SearchManager {
89
90
  return new AlgoliaDriver(config)
90
91
  case 'embedded':
91
92
  return new EmbeddedDriver(config)
93
+ case 'postgres-fts':
94
+ case 'postgres':
95
+ return new PostgresFtsDriver(config)
92
96
  case 'null':
93
97
  return new NullDriver()
94
98
  default:
@@ -38,5 +38,20 @@ export default {
38
38
  /** Typo tolerance: 'off' to disable, 'auto' for defaults, or { minTokenLength, maxDistance }. */
39
39
  typoTolerance: env('SEARCH_TYPO_TOLERANCE', 'auto'),
40
40
  },
41
+
42
+ postgres: {
43
+ driver: 'postgres-fts',
44
+ /** Postgres schema for index tables. */
45
+ schema: env('SEARCH_PG_SCHEMA', 'strav_search'),
46
+ /** Default text-search configuration ('english', 'french', ...). */
47
+ language: env('SEARCH_PG_LANGUAGE', 'english'),
48
+ /** Typo tolerance: 'off' to disable, 'auto' for defaults, or { minTokenLength, maxDistance, similarity }. */
49
+ typoTolerance: env('SEARCH_TYPO_TOLERANCE', 'auto'),
50
+ /** Per-search work_mem hint. Set to null/empty to skip. */
51
+ workMem: env('SEARCH_PG_WORK_MEM', '64MB'),
52
+ /** GIN index tuning — fastupdate=off improves read tail latency. */
53
+ gin: { fastupdate: false },
54
+ // `connection` (Bun SQL instance) is resolved from @strav/database at runtime.
55
+ },
41
56
  },
42
57
  }