@strav/search 0.4.31 → 1.0.0-alpha.33

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. package/package.json +20 -22
  2. package/src/console/index.ts +5 -0
  3. package/src/console/search_console_provider.ts +20 -0
  4. package/src/console/search_flush.ts +49 -0
  5. package/src/console/search_import.ts +103 -0
  6. package/src/console/search_list.ts +46 -0
  7. package/src/console/search_reindex.ts +94 -0
  8. package/src/drivers/meilisearch/meilisearch_driver.ts +304 -0
  9. package/src/drivers/memory/memory_driver.ts +344 -0
  10. package/src/drivers/postgres/apply_search_migration.ts +74 -0
  11. package/src/drivers/postgres/postgres_fts_driver.ts +493 -135
  12. package/src/drivers/typesense/typesense_driver.ts +345 -0
  13. package/src/index.ts +50 -39
  14. package/src/search_engine.ts +40 -25
  15. package/src/search_error.ts +86 -0
  16. package/src/search_manager.ts +112 -94
  17. package/src/search_provider.ts +68 -6
  18. package/src/searchable.ts +173 -160
  19. package/src/searchable_registry.ts +61 -0
  20. package/src/types.ts +59 -49
  21. package/README.md +0 -191
  22. package/src/commands/search_flush.ts +0 -41
  23. package/src/commands/search_import.ts +0 -43
  24. package/src/commands/search_optimize.ts +0 -52
  25. package/src/commands/search_rebuild.ts +0 -73
  26. package/src/drivers/algolia_driver.ts +0 -170
  27. package/src/drivers/embedded/embedded_driver.ts +0 -136
  28. package/src/drivers/embedded/engine/field_registry.ts +0 -97
  29. package/src/drivers/embedded/engine/fts_query_builder.ts +0 -184
  30. package/src/drivers/embedded/engine/query_compiler.ts +0 -134
  31. package/src/drivers/embedded/engine/schema.ts +0 -99
  32. package/src/drivers/embedded/engine/snippet_formatter.ts +0 -29
  33. package/src/drivers/embedded/engine/sqlite_engine.ts +0 -255
  34. package/src/drivers/embedded/engine/typo_expander.ts +0 -138
  35. package/src/drivers/embedded/errors.ts +0 -15
  36. package/src/drivers/embedded/filters/filter_compiler.ts +0 -136
  37. package/src/drivers/embedded/index.ts +0 -3
  38. package/src/drivers/embedded/storage/paths.ts +0 -23
  39. package/src/drivers/embedded/types.ts +0 -34
  40. package/src/drivers/meilisearch_driver.ts +0 -150
  41. package/src/drivers/null_driver.ts +0 -27
  42. package/src/drivers/postgres/engine/field_registry.ts +0 -116
  43. package/src/drivers/postgres/engine/fts_query_builder.ts +0 -105
  44. package/src/drivers/postgres/engine/pg_engine.ts +0 -300
  45. package/src/drivers/postgres/engine/query_compiler.ts +0 -165
  46. package/src/drivers/postgres/engine/schema.ts +0 -187
  47. package/src/drivers/postgres/engine/snippet_formatter.ts +0 -31
  48. package/src/drivers/postgres/engine/typo_expander.ts +0 -131
  49. package/src/drivers/postgres/errors.ts +0 -33
  50. package/src/drivers/postgres/filters/filter_compiler.ts +0 -138
  51. package/src/drivers/postgres/index.ts +0 -14
  52. package/src/drivers/postgres/rebuild/rebuild_inplace.ts +0 -113
  53. package/src/drivers/postgres/storage/identifiers.ts +0 -46
  54. package/src/drivers/postgres/types.ts +0 -53
  55. package/src/drivers/typesense_driver.ts +0 -229
  56. package/src/errors.ts +0 -18
  57. package/src/helpers.ts +0 -120
  58. package/stubs/config/search.ts +0 -57
  59. package/tsconfig.json +0 -5
@@ -1,165 +0,0 @@
1
- import type { SearchOptions } from '../../../types.ts'
2
- import type { FieldRegistry } from './field_registry.ts'
3
- import { compileFilter } from '../filters/filter_compiler.ts'
4
- import { quoteIdent, quoteLiteral, indexTableName } from '../storage/identifiers.ts'
5
-
6
- export interface CompiledSearch {
7
- /** Main SELECT returning hits + score + snippets. */
8
- sql: string
9
- /** Bound parameters for the SELECT. */
10
- params: unknown[]
11
- /** COUNT(*) variant for totalHits (uses the same MATCH + filter, no rank/snippets). */
12
- countSql: string
13
- countParams: unknown[]
14
- /** Names of the fields for which we asked PG to return a ts_headline column (`__snip_<field>`). */
15
- snippetColumns: string[]
16
- }
17
-
18
- const DEFAULT_HEADLINE_OPTIONS =
19
- 'StartSel=<mark>,StopSel=</mark>,MaxWords=35,MinWords=15,ShortWord=0,HighlightAll=false,MaxFragments=2'
20
-
21
- /** ts_rank_cd normalization bitmask. 1 = divide by 1+log(doc length), 32 = rank/(rank+1). */
22
- const DEFAULT_RANK_FLAGS = 1 | 32
23
-
24
- export interface QueryCompilerOptions {
25
- registry: FieldRegistry
26
- schema: string
27
- index: string
28
- /** Output of buildTsqueryExpression — already starts at placeholder 1. */
29
- tsquery: { sql: string; params: string[] }
30
- search: SearchOptions
31
- }
32
-
33
- export function compileSearch(opts: QueryCompilerOptions): CompiledSearch {
34
- const { registry, schema, index, tsquery, search } = opts
35
- const filterableSet = new Set(registry.filterable)
36
- const sortableSet = new Set(registry.sortable)
37
-
38
- const filter = compileFilter(search.filter, filterableSet, tsquery.params.length)
39
- const params: unknown[] = [...tsquery.params, ...filter.params]
40
-
41
- const whereParts: string[] = []
42
- if (tsquery.sql) whereParts.push(`fts @@ q.query`)
43
- if (filter.sql) whereParts.push(filter.sql)
44
- const where = whereParts.length > 0 ? `WHERE ${whereParts.join(' AND ')}` : ''
45
-
46
- const orderBy = compileOrder(search.sort, sortableSet, !tsquery.sql)
47
-
48
- const perPage = Math.max(1, search.perPage ?? 20)
49
- const page = Math.max(1, search.page ?? 1)
50
- const offset = (page - 1) * perPage
51
-
52
- const limitPh = `$${params.length + 1}`
53
- const offsetPh = `$${params.length + 2}`
54
- params.push(perPage, offset)
55
-
56
- const wantedHighlights = pickHighlightFields(search.attributesToHighlight, registry)
57
- const lang = `${quoteLiteral(registry.language)}::regconfig`
58
-
59
- // The ranked CTE: filter + order + LIMIT, returns top-K rows + score only.
60
- // ts_headline runs only on this top-K slice (huge perf win — ts_headline
61
- // re-tokenizes raw text per row).
62
- const cte = tsquery.sql
63
- ? `WITH q AS (SELECT (${tsquery.sql}) AS query),
64
- ranked AS (
65
- SELECT id, doc, ts_rank_cd(fts, q.query, ${DEFAULT_RANK_FLAGS}) AS score
66
- FROM ${indexTableName(schema, index)}, q
67
- ${where}
68
- ${orderBy}
69
- LIMIT ${limitPh} OFFSET ${offsetPh}
70
- )`
71
- : `WITH ranked AS (
72
- SELECT id, doc, 0::real AS score
73
- FROM ${indexTableName(schema, index)}
74
- ${where}
75
- ${orderBy}
76
- LIMIT ${limitPh} OFFSET ${offsetPh}
77
- )`
78
-
79
- const snippetCols = wantedHighlights.map(field => {
80
- return `ts_headline(${lang}, coalesce(doc->>${quoteLiteral(field)}, ''), ` +
81
- `${tsquery.sql ? '(SELECT query FROM q)' : 'plainto_tsquery(' + lang + ", '')"}, ` +
82
- `${quoteLiteral(DEFAULT_HEADLINE_OPTIONS)}) AS ${quoteIdent(`__snip_${field}`)}`
83
- })
84
-
85
- const selectCols = ['id', 'doc', 'score', ...snippetCols]
86
-
87
- // Re-emit ORDER BY in the outer SELECT — Postgres doesn't preserve row
88
- // order across CTE boundaries.
89
- const outerOrderBy = compileOuterOrder(search.sort, sortableSet, !tsquery.sql)
90
- const sql = `${cte}
91
- SELECT ${selectCols.join(', ')}
92
- FROM ranked
93
- ${outerOrderBy}`
94
-
95
- // Count uses the MATCH + filter, but no rank/snippet/limit.
96
- const countSql = tsquery.sql
97
- ? `SELECT COUNT(*)::int AS n FROM ${indexTableName(schema, index)}, ` +
98
- `(SELECT (${tsquery.sql}) AS query) q ${where}`
99
- : `SELECT COUNT(*)::int AS n FROM ${indexTableName(schema, index)} ${where}`
100
-
101
- const countParams = [...tsquery.params, ...filter.params]
102
-
103
- return {
104
- sql,
105
- params,
106
- countSql,
107
- countParams,
108
- snippetColumns: wantedHighlights,
109
- }
110
- }
111
-
112
- function compileOrder(
113
- sort: string[] | undefined,
114
- sortableSet: ReadonlySet<string>,
115
- matchAll: boolean
116
- ): string {
117
- if (sort && sort.length > 0) {
118
- const parts: string[] = []
119
- for (const spec of sort) {
120
- const [field, dirRaw] = spec.split(':') as [string, string | undefined]
121
- if (!field || !sortableSet.has(field)) {
122
- throw new Error(
123
- `Field "${field}" is not in sortableAttributes. Add it to the index settings before sorting on it.`
124
- )
125
- }
126
- const dir = dirRaw?.toLowerCase() === 'desc' ? 'DESC' : 'ASC'
127
- parts.push(`${quoteIdent(field)} ${dir}`)
128
- }
129
- return `ORDER BY ${parts.join(', ')}`
130
- }
131
- if (matchAll) return 'ORDER BY id ASC'
132
- return 'ORDER BY score DESC'
133
- }
134
-
135
- /** ORDER BY for the outer SELECT — references columns visible on `ranked`. */
136
- function compileOuterOrder(
137
- sort: string[] | undefined,
138
- sortableSet: ReadonlySet<string>,
139
- matchAll: boolean
140
- ): string {
141
- if (sort && sort.length > 0) {
142
- // The CTE only exposes id, doc, score — sortable columns aren't in scope,
143
- // so we sort by `doc->>'field'` lexically. Same lex semantics as the
144
- // typed generated columns (which are TEXT) used inside the CTE.
145
- const parts: string[] = []
146
- for (const spec of sort) {
147
- const [field, dirRaw] = spec.split(':') as [string, string | undefined]
148
- if (!field || !sortableSet.has(field)) continue
149
- const dir = dirRaw?.toLowerCase() === 'desc' ? 'DESC' : 'ASC'
150
- parts.push(`(doc->>${quoteLiteral(field)}) ${dir}`)
151
- }
152
- return parts.length > 0 ? `ORDER BY ${parts.join(', ')}` : ''
153
- }
154
- if (matchAll) return 'ORDER BY id ASC'
155
- return 'ORDER BY score DESC'
156
- }
157
-
158
- function pickHighlightFields(
159
- requested: string[] | undefined,
160
- registry: FieldRegistry
161
- ): string[] {
162
- if (registry.usesDefaultTextColumn) return []
163
- if (!requested || requested.length === 0) return []
164
- return requested.filter(f => registry.searchable.includes(f))
165
- }
@@ -1,187 +0,0 @@
1
- import type { SQL } from 'bun'
2
- import { quoteIdent, quoteLiteral, indexTableName, termsTableName, metaTableName, bareIndexTable, bareTermsTable } from '../storage/identifiers.ts'
3
- import { MissingExtensionError } from '../errors.ts'
4
- import type { FieldRegistry } from './field_registry.ts'
5
- import type { ResolvedTypoTolerance } from '../types.ts'
6
-
7
- const SCHEMA_VERSION = 1
8
-
9
- /**
10
- * Idempotent: ensures the search schema, the shared `_meta` table, and the
11
- * required extensions exist. Called once per driver instantiation.
12
- */
13
- export async function ensureSchemaAndExtensions(
14
- sql: SQL,
15
- schema: string,
16
- typo: ResolvedTypoTolerance
17
- ): Promise<void> {
18
- await sql.unsafe(`CREATE SCHEMA IF NOT EXISTS ${quoteIdent(schema)}`)
19
-
20
- await sql.unsafe(`
21
- CREATE TABLE IF NOT EXISTS ${metaTableName(schema)} (
22
- index_name TEXT NOT NULL,
23
- key TEXT NOT NULL,
24
- value TEXT NOT NULL,
25
- PRIMARY KEY (index_name, key)
26
- )
27
- `)
28
-
29
- if (typo.enabled) {
30
- try {
31
- await sql.unsafe('CREATE EXTENSION IF NOT EXISTS pg_trgm')
32
- } catch {
33
- throw new MissingExtensionError('pg_trgm')
34
- }
35
- // fuzzystrmatch is optional — used to re-rank trigram candidates with a
36
- // bounded Levenshtein. If absent we silently fall back to trigram-only.
37
- try {
38
- await sql.unsafe('CREATE EXTENSION IF NOT EXISTS fuzzystrmatch')
39
- } catch {
40
- // ignore
41
- }
42
- }
43
- }
44
-
45
- /**
46
- * Idempotent: create the per-index table, GIN index, terms_dict, and trigger.
47
- * Returns true if the table was newly created (in which case `_meta` is seeded here).
48
- */
49
- export async function ensureIndexTable(
50
- sql: SQL,
51
- schema: string,
52
- index: string,
53
- registry: FieldRegistry,
54
- ginFastUpdate: boolean
55
- ): Promise<boolean> {
56
- const exists = await tableExists(sql, schema, bareIndexTable(index))
57
- if (exists) return false
58
-
59
- const typedColsDdl = registry.typedColumns
60
- .map(c => `, ${quoteIdent(c.name)} TEXT GENERATED ALWAYS AS (${c.expression}) STORED`)
61
- .join('')
62
-
63
- await sql.unsafe(`
64
- CREATE TABLE ${indexTableName(schema, index)} (
65
- id TEXT PRIMARY KEY,
66
- doc JSONB NOT NULL,
67
- fts tsvector NOT NULL DEFAULT ''::tsvector${typedColsDdl}
68
- )
69
- `)
70
-
71
- await sql.unsafe(
72
- `CREATE INDEX ${quoteIdent(`${bareIndexTable(index)}_fts_gin`)} ` +
73
- `ON ${indexTableName(schema, index)} USING gin(fts) ` +
74
- `WITH (fastupdate = ${ginFastUpdate ? 'on' : 'off'})`
75
- )
76
-
77
- for (const col of registry.typedColumns) {
78
- await sql.unsafe(
79
- `CREATE INDEX ${quoteIdent(`${bareIndexTable(index)}_${col.name}_idx`)} ` +
80
- `ON ${indexTableName(schema, index)}(${quoteIdent(col.name)})`
81
- )
82
- }
83
-
84
- // Belt-and-suspenders: if anyone INSERTs without computing fts, recompute it
85
- // from doc using the current language + weight scheme. The driver always
86
- // sets fts itself; the trigger only recomputes fts when it was left NULL or empty.
87
- await ensureFtsTrigger(sql, schema, index, registry)
88
-
89
- await ensureTermsDict(sql, schema, index)
90
-
91
- await sql.unsafe(
92
- `INSERT INTO ${metaTableName(schema)} (index_name, key, value) VALUES ` +
93
- `($1, 'schema_version', $2), ($1, 'language', $3), ($1, 'searchable', $4), ` +
94
- `($1, 'filterable', $5), ($1, 'sortable', $6) ` +
95
- `ON CONFLICT (index_name, key) DO NOTHING`,
96
- [
97
- index,
98
- String(SCHEMA_VERSION),
99
- registry.language,
100
- JSON.stringify(registry.searchable),
101
- JSON.stringify(registry.filterable),
102
- JSON.stringify(registry.sortable),
103
- ]
104
- )
105
-
106
- return true
107
- }
108
-
109
- export async function ensureTermsDict(sql: SQL, schema: string, index: string): Promise<void> {
110
- const exists = await tableExists(sql, schema, bareTermsTable(index))
111
- if (exists) return
112
-
113
- await sql.unsafe(`
114
- CREATE TABLE ${termsTableName(schema, index)} (
115
- term TEXT PRIMARY KEY,
116
- doc_freq INTEGER NOT NULL DEFAULT 0
117
- )
118
- `)
119
- await sql.unsafe(
120
- `CREATE INDEX ${quoteIdent(`${bareTermsTable(index)}_trgm`)} ` +
121
- `ON ${termsTableName(schema, index)} USING gin (term gin_trgm_ops)`
122
- )
123
- await sql.unsafe(
124
- `CREATE INDEX ${quoteIdent(`${bareTermsTable(index)}_len`)} ` +
125
- `ON ${termsTableName(schema, index)} (length(term))`
126
- )
127
- }
128
-
129
- async function ensureFtsTrigger(
130
- sql: SQL,
131
- schema: string,
132
- index: string,
133
- registry: FieldRegistry
134
- ): Promise<void> {
135
- const fnName = `${bareIndexTable(index)}_fts_trigger`
136
- const lang = quoteLiteral(registry.language)
137
- const segments = registry.usesDefaultTextColumn
138
- ? `setweight(to_tsvector(${lang}::regconfig, ${defaultTextProjection()}), 'A')`
139
- : registry.searchable
140
- .map(attr => {
141
- const weight = registry.weights.get(attr)!
142
- return `setweight(to_tsvector(${lang}::regconfig, coalesce(NEW.doc->>${quoteLiteral(attr)}, '')), '${weight}')`
143
- })
144
- .join(' || ')
145
-
146
- await sql.unsafe(`
147
- CREATE OR REPLACE FUNCTION ${quoteIdent(schema)}.${quoteIdent(fnName)}() RETURNS trigger AS $$
148
- BEGIN
149
- IF NEW.fts IS NULL OR NEW.fts = ''::tsvector THEN
150
- NEW.fts := ${segments};
151
- END IF;
152
- RETURN NEW;
153
- END;
154
- $$ LANGUAGE plpgsql
155
- `)
156
-
157
- await sql.unsafe(`
158
- CREATE TRIGGER ${quoteIdent(`${bareIndexTable(index)}_fts_trg`)}
159
- BEFORE INSERT OR UPDATE ON ${indexTableName(schema, index)}
160
- FOR EACH ROW EXECUTE FUNCTION ${quoteIdent(schema)}.${quoteIdent(fnName)}()
161
- `)
162
- }
163
-
164
- function defaultTextProjection(): string {
165
- // For default mode (no searchableAttributes) the trigger fallback walks
166
- // every JSONB string value — same shape as the field_registry default.
167
- return `(SELECT coalesce(string_agg(value, ' '), '') FROM jsonb_each_text(NEW.doc))`
168
- }
169
-
170
- async function tableExists(sql: SQL, schema: string, table: string): Promise<boolean> {
171
- const rows = (await sql.unsafe(
172
- `SELECT 1 AS present FROM pg_class c JOIN pg_namespace n ON n.oid = c.relnamespace ` +
173
- `WHERE n.nspname = $1 AND c.relname = $2 AND c.relkind = 'r' LIMIT 1`,
174
- [schema, table]
175
- )) as Array<Record<string, unknown>>
176
- return rows.length > 0
177
- }
178
-
179
- /** Drop a single index's tables and trigger function. Idempotent. */
180
- export async function dropIndex(sql: SQL, schema: string, index: string): Promise<void> {
181
- await sql.unsafe(`DROP TABLE IF EXISTS ${indexTableName(schema, index)} CASCADE`)
182
- await sql.unsafe(`DROP TABLE IF EXISTS ${termsTableName(schema, index)} CASCADE`)
183
- await sql.unsafe(
184
- `DROP FUNCTION IF EXISTS ${quoteIdent(schema)}.${quoteIdent(`${bareIndexTable(index)}_fts_trigger`)}() CASCADE`
185
- )
186
- await sql.unsafe(`DELETE FROM ${metaTableName(schema)} WHERE index_name = $1`, [index])
187
- }
@@ -1,31 +0,0 @@
1
- /**
2
- * `ts_headline` returns text with literal `<mark>` / `</mark>` markers around
3
- * matched terms. The surrounding text comes from the source document — which
4
- * may itself contain HTML the caller didn't escape. We HTML-escape the
5
- * snippet, then restore the marker tags, mirroring what the embedded driver
6
- * does with sentinel markers.
7
- */
8
- const OPEN_TAG = '<mark>'
9
- const CLOSE_TAG = '</mark>'
10
- const OPEN_PLACEHOLDER = 'STRAV_OPEN'
11
- const CLOSE_PLACEHOLDER = 'STRAV_CLOSE'
12
-
13
- export function formatSnippet(snippet: string | null | undefined): string {
14
- if (!snippet) return ''
15
- // Replace ts_headline's literal tags with placeholder tokens (STRAV_OPEN /
16
- // STRAV_CLOSE; plain strings, assumed absent from source text), escape, then swap back.
17
- const swapped = snippet
18
- .replaceAll(OPEN_TAG, OPEN_PLACEHOLDER)
19
- .replaceAll(CLOSE_TAG, CLOSE_PLACEHOLDER)
20
- const escaped = escapeHtml(swapped)
21
- return escaped.replaceAll(OPEN_PLACEHOLDER, OPEN_TAG).replaceAll(CLOSE_PLACEHOLDER, CLOSE_TAG)
22
- }
23
-
24
- function escapeHtml(input: string): string {
25
- return input
26
- .replace(/&/g, '&amp;')
27
- .replace(/</g, '&lt;')
28
- .replace(/>/g, '&gt;')
29
- .replace(/"/g, '&quot;')
30
- .replace(/'/g, '&#39;')
31
- }
@@ -1,131 +0,0 @@
1
- import type { SQL } from 'bun'
2
- import { termsTableName } from '../storage/identifiers.ts'
3
- import type { ResolvedTypoTolerance } from '../types.ts'
4
-
5
- /** Tokeniser used for terms-dict maintenance. Mirrors embedded driver. */
6
- export function tokenize(text: string): string[] {
7
- if (!text) return []
8
- const tokens: string[] = []
9
- for (const raw of text.toLowerCase().split(/[^\p{L}\p{N}]+/u)) {
10
- if (raw.length >= 2) tokens.push(raw)
11
- }
12
- return tokens
13
- }
14
-
15
- /** Increment per-document term frequencies (counting unique tokens per doc). */
16
- export async function recordTerms(
17
- sql: SQL,
18
- schema: string,
19
- index: string,
20
- text: string
21
- ): Promise<void> {
22
- const unique = Array.from(new Set(tokenize(text)))
23
- if (unique.length === 0) return
24
-
25
- const placeholders = unique.map((_, i) => `($${i + 1})`).join(', ')
26
- await sql.unsafe(
27
- `INSERT INTO ${termsTableName(schema, index)} (term) VALUES ${placeholders} ` +
28
- `ON CONFLICT (term) DO UPDATE SET doc_freq = ${termsTableName(schema, index)}.doc_freq + 1`,
29
- unique
30
- )
31
- }
32
-
33
- /** Decrement; purge rows that drop to zero. */
34
- export async function unrecordTerms(
35
- sql: SQL,
36
- schema: string,
37
- index: string,
38
- text: string
39
- ): Promise<void> {
40
- const unique = Array.from(new Set(tokenize(text)))
41
- if (unique.length === 0) return
42
-
43
- const placeholders = unique.map((_, i) => `$${i + 1}`).join(', ')
44
- await sql.unsafe(
45
- `UPDATE ${termsTableName(schema, index)} SET doc_freq = doc_freq - 1 WHERE term IN (${placeholders})`,
46
- unique
47
- )
48
- await sql.unsafe(`DELETE FROM ${termsTableName(schema, index)} WHERE doc_freq <= 0`)
49
- }
50
-
51
- /**
52
- * Look up Levenshtein-near terms via pg_trgm prefilter. When fuzzystrmatch is
53
- * available we re-rank with bounded Levenshtein for precision (trigram on
54
- * short tokens is statistically noisy).
55
- */
56
- export async function expandTokens(
57
- sql: SQL,
58
- schema: string,
59
- index: string,
60
- tokens: string[],
61
- settings: ResolvedTypoTolerance,
62
- hasFuzzystrmatch: boolean,
63
- maxCandidates = 8
64
- ): Promise<Map<string, string[]>> {
65
- const out = new Map<string, string[]>()
66
- if (!settings.enabled || tokens.length === 0) return out
67
-
68
- for (const token of tokens) {
69
- if (token.length < settings.minTokenLength) continue
70
-
71
- // pg_trgm uses a per-session similarity threshold. We apply our own threshold
72
- // explicitly via the similarity() comparison in the WHERE clause, so the caller's session isn't touched.
73
- const rows = (await sql.unsafe(
74
- hasFuzzystrmatch
75
- ? `WITH cands AS (
76
- SELECT term FROM ${termsTableName(schema, index)}
77
- WHERE similarity(term, $1) >= $2 AND term <> $1
78
- ORDER BY similarity(term, $1) DESC
79
- LIMIT 32
80
- )
81
- SELECT term FROM cands
82
- WHERE levenshtein(term, $1) <= $3
83
- LIMIT $4`
84
- : `SELECT term FROM ${termsTableName(schema, index)}
85
- WHERE similarity(term, $1) >= $2 AND term <> $1
86
- ORDER BY similarity(term, $1) DESC
87
- LIMIT $3`,
88
- hasFuzzystrmatch
89
- ? [token, settings.similarity, settings.maxDistance, maxCandidates]
90
- : [token, settings.similarity, maxCandidates]
91
- )) as Array<{ term: string }>
92
-
93
- if (rows.length > 0) out.set(token, rows.map(r => r.term))
94
- }
95
-
96
- return out
97
- }
98
-
99
- /** Resolve user-provided typo tolerance settings into concrete numbers. */
100
- export function resolveTypoTolerance(
101
- setting:
102
- | 'off'
103
- | 'auto'
104
- | { minTokenLength?: number; maxDistance?: number; similarity?: number }
105
- | undefined
106
- ): ResolvedTypoTolerance {
107
- if (setting === 'off') {
108
- return { enabled: false, minTokenLength: 4, maxDistance: 1, similarity: 0.4 }
109
- }
110
- if (setting === undefined || setting === 'auto') {
111
- return { enabled: true, minTokenLength: 4, maxDistance: 1, similarity: 0.4 }
112
- }
113
- return {
114
- enabled: true,
115
- minTokenLength: setting.minTokenLength ?? 4,
116
- maxDistance: setting.maxDistance ?? 1,
117
- similarity: setting.similarity ?? 0.4,
118
- }
119
- }
120
-
121
- /** Detect whether fuzzystrmatch.levenshtein is available. */
122
- export async function hasFuzzystrmatch(sql: SQL): Promise<boolean> {
123
- try {
124
- const rows = (await sql.unsafe(
125
- `SELECT 1 FROM pg_proc WHERE proname = 'levenshtein' LIMIT 1`
126
- )) as Array<Record<string, unknown>>
127
- return rows.length > 0
128
- } catch {
129
- return false
130
- }
131
- }
@@ -1,33 +0,0 @@
1
- import { SearchError } from '../../errors.ts'
2
-
3
- export class PostgresFtsError extends SearchError {}
4
-
5
- export class MissingExtensionError extends PostgresFtsError {
6
- constructor(extension: string) {
7
- super(
8
- `Postgres extension "${extension}" is required by the postgres-fts driver. ` +
9
- `Run \`CREATE EXTENSION ${extension}\` as a superuser, or set typoTolerance: 'off' if you can't.`
10
- )
11
- }
12
- }
13
-
14
- export class RebuildRequiredError extends PostgresFtsError {
15
- constructor(message: string) {
16
- super(message)
17
- }
18
- }
19
-
20
- export class UnsupportedFilterError extends PostgresFtsError {
21
- constructor(message: string) {
22
- super(`Postgres-fts driver filter is unsupported: ${message}`)
23
- }
24
- }
25
-
26
- export class MissingConnectionError extends PostgresFtsError {
27
- constructor() {
28
- super(
29
- 'PostgresFtsDriver has no Postgres connection. ' +
30
- 'Pass `connection` in the driver config, or bootstrap @strav/database first so Database.raw is available.'
31
- )
32
- }
33
- }
@@ -1,138 +0,0 @@
1
- import { UnsupportedFilterError } from '../errors.ts'
2
- import { quoteIdent } from '../storage/identifiers.ts'
3
-
4
- export interface CompiledFilter {
5
- /** SQL fragment to splice into a WHERE clause (no leading 'WHERE'). Empty if no filter. */
6
- sql: string
7
- /** Bound parameters in the order their `$N` placeholders appear. */
8
- params: unknown[]
9
- /** Number of params already used (caller offsets later placeholders). */
10
- paramCount: number
11
- }
12
-
13
- const OPERATORS = new Set(['eq', 'neq', 'gt', 'gte', 'lt', 'lte', 'in', 'nin'])
14
-
15
- /**
16
- * Compile a filter object into a parameterized SQL WHERE fragment.
17
- * Mirrors the embedded driver's contract — same operator set, same shape.
18
- *
19
- * Placeholder numbering starts at `startAt + 1` ($N+1, $N+2, ...) so callers
20
- * can compose with their own bindings.
21
- */
22
- export function compileFilter(
23
- filter: Record<string, unknown> | string | undefined,
24
- filterableAttributes: ReadonlySet<string>,
25
- startAt = 0
26
- ): CompiledFilter {
27
- if (!filter) return { sql: '', params: [], paramCount: 0 }
28
-
29
- if (typeof filter === 'string') {
30
- throw new UnsupportedFilterError(
31
- 'Raw string filters are not supported by the postgres-fts driver. ' +
32
- 'Pass an object like `{ status: "published" }` instead.'
33
- )
34
- }
35
-
36
- const clauses: string[] = []
37
- const params: unknown[] = []
38
- let cursor = startAt
39
-
40
- const ph = () => `$${++cursor}`
41
-
42
- for (const [key, value] of Object.entries(filter)) {
43
- if (value === undefined) continue
44
- if (!filterableAttributes.has(key)) {
45
- throw new UnsupportedFilterError(
46
- `Field "${key}" is not in filterableAttributes. Add it to the index settings before filtering on it.`
47
- )
48
- }
49
-
50
- const col = quoteIdent(key)
51
-
52
- if (value === null) {
53
- clauses.push(`${col} IS NULL`)
54
- continue
55
- }
56
-
57
- if (Array.isArray(value)) {
58
- if (value.length === 0) {
59
- clauses.push('1 = 0')
60
- } else {
61
- const placeholders = value.map(() => ph()).join(', ')
62
- clauses.push(`${col} IN (${placeholders})`)
63
- params.push(...value.map(coerce))
64
- }
65
- continue
66
- }
67
-
68
- if (isOperatorObject(value)) {
69
- for (const [op, opValue] of Object.entries(value)) {
70
- const compiled = compileOperator(col, op, opValue, ph)
71
- clauses.push(compiled.sql)
72
- params.push(...compiled.params)
73
- }
74
- continue
75
- }
76
-
77
- if (isPrimitive(value)) {
78
- clauses.push(`${col} = ${ph()}`)
79
- params.push(coerce(value))
80
- continue
81
- }
82
-
83
- throw new UnsupportedFilterError(
84
- `Unsupported filter value for key "${key}": ${JSON.stringify(value)}`
85
- )
86
- }
87
-
88
- return { sql: clauses.join(' AND '), params, paramCount: cursor - startAt }
89
- }
90
-
91
- function compileOperator(
92
- col: string,
93
- op: string,
94
- value: unknown,
95
- ph: () => string
96
- ): { sql: string; params: unknown[] } {
97
- switch (op) {
98
- case 'eq':
99
- return { sql: `${col} = ${ph()}`, params: [coerce(value)] }
100
- case 'neq':
101
- return { sql: `${col} <> ${ph()}`, params: [coerce(value)] }
102
- case 'gt':
103
- return { sql: `${col} > ${ph()}`, params: [coerce(value)] }
104
- case 'gte':
105
- return { sql: `${col} >= ${ph()}`, params: [coerce(value)] }
106
- case 'lt':
107
- return { sql: `${col} < ${ph()}`, params: [coerce(value)] }
108
- case 'lte':
109
- return { sql: `${col} <= ${ph()}`, params: [coerce(value)] }
110
- case 'in': {
111
- if (!Array.isArray(value) || value.length === 0) return { sql: '1 = 0', params: [] }
112
- const placeholders = value.map(() => ph()).join(', ')
113
- return { sql: `${col} IN (${placeholders})`, params: value.map(coerce) }
114
- }
115
- case 'nin': {
116
- if (!Array.isArray(value) || value.length === 0) return { sql: '1 = 1', params: [] }
117
- const placeholders = value.map(() => ph()).join(', ')
118
- return { sql: `${col} NOT IN (${placeholders})`, params: value.map(coerce) }
119
- }
120
- default:
121
- throw new UnsupportedFilterError(`Unknown operator "${op}"`)
122
- }
123
- }
124
-
125
- function isOperatorObject(value: unknown): value is Record<string, unknown> {
126
- if (value === null || typeof value !== 'object' || Array.isArray(value)) return false
127
- return Object.keys(value).every(k => OPERATORS.has(k))
128
- }
129
-
130
- function isPrimitive(value: unknown): boolean {
131
- return typeof value === 'string' || typeof value === 'number' || typeof value === 'boolean'
132
- }
133
-
134
- function coerce(value: unknown): unknown {
135
- if (value === null || value === undefined) return null
136
- if (typeof value === 'boolean') return value ? 1 : 0
137
- return value
138
- }