@strav/search 0.4.31 → 1.0.0-alpha.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +20 -22
- package/src/console/index.ts +5 -0
- package/src/console/search_console_provider.ts +20 -0
- package/src/console/search_flush.ts +49 -0
- package/src/console/search_import.ts +103 -0
- package/src/console/search_list.ts +46 -0
- package/src/console/search_reindex.ts +94 -0
- package/src/drivers/meilisearch/meilisearch_driver.ts +304 -0
- package/src/drivers/memory/memory_driver.ts +344 -0
- package/src/drivers/postgres/apply_search_migration.ts +74 -0
- package/src/drivers/postgres/postgres_fts_driver.ts +493 -135
- package/src/drivers/typesense/typesense_driver.ts +345 -0
- package/src/index.ts +50 -39
- package/src/search_engine.ts +40 -25
- package/src/search_error.ts +86 -0
- package/src/search_manager.ts +112 -94
- package/src/search_provider.ts +68 -6
- package/src/searchable.ts +173 -160
- package/src/searchable_registry.ts +61 -0
- package/src/types.ts +59 -49
- package/README.md +0 -191
- package/src/commands/search_flush.ts +0 -41
- package/src/commands/search_import.ts +0 -43
- package/src/commands/search_optimize.ts +0 -52
- package/src/commands/search_rebuild.ts +0 -73
- package/src/drivers/algolia_driver.ts +0 -170
- package/src/drivers/embedded/embedded_driver.ts +0 -136
- package/src/drivers/embedded/engine/field_registry.ts +0 -97
- package/src/drivers/embedded/engine/fts_query_builder.ts +0 -184
- package/src/drivers/embedded/engine/query_compiler.ts +0 -134
- package/src/drivers/embedded/engine/schema.ts +0 -99
- package/src/drivers/embedded/engine/snippet_formatter.ts +0 -29
- package/src/drivers/embedded/engine/sqlite_engine.ts +0 -255
- package/src/drivers/embedded/engine/typo_expander.ts +0 -138
- package/src/drivers/embedded/errors.ts +0 -15
- package/src/drivers/embedded/filters/filter_compiler.ts +0 -136
- package/src/drivers/embedded/index.ts +0 -3
- package/src/drivers/embedded/storage/paths.ts +0 -23
- package/src/drivers/embedded/types.ts +0 -34
- package/src/drivers/meilisearch_driver.ts +0 -150
- package/src/drivers/null_driver.ts +0 -27
- package/src/drivers/postgres/engine/field_registry.ts +0 -116
- package/src/drivers/postgres/engine/fts_query_builder.ts +0 -105
- package/src/drivers/postgres/engine/pg_engine.ts +0 -300
- package/src/drivers/postgres/engine/query_compiler.ts +0 -165
- package/src/drivers/postgres/engine/schema.ts +0 -187
- package/src/drivers/postgres/engine/snippet_formatter.ts +0 -31
- package/src/drivers/postgres/engine/typo_expander.ts +0 -131
- package/src/drivers/postgres/errors.ts +0 -33
- package/src/drivers/postgres/filters/filter_compiler.ts +0 -138
- package/src/drivers/postgres/index.ts +0 -14
- package/src/drivers/postgres/rebuild/rebuild_inplace.ts +0 -113
- package/src/drivers/postgres/storage/identifiers.ts +0 -46
- package/src/drivers/postgres/types.ts +0 -53
- package/src/drivers/typesense_driver.ts +0 -229
- package/src/errors.ts +0 -18
- package/src/helpers.ts +0 -120
- package/stubs/config/search.ts +0 -57
- package/tsconfig.json +0 -5
|
@@ -1,165 +0,0 @@
|
|
|
1
|
-
import type { SearchOptions } from '../../../types.ts'
|
|
2
|
-
import type { FieldRegistry } from './field_registry.ts'
|
|
3
|
-
import { compileFilter } from '../filters/filter_compiler.ts'
|
|
4
|
-
import { quoteIdent, quoteLiteral, indexTableName } from '../storage/identifiers.ts'
|
|
5
|
-
|
|
6
|
-
/**
 * Result of compiling a search request: the paginated hit query, the matching
 * count query, and the bindings for each.
 */
export interface CompiledSearch {
  /** SELECT that yields the requested page of hits with score and snippet columns. */
  sql: string
  /** Values bound to the `$N` placeholders of `sql`. */
  params: unknown[]
  /** COUNT(*) statement for totalHits: same MATCH + filter, without rank, snippets or paging. */
  countSql: string
  /** Values bound to the `$N` placeholders of `countSql`. */
  countParams: unknown[]
  /** Fields for which a `ts_headline` column (aliased `__snip_<field>`) was requested from PG. */
  snippetColumns: string[]
}
|
|
17
|
-
|
|
18
|
-
/** Option string handed to `ts_headline`; matches are wrapped in `<mark>` tags. */
const DEFAULT_HEADLINE_OPTIONS = [
  'StartSel=<mark>',
  'StopSel=</mark>',
  'MaxWords=35',
  'MinWords=15',
  'ShortWord=0',
  'HighlightAll=false',
  'MaxFragments=2',
].join(',')

/** ts_rank_cd normalization bitmask. 1 = divide by 1+log(doc length), 32 = rank/(rank+1). */
const DEFAULT_RANK_FLAGS = 0x01 | 0x20
|
|
23
|
-
|
|
24
|
-
/** Inputs required by `compileSearch`. */
export interface QueryCompilerOptions {
  /** Index field configuration: filterable, sortable and searchable fields plus the text-search language. */
  registry: FieldRegistry
  /** Schema that holds the index table. */
  schema: string
  /** Index name; resolved to its table together with `schema`. */
  index: string
  /** Output of buildTsqueryExpression — placeholders already start at $1. An empty `sql` means match-all. */
  tsquery: { sql: string; params: string[] }
  /** Caller options; filter, sort, page, perPage and attributesToHighlight are read. */
  search: SearchOptions
}
|
|
32
|
-
|
|
33
|
-
/**
 * Compile a search request into a ranked, paginated SELECT plus a matching
 * COUNT(*) statement.
 *
 * The requested page is selected inside a `ranked` CTE and `ts_headline` is
 * evaluated only on that slice, because headline generation re-tokenizes the
 * raw text of every row it touches. Placeholders are numbered in this order:
 * tsquery bindings, filter bindings, then LIMIT and OFFSET.
 *
 * `page` and `perPage` are truncated to integers and clamped to a minimum of
 * 1. Missing or non-finite values fall back to 1 and 20, so NaN or a
 * fractional offset is never bound to LIMIT/OFFSET.
 *
 * @param opts Field registry, target schema/index, pre-built tsquery and the
 *   caller's search options.
 * @returns Hit query, count query, their bindings, and the fields a snippet
 *   column was emitted for.
 * @throws Propagates errors raised by `compileFilter` (invalid filter) and
 *   `compileOrder` (sort field that is not sortable).
 */
export function compileSearch(opts: QueryCompilerOptions): CompiledSearch {
  const { registry, schema, index, tsquery, search } = opts
  const hasQuery = Boolean(tsquery.sql)
  const sortableSet = new Set(registry.sortable)

  // Filter placeholders continue where the tsquery's own bindings stop.
  const filter = compileFilter(search.filter, new Set(registry.filterable), tsquery.params.length)
  const countParams: unknown[] = [...tsquery.params, ...filter.params]

  const conditions: string[] = []
  if (hasQuery) conditions.push('fts @@ q.query')
  if (filter.sql) conditions.push(filter.sql)
  const where = conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : ''

  const orderBy = compileOrder(search.sort, sortableSet, !hasQuery)

  const perPage = toPositiveInt(search.perPage, 20)
  const page = toPositiveInt(search.page, 1)
  const limitPh = `$${countParams.length + 1}`
  const offsetPh = `$${countParams.length + 2}`
  const params: unknown[] = [...countParams, perPage, (page - 1) * perPage]

  const wantedHighlights = pickHighlightFields(search.attributesToHighlight, registry)
  const lang = `${quoteLiteral(registry.language)}::regconfig`
  const table = indexTableName(schema, index)
  const pageClause = `LIMIT ${limitPh} OFFSET ${offsetPh}`

  // The ranked CTE filters, orders and limits; it returns only id, doc and score.
  const cte = hasQuery
    ? [
        `WITH q AS (SELECT (${tsquery.sql}) AS query),`,
        'ranked AS (',
        `SELECT id, doc, ts_rank_cd(fts, q.query, ${DEFAULT_RANK_FLAGS}) AS score`,
        `FROM ${table}, q`,
        where,
        orderBy,
        pageClause,
        ')',
      ].join('\n')
    : [
        'WITH ranked AS (',
        'SELECT id, doc, 0::real AS score',
        `FROM ${table}`,
        where,
        orderBy,
        pageClause,
        ')',
      ].join('\n')

  // Without a query there is nothing to highlight; an empty tsquery keeps the column shape.
  const headlineQuery = hasQuery ? '(SELECT query FROM q)' : `plainto_tsquery(${lang}, '')`
  const headlineOptions = quoteLiteral(DEFAULT_HEADLINE_OPTIONS)
  const snippetCols = wantedHighlights.map(
    field =>
      `ts_headline(${lang}, coalesce(doc->>${quoteLiteral(field)}, ''), ` +
      `${headlineQuery}, ${headlineOptions}) AS ${quoteIdent(`__snip_${field}`)}`
  )
  const selectCols = ['id', 'doc', 'score', ...snippetCols]

  // Row order is not guaranteed to survive the CTE boundary, so the outer
  // SELECT states its own ORDER BY.
  const outerOrderBy = compileOuterOrder(search.sort, sortableSet, !hasQuery)
  const sql = [cte, `SELECT ${selectCols.join(', ')}`, 'FROM ranked', outerOrderBy].join('\n')

  // The count reuses the MATCH + filter predicate but skips rank, snippets and paging.
  const countSql = hasQuery
    ? `SELECT COUNT(*)::int AS n FROM ${table}, (SELECT (${tsquery.sql}) AS query) q ${where}`
    : `SELECT COUNT(*)::int AS n FROM ${table} ${where}`

  return {
    sql,
    params,
    countSql,
    countParams,
    snippetColumns: wantedHighlights,
  }
}

/** Truncate to an integer >= 1; use `fallback` when the value is missing or not finite. */
function toPositiveInt(value: number | null | undefined, fallback: number): number {
  if (value == null || !Number.isFinite(value)) return fallback
  return Math.max(1, Math.trunc(value))
}
|
|
111
|
-
|
|
112
|
-
/**
 * Build the ORDER BY clause used inside the `ranked` CTE.
 *
 * Every ordering ends with `id` (the index table's primary key) as a
 * tiebreaker. Without a unique final sort key, rows with equal score or equal
 * sort values may land on different LIMIT/OFFSET pages between executions.
 *
 * @param sort Sort specs of the form `field` or `field:desc`; direction
 *   defaults to ascending.
 * @param sortableSet Fields that may be sorted on.
 * @param matchAll True when there is no text query, so no score to rank by.
 * @returns An `ORDER BY ...` clause.
 * @throws Error when a spec names a field that is not in `sortableSet`.
 */
function compileOrder(
  sort: string[] | undefined,
  sortableSet: ReadonlySet<string>,
  matchAll: boolean
): string {
  if (!sort || sort.length === 0) {
    // id alone is already a total order; score needs id to break ties.
    return matchAll ? 'ORDER BY id ASC' : 'ORDER BY score DESC, id ASC'
  }

  const parts = sort.map(spec => {
    const [field, dirRaw] = spec.split(':') as [string, string | undefined]
    if (!field || !sortableSet.has(field)) {
      throw new Error(
        `Field "${field}" is not in sortableAttributes. Add it to the index settings before sorting on it.`
      )
    }
    const dir = dirRaw?.toLowerCase() === 'desc' ? 'DESC' : 'ASC'
    return `${quoteIdent(field)} ${dir}`
  })
  parts.push('id ASC')
  return `ORDER BY ${parts.join(', ')}`
}
|
|
134
|
-
|
|
135
|
-
/**
 * ORDER BY for the outer SELECT — references columns visible on `ranked`.
 *
 * The CTE exposes only id, doc and score, so user sort fields are read back
 * from the JSON document as text (`doc->>'field'`). `id` is appended as a
 * final tiebreaker so rows with equal keys come back in a stable order.
 *
 * @param sort Sort specs of the form `field` or `field:desc`.
 * @param sortableSet Fields that may be sorted on; other specs are skipped.
 * @param matchAll True when there is no text query, so no score to rank by.
 * @returns An `ORDER BY ...` clause, or '' when `sort` holds no usable field.
 */
function compileOuterOrder(
  sort: string[] | undefined,
  sortableSet: ReadonlySet<string>,
  matchAll: boolean
): string {
  if (!sort || sort.length === 0) {
    return matchAll ? 'ORDER BY id ASC' : 'ORDER BY score DESC, id ASC'
  }

  const parts: string[] = []
  for (const spec of sort) {
    const [field, dirRaw] = spec.split(':') as [string, string | undefined]
    // Unknown fields are skipped silently here rather than rejected.
    if (!field || !sortableSet.has(field)) continue
    const dir = dirRaw?.toLowerCase() === 'desc' ? 'DESC' : 'ASC'
    parts.push(`(doc->>${quoteLiteral(field)}) ${dir}`)
  }
  if (parts.length === 0) return ''

  parts.push('id ASC')
  return `ORDER BY ${parts.join(', ')}`
}
|
|
157
|
-
|
|
158
|
-
/**
 * Select the fields that get a snippet column.
 *
 * Returns nothing when the registry uses the default text column or when no
 * highlights were requested; otherwise keeps the requested fields that are
 * searchable, in request order.
 */
function pickHighlightFields(
  requested: string[] | undefined,
  registry: FieldRegistry
): string[] {
  if (registry.usesDefaultTextColumn || !requested || requested.length === 0) {
    return []
  }

  const picked: string[] = []
  for (const field of requested) {
    if (registry.searchable.includes(field)) picked.push(field)
  }
  return picked
}
|
|
@@ -1,187 +0,0 @@
|
|
|
1
|
-
import type { SQL } from 'bun'
|
|
2
|
-
import { quoteIdent, quoteLiteral, indexTableName, termsTableName, metaTableName, bareIndexTable, bareTermsTable } from '../storage/identifiers.ts'
|
|
3
|
-
import { MissingExtensionError } from '../errors.ts'
|
|
4
|
-
import type { FieldRegistry } from './field_registry.ts'
|
|
5
|
-
import type { ResolvedTypoTolerance } from '../types.ts'
|
|
6
|
-
|
|
7
|
-
/** Version stored under the `schema_version` key of `_meta` when an index table is first created. */
const SCHEMA_VERSION = 1
|
|
8
|
-
|
|
9
|
-
/**
 * Make sure the search schema and the shared `_meta` table exist and, when
 * typo tolerance is enabled, that the extensions it relies on are installed.
 * Every statement uses IF NOT EXISTS, so repeated calls are harmless.
 *
 * @param sql Connection used to run the DDL.
 * @param schema Schema that holds all search tables.
 * @param typo Resolved typo-tolerance settings; only `enabled` is read.
 * @throws MissingExtensionError when typo tolerance is enabled and `pg_trgm`
 *   cannot be created.
 */
export async function ensureSchemaAndExtensions(
  sql: SQL,
  schema: string,
  typo: ResolvedTypoTolerance
): Promise<void> {
  await sql.unsafe(`CREATE SCHEMA IF NOT EXISTS ${quoteIdent(schema)}`)

  const createMeta = [
    '',
    `CREATE TABLE IF NOT EXISTS ${metaTableName(schema)} (`,
    'index_name TEXT NOT NULL,',
    'key TEXT NOT NULL,',
    'value TEXT NOT NULL,',
    'PRIMARY KEY (index_name, key)',
    ')',
    '',
  ].join('\n')
  await sql.unsafe(createMeta)

  // Extensions are only needed for typo tolerance.
  if (!typo.enabled) return

  try {
    await sql.unsafe('CREATE EXTENSION IF NOT EXISTS pg_trgm')
  } catch {
    throw new MissingExtensionError('pg_trgm')
  }

  // fuzzystrmatch is optional: it re-ranks trigram candidates with a bounded
  // Levenshtein distance. When it cannot be installed we deliberately carry
  // on with trigram-only matching.
  try {
    await sql.unsafe('CREATE EXTENSION IF NOT EXISTS fuzzystrmatch')
  } catch {
    // ignore
  }
}
|
|
44
|
-
|
|
45
|
-
/**
 * Create the per-index table together with its GIN index, typed-column
 * indexes, fts trigger, terms dictionary and `_meta` rows. Does nothing when
 * the table already exists.
 *
 * @param sql Connection used to run the DDL.
 * @param schema Schema that holds the index table.
 * @param index Index name.
 * @param registry Field configuration (typed columns, language, attribute lists).
 * @param ginFastUpdate Value for the GIN index's `fastupdate` storage option.
 * @returns true when the table was newly created, false when it was already there.
 */
export async function ensureIndexTable(
  sql: SQL,
  schema: string,
  index: string,
  registry: FieldRegistry,
  ginFastUpdate: boolean
): Promise<boolean> {
  const baseName = bareIndexTable(index)
  if (await tableExists(sql, schema, baseName)) return false

  // One stored generated TEXT column per typed (filterable/sortable) field.
  let generatedColumns = ''
  for (const column of registry.typedColumns) {
    generatedColumns += `, ${quoteIdent(column.name)} TEXT GENERATED ALWAYS AS (${column.expression}) STORED`
  }

  const table = indexTableName(schema, index)
  await sql.unsafe(
    [
      '',
      `CREATE TABLE ${table} (`,
      'id TEXT PRIMARY KEY,',
      'doc JSONB NOT NULL,',
      `fts tsvector NOT NULL DEFAULT ''::tsvector${generatedColumns}`,
      ')',
      '',
    ].join('\n')
  )

  const fastUpdate = ginFastUpdate ? 'on' : 'off'
  await sql.unsafe(
    `CREATE INDEX ${quoteIdent(`${baseName}_fts_gin`)} ON ${table} USING gin(fts) WITH (fastupdate = ${fastUpdate})`
  )

  for (const column of registry.typedColumns) {
    await sql.unsafe(
      `CREATE INDEX ${quoteIdent(`${baseName}_${column.name}_idx`)} ON ${table}(${quoteIdent(column.name)})`
    )
  }

  // Safety net: rows inserted without an fts value get one computed from doc
  // by the trigger, using the current language and weights.
  await ensureFtsTrigger(sql, schema, index, registry)

  await ensureTermsDict(sql, schema, index)

  // Record the settings this table was built with; existing rows are kept.
  const meta: Array<[string, string]> = [
    ['schema_version', String(SCHEMA_VERSION)],
    ['language', registry.language],
    ['searchable', JSON.stringify(registry.searchable)],
    ['filterable', JSON.stringify(registry.filterable)],
    ['sortable', JSON.stringify(registry.sortable)],
  ]
  const valueRows = meta.map(([key], i) => `($1, '${key}', $${i + 2})`).join(', ')
  await sql.unsafe(
    `INSERT INTO ${metaTableName(schema)} (index_name, key, value) VALUES ${valueRows} ` +
      'ON CONFLICT (index_name, key) DO NOTHING',
    [index, ...meta.map(([, value]) => value)]
  )

  return true
}
|
|
108
|
-
|
|
109
|
-
/**
 * Create the per-index terms dictionary (term → doc_freq) and its indexes.
 * Does nothing when the table already exists.
 *
 * The trigram index needs the `gin_trgm_ops` operator class, which only
 * exists once pg_trgm is installed. pg_trgm is installed only when typo
 * tolerance is enabled, yet this function runs for every new index, so the
 * trigram index is created only when the operator class is present.
 *
 * @param sql Connection used to run the DDL.
 * @param schema Schema that holds the terms table.
 * @param index Index name.
 */
export async function ensureTermsDict(sql: SQL, schema: string, index: string): Promise<void> {
  const baseName = bareTermsTable(index)
  if (await tableExists(sql, schema, baseName)) return

  const table = termsTableName(schema, index)
  await sql.unsafe(
    [
      '',
      `CREATE TABLE ${table} (`,
      'term TEXT PRIMARY KEY,',
      'doc_freq INTEGER NOT NULL DEFAULT 0',
      ')',
      '',
    ].join('\n')
  )

  // Skip the trigram index rather than fail when pg_trgm is not installed.
  const trgmOpclass = (await sql.unsafe(
    `SELECT 1 AS present FROM pg_opclass WHERE opcname = 'gin_trgm_ops' LIMIT 1`
  )) as Array<Record<string, unknown>>
  if (trgmOpclass.length > 0) {
    await sql.unsafe(
      `CREATE INDEX ${quoteIdent(`${baseName}_trgm`)} ON ${table} USING gin (term gin_trgm_ops)`
    )
  }

  await sql.unsafe(`CREATE INDEX ${quoteIdent(`${baseName}_len`)} ON ${table} (length(term))`)
}
|
|
128
|
-
|
|
129
|
-
/**
 * Install the trigger function and BEFORE INSERT OR UPDATE trigger that fill
 * `fts` from `doc` whenever a row arrives with a NULL or empty `fts`.
 *
 * In default-text mode the whole document is indexed with weight 'A';
 * otherwise each searchable attribute is indexed with its configured weight
 * and the weighted vectors are concatenated.
 */
async function ensureFtsTrigger(
  sql: SQL,
  schema: string,
  index: string,
  registry: FieldRegistry
): Promise<void> {
  const fnName = `${bareIndexTable(index)}_fts_trigger`
  const lang = quoteLiteral(registry.language)

  let vectorExpr: string
  if (registry.usesDefaultTextColumn) {
    vectorExpr = `setweight(to_tsvector(${lang}::regconfig, ${defaultTextProjection()}), 'A')`
  } else {
    const weighted: string[] = []
    for (const attr of registry.searchable) {
      const weight = registry.weights.get(attr)!
      weighted.push(
        `setweight(to_tsvector(${lang}::regconfig, coalesce(NEW.doc->>${quoteLiteral(attr)}, '')), '${weight}')`
      )
    }
    vectorExpr = weighted.join(' || ')
  }

  await sql.unsafe(
    [
      '',
      `CREATE OR REPLACE FUNCTION ${quoteIdent(schema)}.${quoteIdent(fnName)}() RETURNS trigger AS $$`,
      'BEGIN',
      `IF NEW.fts IS NULL OR NEW.fts = ''::tsvector THEN`,
      `NEW.fts := ${vectorExpr};`,
      'END IF;',
      'RETURN NEW;',
      'END;',
      '$$ LANGUAGE plpgsql',
      '',
    ].join('\n')
  )

  await sql.unsafe(
    [
      '',
      `CREATE TRIGGER ${quoteIdent(`${bareIndexTable(index)}_fts_trg`)}`,
      `BEFORE INSERT OR UPDATE ON ${indexTableName(schema, index)}`,
      `FOR EACH ROW EXECUTE FUNCTION ${quoteIdent(schema)}.${quoteIdent(fnName)}()`,
      '',
    ].join('\n')
  )
}
|
|
163
|
-
|
|
164
|
-
/**
 * SQL expression used by the trigger fallback in default mode (no
 * searchableAttributes): joins the text of every top-level value of
 * `NEW.doc` with single spaces, or yields '' when there are none.
 */
function defaultTextProjection(): string {
  const joinedValues = `coalesce(string_agg(value, ' '), '')`
  return `(SELECT ${joinedValues} FROM jsonb_each_text(NEW.doc))`
}
|
|
169
|
-
|
|
170
|
-
/** True when an ordinary table (relkind 'r') named `table` exists in `schema`. */
async function tableExists(sql: SQL, schema: string, table: string): Promise<boolean> {
  const query =
    'SELECT 1 AS present FROM pg_class c JOIN pg_namespace n ON n.oid = c.relnamespace ' +
    `WHERE n.nspname = $1 AND c.relname = $2 AND c.relkind = 'r' LIMIT 1`
  const found = (await sql.unsafe(query, [schema, table])) as Array<Record<string, unknown>>
  return found.length !== 0
}
|
|
178
|
-
|
|
179
|
-
/**
 * Remove everything that belongs to one index: the index table, the terms
 * table, the fts trigger function and the index's `_meta` rows. The DROP
 * statements use IF EXISTS, so dropping a missing index is not an error.
 */
export async function dropIndex(sql: SQL, schema: string, index: string): Promise<void> {
  const indexTable = indexTableName(schema, index)
  await sql.unsafe(`DROP TABLE IF EXISTS ${indexTable} CASCADE`)

  const termsTable = termsTableName(schema, index)
  await sql.unsafe(`DROP TABLE IF EXISTS ${termsTable} CASCADE`)

  const triggerFn = `${quoteIdent(schema)}.${quoteIdent(`${bareIndexTable(index)}_fts_trigger`)}`
  await sql.unsafe(`DROP FUNCTION IF EXISTS ${triggerFn}() CASCADE`)

  await sql.unsafe(`DELETE FROM ${metaTableName(schema)} WHERE index_name = $1`, [index])
}
|
|
@@ -1,31 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* `ts_headline` returns text with literal `<mark>` / `</mark>` markers around
|
|
3
|
-
* matched terms. The surrounding text comes from the source document — which
|
|
4
|
-
* may itself contain HTML the caller didn't escape. We HTML-escape the
|
|
5
|
-
* snippet, then restore the marker tags, mirroring what the embedded driver
|
|
6
|
-
* does with sentinel markers.
|
|
7
|
-
*/
|
|
8
|
-
/** Highlight markers emitted by `ts_headline`. */
const OPEN_TAG = '<mark>'
const CLOSE_TAG = '</mark>'

// Stand-ins for the marker tags while the snippet is HTML-escaped. They are
// delimited by a control character that formatSnippet strips from the input
// first, so document text can never be mistaken for a placeholder.
const SENTINEL = '\u0001'
const OPEN_PLACEHOLDER = `${SENTINEL}STRAV_OPEN${SENTINEL}`
const CLOSE_PLACEHOLDER = `${SENTINEL}STRAV_CLOSE${SENTINEL}`

/**
 * Turn a raw `ts_headline` result into HTML that is safe to render: all
 * document text is HTML-escaped while the `<mark>` / `</mark>` highlight tags
 * are kept as real markup.
 *
 * @param snippet Raw headline text; null, undefined and '' yield ''.
 * @returns The escaped snippet with highlight tags restored.
 */
export function formatSnippet(snippet: string | null | undefined): string {
  if (!snippet) return ''
  // Drop stray sentinel characters, then protect the highlight tags.
  const swapped = snippet
    .split(SENTINEL)
    .join('')
    .split(OPEN_TAG)
    .join(OPEN_PLACEHOLDER)
    .split(CLOSE_TAG)
    .join(CLOSE_PLACEHOLDER)
  const escaped = escapeHtml(swapped)
  return escaped.split(OPEN_PLACEHOLDER).join(OPEN_TAG).split(CLOSE_PLACEHOLDER).join(CLOSE_TAG)
}

/** Escape the five HTML-significant characters; `&` goes first so entities are not double-escaped. */
function escapeHtml(input: string): string {
  return input
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;')
}
|
|
@@ -1,131 +0,0 @@
|
|
|
1
|
-
import type { SQL } from 'bun'
|
|
2
|
-
import { termsTableName } from '../storage/identifiers.ts'
|
|
3
|
-
import type { ResolvedTypoTolerance } from '../types.ts'
|
|
4
|
-
|
|
5
|
-
/**
 * Tokeniser used for terms-dict maintenance (noted as mirroring the embedded
 * driver). Lower-cases the text, splits on every run of characters that are
 * neither letters nor digits, and drops tokens shorter than two characters.
 */
export function tokenize(text: string): string[] {
  if (!text) return []
  return text
    .toLowerCase()
    .split(/[^\p{L}\p{N}]+/u)
    .filter(token => token.length >= 2)
}
|
|
14
|
-
|
|
15
|
-
/**
 * Count a document's terms in the terms dictionary: each distinct token of
 * `text` raises its term's `doc_freq` by one.
 *
 * A term seen for the first time is inserted with `doc_freq = 1`. Relying on
 * the column default (0) would leave every count one short, so a term still
 * present in one document would be purged as soon as any other document
 * containing it is removed.
 *
 * @param sql Connection used to run the statement.
 * @param schema Schema that holds the terms table.
 * @param index Index name.
 * @param text Document text to tokenise; no statement is issued when it
 *   yields no tokens.
 */
export async function recordTerms(
  sql: SQL,
  schema: string,
  index: string,
  text: string
): Promise<void> {
  const unique = Array.from(new Set(tokenize(text)))
  if (unique.length === 0) return

  const table = termsTableName(schema, index)
  const rows = unique.map((_, i) => `($${i + 1}, 1)`).join(', ')
  await sql.unsafe(
    `INSERT INTO ${table} (term, doc_freq) VALUES ${rows} ` +
      `ON CONFLICT (term) DO UPDATE SET doc_freq = ${table}.doc_freq + 1`,
    unique
  )
}
|
|
32
|
-
|
|
33
|
-
/**
 * Remove a document's terms from the terms dictionary: each distinct token of
 * `text` lowers its term's `doc_freq` by one, and terms of this document
 * whose count reaches zero are deleted.
 *
 * The purge is restricted to the terms just decremented. It therefore uses
 * the primary key instead of scanning the whole table, and it never deletes
 * rows that belong to other documents.
 *
 * @param sql Connection used to run the statements.
 * @param schema Schema that holds the terms table.
 * @param index Index name.
 * @param text Document text to tokenise; no statement is issued when it
 *   yields no tokens.
 */
export async function unrecordTerms(
  sql: SQL,
  schema: string,
  index: string,
  text: string
): Promise<void> {
  const unique = Array.from(new Set(tokenize(text)))
  if (unique.length === 0) return

  const table = termsTableName(schema, index)
  const placeholders = unique.map((_, i) => `$${i + 1}`).join(', ')
  await sql.unsafe(
    `UPDATE ${table} SET doc_freq = doc_freq - 1 WHERE term IN (${placeholders})`,
    unique
  )
  await sql.unsafe(
    `DELETE FROM ${table} WHERE doc_freq <= 0 AND term IN (${placeholders})`,
    unique
  )
}
|
|
50
|
-
|
|
51
|
-
/**
 * Look up near-miss spellings of query tokens in the terms dictionary.
 *
 * Candidates are pre-filtered by trigram similarity. When fuzzystrmatch is
 * available they are additionally filtered by Levenshtein distance, because
 * trigram similarity on short tokens is statistically noisy. Results are
 * always ordered by similarity, best first, before the candidate limit is
 * applied.
 *
 * @param sql Connection used to run the lookups.
 * @param schema Schema that holds the terms table.
 * @param index Index name.
 * @param tokens Query tokens; duplicates are looked up once, and tokens
 *   shorter than `settings.minTokenLength` are skipped.
 * @param settings Resolved typo-tolerance settings.
 * @param hasFuzzystrmatch Whether `levenshtein()` may be used.
 * @param maxCandidates Maximum number of alternatives returned per token.
 * @returns Map from token to its alternatives; tokens without alternatives
 *   are absent.
 */
export async function expandTokens(
  sql: SQL,
  schema: string,
  index: string,
  tokens: string[],
  settings: ResolvedTypoTolerance,
  hasFuzzystrmatch: boolean,
  maxCandidates = 8
): Promise<Map<string, string[]>> {
  const out = new Map<string, string[]>()
  if (!settings.enabled || tokens.length === 0) return out

  // levenshtein() raises an error when either argument exceeds 255 characters.
  const maxLevenshteinLength = 255
  const table = termsTableName(schema, index)

  // similarity() is compared against the threshold explicitly instead of
  // using the `%` operator, whose threshold is a session-level setting; the
  // caller's session is left untouched.
  const trigramSql = [
    `SELECT term FROM ${table}`,
    'WHERE similarity(term, $1) >= $2 AND term <> $1',
    'ORDER BY similarity(term, $1) DESC',
    'LIMIT $3',
  ].join('\n')
  const levenshteinSql = [
    'WITH cands AS (',
    `SELECT term, similarity(term, $1) AS sim FROM ${table}`,
    `WHERE similarity(term, $1) >= $2 AND term <> $1 AND length(term) <= ${maxLevenshteinLength}`,
    'ORDER BY sim DESC',
    'LIMIT 32',
    ')',
    'SELECT term FROM cands',
    'WHERE levenshtein(term, $1) <= $3',
    // Row order of a CTE is not guaranteed to carry over; order explicitly.
    'ORDER BY sim DESC',
    'LIMIT $4',
  ].join('\n')

  // A Set keeps first-occurrence order and avoids repeating identical lookups.
  for (const token of new Set(tokens)) {
    if (token.length < settings.minTokenLength) continue

    const useLevenshtein = hasFuzzystrmatch && token.length <= maxLevenshteinLength
    const rows = (await sql.unsafe(
      useLevenshtein ? levenshteinSql : trigramSql,
      useLevenshtein
        ? [token, settings.similarity, settings.maxDistance, maxCandidates]
        : [token, settings.similarity, maxCandidates]
    )) as Array<{ term: string }>

    if (rows.length > 0) out.set(token, rows.map(r => r.term))
  }

  return out
}
|
|
98
|
-
|
|
99
|
-
/**
 * Resolve user-provided typo tolerance settings into concrete numbers.
 *
 * 'off' disables typo tolerance; `undefined`, 'auto' and an options object
 * enable it. Any number not supplied falls back to its default:
 * minTokenLength 4, maxDistance 1, similarity 0.4.
 */
export function resolveTypoTolerance(
  setting:
    | 'off'
    | 'auto'
    | { minTokenLength?: number; maxDistance?: number; similarity?: number }
    | undefined
): ResolvedTypoTolerance {
  const overrides: { minTokenLength?: number; maxDistance?: number; similarity?: number } =
    typeof setting === 'object' ? setting : {}

  return {
    enabled: setting !== 'off',
    minTokenLength: overrides.minTokenLength ?? 4,
    maxDistance: overrides.maxDistance ?? 1,
    similarity: overrides.similarity ?? 0.4,
  }
}
|
|
120
|
-
|
|
121
|
-
/**
 * Detect whether a `levenshtein` function (provided by fuzzystrmatch) is
 * registered in pg_proc. Any failure of the probe counts as "not available".
 */
export async function hasFuzzystrmatch(sql: SQL): Promise<boolean> {
  const probe = `SELECT 1 FROM pg_proc WHERE proname = 'levenshtein' LIMIT 1`
  try {
    const found = (await sql.unsafe(probe)) as Array<Record<string, unknown>>
    return found.length !== 0
  } catch {
    return false
  }
}
|
|
@@ -1,33 +0,0 @@
|
|
|
1
|
-
import { SearchError } from '../../errors.ts'
|
|
2
|
-
|
|
3
|
-
/** Base class of the error types defined for the postgres-fts driver. */
export class PostgresFtsError extends SearchError {}
|
|
4
|
-
|
|
5
|
-
/** A Postgres extension the driver needs is unavailable; the message explains how to install or avoid it. */
export class MissingExtensionError extends PostgresFtsError {
  /** @param extension Name of the missing extension, e.g. `pg_trgm`. */
  constructor(extension: string) {
    super(
      [
        `Postgres extension "${extension}" is required by the postgres-fts driver.`,
        `Run \`CREATE EXTENSION ${extension}\` as a superuser, or set typoTolerance: 'off' if you can't.`,
      ].join(' ')
    )
  }
}
|
|
13
|
-
|
|
14
|
-
/** Error type for conditions that need an index rebuild; the caller supplies the full message. */
export class RebuildRequiredError extends PostgresFtsError {
  /** @param message Complete error message, passed through unchanged. */
  constructor(message: string) {
    super(message)
  }
}
|
|
19
|
-
|
|
20
|
-
/** A filter could not be compiled for the postgres-fts driver. */
export class UnsupportedFilterError extends PostgresFtsError {
  /** @param message Reason, appended to a fixed "filter is unsupported" prefix. */
  constructor(message: string) {
    super('Postgres-fts driver filter is unsupported: ' + message)
  }
}
|
|
25
|
-
|
|
26
|
-
/** The driver has no Postgres connection to work with; the message lists the two ways to provide one. */
export class MissingConnectionError extends PostgresFtsError {
  constructor() {
    super(
      [
        'PostgresFtsDriver has no Postgres connection.',
        'Pass `connection` in the driver config, or bootstrap @strav/database first so Database.raw is available.',
      ].join(' ')
    )
  }
}
|
|
@@ -1,138 +0,0 @@
|
|
|
1
|
-
import { UnsupportedFilterError } from '../errors.ts'
|
|
2
|
-
import { quoteIdent } from '../storage/identifiers.ts'
|
|
3
|
-
|
|
4
|
-
/** A filter object compiled to a parameterized SQL predicate. */
export interface CompiledFilter {
  /** Predicate to place after WHERE (the keyword itself is not included); '' when there is no filter. */
  sql: string
  /** Bound values, in the order their `$N` placeholders appear in `sql`. */
  params: unknown[]
  /** How many placeholders this filter consumed, so callers can number the ones that follow. */
  paramCount: number
}
|
|
12
|
-
|
|
13
|
-
/** Operator keys recognised in operator objects; an object whose keys are all in this set is an operator object. */
const OPERATORS = new Set(['eq', 'neq', 'gt', 'gte', 'lt', 'lte', 'in', 'nin'])
|
|
14
|
-
|
|
15
|
-
/**
 * Compile a filter object into a parameterized SQL WHERE fragment. Noted as
 * mirroring the embedded driver's contract (same operators, same shape).
 *
 * Each key becomes one or more predicates joined with AND:
 * `null` → IS NULL, array → IN (...) (an empty array matches nothing),
 * operator object → one predicate per operator, primitive → equality.
 * Keys whose value is `undefined` are skipped.
 *
 * Placeholders are numbered from `startAt + 1` so callers can combine the
 * fragment with their own bindings.
 *
 * @param filter Filter object; a falsy value yields an empty fragment.
 * @param filterableAttributes Fields that may be filtered on.
 * @param startAt Number of placeholders already used by the caller.
 * @throws UnsupportedFilterError for raw string filters, fields that are not
 *   filterable, and values of an unsupported shape.
 */
export function compileFilter(
  filter: Record<string, unknown> | string | undefined,
  filterableAttributes: ReadonlySet<string>,
  startAt = 0
): CompiledFilter {
  if (!filter) return { sql: '', params: [], paramCount: 0 }

  if (typeof filter === 'string') {
    throw new UnsupportedFilterError(
      'Raw string filters are not supported by the postgres-fts driver. ' +
        'Pass an object like `{ status: "published" }` instead.'
    )
  }

  const fragments: string[] = []
  const bindings: unknown[] = []
  let used = startAt

  // Hands out the next `$N` placeholder.
  const ph = (): string => {
    used += 1
    return `$${used}`
  }

  for (const [key, value] of Object.entries(filter)) {
    if (value === undefined) continue
    if (!filterableAttributes.has(key)) {
      throw new UnsupportedFilterError(
        `Field "${key}" is not in filterableAttributes. Add it to the index settings before filtering on it.`
      )
    }

    const col = quoteIdent(key)

    if (value === null) {
      fragments.push(`${col} IS NULL`)
    } else if (Array.isArray(value)) {
      if (value.length === 0) {
        // IN () is not valid SQL; an empty list matches nothing.
        fragments.push('1 = 0')
      } else {
        const list = value.map(() => ph()).join(', ')
        fragments.push(`${col} IN (${list})`)
        bindings.push(...value.map(coerce))
      }
    } else if (isOperatorObject(value)) {
      for (const [op, operand] of Object.entries(value)) {
        const compiled = compileOperator(col, op, operand, ph)
        fragments.push(compiled.sql)
        bindings.push(...compiled.params)
      }
    } else if (isPrimitive(value)) {
      fragments.push(`${col} = ${ph()}`)
      bindings.push(coerce(value))
    } else {
      throw new UnsupportedFilterError(
        `Unsupported filter value for key "${key}": ${JSON.stringify(value)}`
      )
    }
  }

  return { sql: fragments.join(' AND '), params: bindings, paramCount: used - startAt }
}
|
|
90
|
-
|
|
91
|
-
/**
 * Compile one `{ operator: value }` pair into a predicate on `col`.
 *
 * `eq` / `neq` with a literal `null` become `IS NULL` / `IS NOT NULL`,
 * matching how a top-level `null` filter value is handled; `= NULL` and
 * `<> NULL` are never true in SQL. An `undefined` operand is still bound as
 * NULL and therefore matches nothing.
 *
 * @param col Already-quoted column identifier.
 * @param op One of eq, neq, gt, gte, lt, lte, in, nin.
 * @param value Operand; `in` / `nin` expect an array.
 * @param ph Returns the next `$N` placeholder each time it is called.
 * @returns The predicate and the values bound to its placeholders.
 * @throws UnsupportedFilterError when `op` is not a known operator.
 */
function compileOperator(
  col: string,
  op: string,
  value: unknown,
  ph: () => string
): { sql: string; params: unknown[] } {
  switch (op) {
    case 'eq':
      if (value === null) return { sql: `${col} IS NULL`, params: [] }
      return { sql: `${col} = ${ph()}`, params: [coerce(value)] }
    case 'neq':
      if (value === null) return { sql: `${col} IS NOT NULL`, params: [] }
      return { sql: `${col} <> ${ph()}`, params: [coerce(value)] }
    case 'gt':
      return { sql: `${col} > ${ph()}`, params: [coerce(value)] }
    case 'gte':
      return { sql: `${col} >= ${ph()}`, params: [coerce(value)] }
    case 'lt':
      return { sql: `${col} < ${ph()}`, params: [coerce(value)] }
    case 'lte':
      return { sql: `${col} <= ${ph()}`, params: [coerce(value)] }
    case 'in': {
      // A missing or empty list matches nothing.
      if (!Array.isArray(value) || value.length === 0) return { sql: '1 = 0', params: [] }
      const placeholders = value.map(() => ph()).join(', ')
      return { sql: `${col} IN (${placeholders})`, params: value.map(coerce) }
    }
    case 'nin': {
      // A missing or empty exclusion list excludes nothing.
      if (!Array.isArray(value) || value.length === 0) return { sql: '1 = 1', params: [] }
      const placeholders = value.map(() => ph()).join(', ')
      return { sql: `${col} NOT IN (${placeholders})`, params: value.map(coerce) }
    }
    default:
      throw new UnsupportedFilterError(`Unknown operator "${op}"`)
  }
}
|
|
124
|
-
|
|
125
|
-
/** True for a non-null, non-array object whose keys are all known operators (an empty object qualifies). */
function isOperatorObject(value: unknown): value is Record<string, unknown> {
  const isPlainObject = typeof value === 'object' && value !== null && !Array.isArray(value)
  if (!isPlainObject) return false

  for (const key of Object.keys(value as object)) {
    if (!OPERATORS.has(key)) return false
  }
  return true
}
|
|
129
|
-
|
|
130
|
-
/** True for strings, numbers and booleans — the values that can be bound directly for an equality test. */
function isPrimitive(value: unknown): boolean {
  switch (typeof value) {
    case 'string':
    case 'number':
    case 'boolean':
      return true
    default:
      return false
  }
}
|
|
133
|
-
|
|
134
|
-
/** Prepare a value for binding: null/undefined become null, booleans become 1 or 0, anything else passes through. */
function coerce(value: unknown): unknown {
  if (value == null) return null
  if (value === true) return 1
  if (value === false) return 0
  return value
}
|