@strav/search 0.4.30 → 1.0.0-alpha.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +20 -22
- package/src/console/index.ts +5 -0
- package/src/console/search_console_provider.ts +20 -0
- package/src/console/search_flush.ts +49 -0
- package/src/console/search_import.ts +103 -0
- package/src/console/search_list.ts +46 -0
- package/src/console/search_reindex.ts +94 -0
- package/src/drivers/meilisearch/meilisearch_driver.ts +304 -0
- package/src/drivers/memory/memory_driver.ts +344 -0
- package/src/drivers/postgres/apply_search_migration.ts +74 -0
- package/src/drivers/postgres/postgres_fts_driver.ts +493 -135
- package/src/drivers/typesense/typesense_driver.ts +345 -0
- package/src/index.ts +50 -39
- package/src/search_engine.ts +40 -25
- package/src/search_error.ts +86 -0
- package/src/search_manager.ts +112 -94
- package/src/search_provider.ts +68 -6
- package/src/searchable.ts +173 -160
- package/src/searchable_registry.ts +61 -0
- package/src/types.ts +59 -49
- package/README.md +0 -191
- package/src/commands/search_flush.ts +0 -41
- package/src/commands/search_import.ts +0 -43
- package/src/commands/search_optimize.ts +0 -52
- package/src/commands/search_rebuild.ts +0 -73
- package/src/drivers/algolia_driver.ts +0 -170
- package/src/drivers/embedded/embedded_driver.ts +0 -136
- package/src/drivers/embedded/engine/field_registry.ts +0 -97
- package/src/drivers/embedded/engine/fts_query_builder.ts +0 -184
- package/src/drivers/embedded/engine/query_compiler.ts +0 -134
- package/src/drivers/embedded/engine/schema.ts +0 -99
- package/src/drivers/embedded/engine/snippet_formatter.ts +0 -29
- package/src/drivers/embedded/engine/sqlite_engine.ts +0 -255
- package/src/drivers/embedded/engine/typo_expander.ts +0 -138
- package/src/drivers/embedded/errors.ts +0 -15
- package/src/drivers/embedded/filters/filter_compiler.ts +0 -136
- package/src/drivers/embedded/index.ts +0 -3
- package/src/drivers/embedded/storage/paths.ts +0 -23
- package/src/drivers/embedded/types.ts +0 -34
- package/src/drivers/meilisearch_driver.ts +0 -150
- package/src/drivers/null_driver.ts +0 -27
- package/src/drivers/postgres/engine/field_registry.ts +0 -116
- package/src/drivers/postgres/engine/fts_query_builder.ts +0 -105
- package/src/drivers/postgres/engine/pg_engine.ts +0 -300
- package/src/drivers/postgres/engine/query_compiler.ts +0 -165
- package/src/drivers/postgres/engine/schema.ts +0 -187
- package/src/drivers/postgres/engine/snippet_formatter.ts +0 -31
- package/src/drivers/postgres/engine/typo_expander.ts +0 -131
- package/src/drivers/postgres/errors.ts +0 -33
- package/src/drivers/postgres/filters/filter_compiler.ts +0 -138
- package/src/drivers/postgres/index.ts +0 -14
- package/src/drivers/postgres/rebuild/rebuild_inplace.ts +0 -113
- package/src/drivers/postgres/storage/identifiers.ts +0 -46
- package/src/drivers/postgres/types.ts +0 -53
- package/src/drivers/typesense_driver.ts +0 -229
- package/src/errors.ts +0 -18
- package/src/helpers.ts +0 -120
- package/stubs/config/search.ts +0 -57
- package/tsconfig.json +0 -5
|
@@ -1,184 +1,542 @@
|
|
|
1
|
-
|
|
1
|
+
/**
|
|
2
|
+
* `PostgresFtsDriver` — `SearchEngine` backed by Postgres
|
|
3
|
+
* `tsvector` + GIN, one table per index inside a dedicated
|
|
4
|
+
* schema (`strav_search` by default).
|
|
5
|
+
*
|
|
6
|
+
* What the driver provisions for each `createIndex(name, …)`:
|
|
7
|
+
*
|
|
8
|
+
* ```
|
|
9
|
+
* CREATE TABLE "strav_search"."<name>" (
|
|
10
|
+
* id text PRIMARY KEY,
|
|
11
|
+
* tenant_id text, -- nullable for single-tenant apps
|
|
12
|
+
* document jsonb NOT NULL,
|
|
13
|
+
* fts tsvector NOT NULL
|
|
14
|
+
* );
|
|
15
|
+
* CREATE INDEX "<name>_fts" ON … USING GIN(fts);
|
|
16
|
+
* CREATE INDEX "<name>_tenant" ON … (tenant_id) WHERE tenant_id IS NOT NULL;
|
|
17
|
+
* -- Per filterable / sortable attribute:
|
|
18
|
+
* CREATE INDEX "<name>_<attr>" ON … ((document->>'<attr>'));
|
|
19
|
+
* -- RLS on `tenant_id`:
|
|
20
|
+
* ALTER TABLE … ENABLE ROW LEVEL SECURITY;
|
|
21
|
+
* ALTER TABLE … FORCE ROW LEVEL SECURITY;
|
|
22
|
+
* CREATE POLICY "<name>_isolate" ON …
|
|
23
|
+
* USING (tenant_id IS NULL OR tenant_id = current_setting('app.tenant_id', true));
|
|
24
|
+
* ```
|
|
25
|
+
*
|
|
26
|
+
* Settings (search/filter/sort attributes, primary key,
|
|
27
|
+
* language) live in `strav_search._meta` so multi-process
|
|
28
|
+
* deployments stay coherent. The driver caches the resolved
|
|
29
|
+
* SQL fragments per index in memory.
|
|
30
|
+
*
|
|
31
|
+
* Search uses `websearch_to_tsquery` + `ts_rank_cd` for
|
|
32
|
+
* ranking, plus `ts_headline` snippets for any field in
|
|
33
|
+
* `attributesToHighlight`. The total hit count comes from a
|
|
34
|
+
* `COUNT(*) OVER ()` window so pagination metadata is correct
|
|
35
|
+
* without a separate query.
|
|
36
|
+
*
|
|
37
|
+
* What V1 does **not** do:
|
|
38
|
+
*
|
|
39
|
+
* - **Typo tolerance / `pg_trgm`.** The 0.x driver shipped a
|
|
40
|
+
* trigram-based fuzzy expander; the surface area is large
|
|
41
|
+
* and the trade-offs index-specific. Deferred to V1.1 —
|
|
42
|
+
* apps that need it today drop down to the Memory or
|
|
43
|
+
* Meilisearch drivers.
|
|
44
|
+
* - **In-place rebuild tiers.** Apps re-index via the mixin's
|
|
45
|
+
* `Repository.importAll()` (or the future `search:reindex`
|
|
46
|
+
* console command) — no per-driver size heuristic.
|
|
47
|
+
* - **Adding a `filterableAttribute` to a large existing
|
|
48
|
+
* index** is fine — the index is built lazily via
|
|
49
|
+
* `CREATE INDEX IF NOT EXISTS` on `(document->>'<attr>')`,
|
|
50
|
+
* no `ALTER TABLE ADD COLUMN`.
|
|
51
|
+
*/
|
|
52
|
+
|
|
53
|
+
import {
|
|
54
|
+
currentTransactionalContext,
|
|
55
|
+
type DatabaseExecutor,
|
|
56
|
+
type PostgresDatabase,
|
|
57
|
+
} from '@strav/database'
|
|
58
|
+
import { IndexNotFoundError, SearchError, SearchQueryError } from '../../search_error.ts'
|
|
2
59
|
import type { SearchEngine } from '../../search_engine.ts'
|
|
3
60
|
import type {
|
|
61
|
+
DriverConfig,
|
|
62
|
+
IndexSettings,
|
|
4
63
|
SearchDocument,
|
|
64
|
+
SearchHit,
|
|
5
65
|
SearchOptions,
|
|
6
66
|
SearchResult,
|
|
7
|
-
IndexSettings,
|
|
8
|
-
DriverConfig,
|
|
9
67
|
} from '../../types.ts'
|
|
10
|
-
import {
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
68
|
+
import { DEFAULT_SEARCH_SCHEMA, validateIdentifier } from './apply_search_migration.ts'
|
|
69
|
+
|
|
70
|
+
interface ResolvedIndex {
|
|
71
|
+
settings: IndexSettings
|
|
72
|
+
language: string
|
|
73
|
+
/** Tokenized + validated attribute names — safe to splice into SQL. */
|
|
74
|
+
searchable: string[]
|
|
75
|
+
filterable: Set<string>
|
|
76
|
+
sortable: Set<string>
|
|
77
|
+
/** SQL fragment that produces the tsvector for the document JSONB. */
|
|
78
|
+
ftsExpression: string
|
|
79
|
+
}
|
|
15
80
|
|
|
16
|
-
const DEFAULT_SCHEMA = 'strav_search'
|
|
17
81
|
const DEFAULT_LANGUAGE = 'english'
|
|
18
|
-
const
|
|
82
|
+
const TENANT_SETTING = 'app.tenant_id'
|
|
83
|
+
|
|
84
|
+
export interface PostgresFtsDriverOptions {
|
|
85
|
+
db: PostgresDatabase
|
|
86
|
+
/** Schema for index tables. Defaults to `strav_search`. */
|
|
87
|
+
schema?: string
|
|
88
|
+
/** Default text-search configuration (`english`, `french`, ...). */
|
|
89
|
+
language?: string
|
|
90
|
+
}
|
|
19
91
|
|
|
20
|
-
/**
|
|
21
|
-
* Postgres-backed full-text search driver. Implements the same `SearchEngine`
|
|
22
|
-
* interface as the embedded SQLite driver — drop-in swap by config.
|
|
23
|
-
*
|
|
24
|
-
* Sized for higher-volume workloads (1M-100M docs per index) using `tsvector`
|
|
25
|
-
* + GIN + `pg_trgm` for typo tolerance + `ts_headline` for snippets.
|
|
26
|
-
*
|
|
27
|
-
* Connection: pass `connection` (a Bun `SQL` instance) in the driver config,
|
|
28
|
-
* or rely on `Database.raw` from `@strav/database` (must be bootstrapped).
|
|
29
|
-
*/
|
|
30
92
|
export class PostgresFtsDriver implements SearchEngine {
|
|
31
93
|
readonly name = 'postgres-fts'
|
|
32
94
|
|
|
33
|
-
private readonly
|
|
34
|
-
private readonly
|
|
95
|
+
private readonly db: PostgresDatabase
|
|
96
|
+
private readonly schema: string
|
|
35
97
|
private readonly defaultLanguage: string
|
|
36
|
-
private readonly
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
98
|
+
private readonly indexes = new Map<string, ResolvedIndex>()
|
|
99
|
+
|
|
100
|
+
constructor(options: PostgresFtsDriverOptions) {
|
|
101
|
+
this.db = options.db
|
|
102
|
+
this.schema = validateIdentifier(options.schema ?? DEFAULT_SEARCH_SCHEMA, 'schema')
|
|
103
|
+
this.defaultLanguage = options.language ?? DEFAULT_LANGUAGE
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
static fromConfig(db: PostgresDatabase, config: DriverConfig): PostgresFtsDriver {
|
|
107
|
+
return new PostgresFtsDriver({
|
|
108
|
+
db,
|
|
109
|
+
...(typeof config.schema === 'string' ? { schema: config.schema } : {}),
|
|
110
|
+
...(typeof config.language === 'string' ? { language: config.language } : {}),
|
|
111
|
+
})
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
/**
|
|
115
|
+
* Route reads + writes through the ambient `UnitOfWork`
|
|
116
|
+
* transaction when one is active (e.g., inside
|
|
117
|
+
* `tenants.withTenant(...)`), so RLS scoping applies
|
|
118
|
+
* uniformly with the rest of the app's database calls.
|
|
119
|
+
*/
|
|
120
|
+
private exec(): DatabaseExecutor {
|
|
121
|
+
const ambient = currentTransactionalContext()
|
|
122
|
+
if (ambient) return ambient.tx
|
|
123
|
+
return this.db as unknown as DatabaseExecutor
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
// ─── Index lifecycle ────────────────────────────────────────────────────
|
|
127
|
+
|
|
128
|
+
async createIndex(index: string, settings: IndexSettings = {}): Promise<void> {
|
|
129
|
+
const indexName = validateIdentifier(index, 'index')
|
|
130
|
+
const language = sanitizeLanguage(this.defaultLanguage)
|
|
131
|
+
const resolved = resolveIndexSettings(settings, language)
|
|
132
|
+
|
|
133
|
+
const ex = this.exec()
|
|
134
|
+
const table = this.qualify(indexName)
|
|
135
|
+
|
|
136
|
+
// Table + columns.
|
|
137
|
+
await ex.execute(
|
|
138
|
+
`CREATE TABLE IF NOT EXISTS ${table} (
|
|
139
|
+
"id" text PRIMARY KEY,
|
|
140
|
+
"tenant_id" text,
|
|
141
|
+
"document" jsonb NOT NULL,
|
|
142
|
+
"fts" tsvector NOT NULL
|
|
143
|
+
)`,
|
|
144
|
+
)
|
|
145
|
+
|
|
146
|
+
// Indexes — GIN over fts + B-tree over tenant_id + expression
|
|
147
|
+
// index per filterable / sortable attribute.
|
|
148
|
+
await ex.execute(
|
|
149
|
+
`CREATE INDEX IF NOT EXISTS "${indexName}_fts"
|
|
150
|
+
ON ${table} USING GIN ("fts")`,
|
|
151
|
+
)
|
|
152
|
+
await ex.execute(
|
|
153
|
+
`CREATE INDEX IF NOT EXISTS "${indexName}_tenant"
|
|
154
|
+
ON ${table} ("tenant_id") WHERE "tenant_id" IS NOT NULL`,
|
|
155
|
+
)
|
|
156
|
+
for (const attr of [...resolved.filterable, ...resolved.sortable]) {
|
|
157
|
+
await ex.execute(
|
|
158
|
+
`CREATE INDEX IF NOT EXISTS "${indexName}_${attr}"
|
|
159
|
+
ON ${table} (("document"->>'${attr}'))`,
|
|
160
|
+
)
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
// RLS — single policy keyed off `app.tenant_id`. Rows with
|
|
164
|
+
// `tenant_id IS NULL` are visible to every tenant (the
|
|
165
|
+
// single-tenant default).
|
|
166
|
+
await ex.execute(`ALTER TABLE ${table} ENABLE ROW LEVEL SECURITY`)
|
|
167
|
+
await ex.execute(`ALTER TABLE ${table} FORCE ROW LEVEL SECURITY`)
|
|
168
|
+
await ex.execute(`DROP POLICY IF EXISTS "${indexName}_isolate" ON ${table}`)
|
|
169
|
+
await ex.execute(
|
|
170
|
+
`CREATE POLICY "${indexName}_isolate" ON ${table}
|
|
171
|
+
USING (
|
|
172
|
+
"tenant_id" IS NULL
|
|
173
|
+
OR "tenant_id" = current_setting('${TENANT_SETTING}', true)
|
|
174
|
+
)
|
|
175
|
+
WITH CHECK (
|
|
176
|
+
"tenant_id" IS NULL
|
|
177
|
+
OR "tenant_id" = current_setting('${TENANT_SETTING}', true)
|
|
178
|
+
)`,
|
|
179
|
+
)
|
|
180
|
+
|
|
181
|
+
// Persist the settings + language for cross-process reads.
|
|
182
|
+
await ex.execute(
|
|
183
|
+
`INSERT INTO "${this.schema}"."_meta" ("index_name", "settings", "language", "updated_at")
|
|
184
|
+
VALUES ($1, $2::jsonb, $3, now())
|
|
185
|
+
ON CONFLICT ("index_name") DO UPDATE
|
|
186
|
+
SET "settings" = EXCLUDED."settings",
|
|
187
|
+
"language" = EXCLUDED."language",
|
|
188
|
+
"updated_at" = now()`,
|
|
189
|
+
[indexName, JSON.stringify(settings), language],
|
|
190
|
+
)
|
|
191
|
+
|
|
192
|
+
this.indexes.set(indexName, resolved)
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
async deleteIndex(index: string): Promise<void> {
|
|
196
|
+
const indexName = validateIdentifier(index, 'index')
|
|
197
|
+
const ex = this.exec()
|
|
198
|
+
await ex.execute(`DROP TABLE IF EXISTS ${this.qualify(indexName)} CASCADE`)
|
|
199
|
+
await ex.execute(`DELETE FROM "${this.schema}"."_meta" WHERE "index_name" = $1`, [indexName])
|
|
200
|
+
this.indexes.delete(indexName)
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
async flush(index: string): Promise<void> {
|
|
204
|
+
const indexName = validateIdentifier(index, 'index')
|
|
205
|
+
const exists = await this.indexExists(indexName)
|
|
206
|
+
if (!exists) return
|
|
207
|
+
await this.exec().execute(`DELETE FROM ${this.qualify(indexName)}`)
|
|
208
|
+
}
|
|
209
|
+
|
|
210
|
+
// ─── Writes ─────────────────────────────────────────────────────────────
|
|
57
211
|
|
|
58
212
|
async upsert(
|
|
59
213
|
index: string,
|
|
60
214
|
id: string | number,
|
|
61
|
-
document: Record<string, unknown
|
|
215
|
+
document: Record<string, unknown>,
|
|
62
216
|
): Promise<void> {
|
|
63
|
-
await
|
|
217
|
+
await this.upsertMany(index, [{ id, ...document }])
|
|
64
218
|
}
|
|
65
219
|
|
|
66
|
-
async upsertMany(index: string, documents: SearchDocument[]): Promise<void> {
|
|
67
|
-
|
|
220
|
+
async upsertMany(index: string, documents: readonly SearchDocument[]): Promise<void> {
|
|
221
|
+
if (documents.length === 0) return
|
|
222
|
+
const indexName = validateIdentifier(index, 'index')
|
|
223
|
+
const resolved = await this.resolveIndex(indexName)
|
|
224
|
+
const table = this.qualify(indexName)
|
|
225
|
+
const ex = this.exec()
|
|
226
|
+
|
|
227
|
+
// Insert one row at a time so each row's fts expression can
|
|
228
|
+
// bind its own JSONB parameter cleanly. Bulk INSERT with
|
|
229
|
+
// VALUES + per-row CTE was tried and complicates the
|
|
230
|
+
// parameter math far more than it saves in latency for
|
|
231
|
+
// typical batches of <500 rows.
|
|
232
|
+
for (const doc of documents) {
|
|
233
|
+
await ex.execute(
|
|
234
|
+
`INSERT INTO ${table} ("id", "tenant_id", "document", "fts")
|
|
235
|
+
VALUES (
|
|
236
|
+
$1,
|
|
237
|
+
current_setting('${TENANT_SETTING}', true),
|
|
238
|
+
$2,
|
|
239
|
+
${resolved.ftsExpression}
|
|
240
|
+
)
|
|
241
|
+
ON CONFLICT ("id") DO UPDATE
|
|
242
|
+
SET "document" = EXCLUDED."document",
|
|
243
|
+
"fts" = EXCLUDED."fts"`,
|
|
244
|
+
// Pass the document as a JS object — `bun:sql` encodes JSONB
|
|
245
|
+
// params directly. Pre-`JSON.stringify`-ing causes Postgres
|
|
246
|
+
// to store a JSON string scalar instead of an object, which
|
|
247
|
+
// breaks every `->>` projection downstream.
|
|
248
|
+
[String(doc.id), { ...(doc as Record<string, unknown>) }],
|
|
249
|
+
)
|
|
250
|
+
}
|
|
68
251
|
}
|
|
69
252
|
|
|
70
253
|
async delete(index: string, id: string | number): Promise<void> {
|
|
71
|
-
await
|
|
254
|
+
await this.deleteMany(index, [id])
|
|
72
255
|
}
|
|
73
256
|
|
|
74
|
-
async deleteMany(index: string, ids:
|
|
75
|
-
|
|
257
|
+
async deleteMany(index: string, ids: readonly (string | number)[]): Promise<void> {
|
|
258
|
+
if (ids.length === 0) return
|
|
259
|
+
const indexName = validateIdentifier(index, 'index')
|
|
260
|
+
const placeholders = ids.map((_, i) => `$${i + 1}`).join(', ')
|
|
261
|
+
const params = ids.map((id) => String(id))
|
|
262
|
+
await this.exec().execute(
|
|
263
|
+
`DELETE FROM ${this.qualify(indexName)} WHERE "id" IN (${placeholders})`,
|
|
264
|
+
params,
|
|
265
|
+
)
|
|
76
266
|
}
|
|
77
267
|
|
|
78
|
-
//
|
|
268
|
+
// ─── Reads ──────────────────────────────────────────────────────────────
|
|
79
269
|
|
|
80
|
-
async
|
|
81
|
-
|
|
82
|
-
|
|
270
|
+
async search(index: string, query: string, options: SearchOptions = {}): Promise<SearchResult> {
|
|
271
|
+
const indexName = validateIdentifier(index, 'index')
|
|
272
|
+
const resolved = await this.resolveIndex(indexName)
|
|
83
273
|
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
274
|
+
if (options.filter !== undefined && (typeof options.filter !== 'object' || Array.isArray(options.filter))) {
|
|
275
|
+
throw new SearchQueryError(
|
|
276
|
+
'PostgresFtsDriver: `filter` must be a flat key/value object. Engine-native strings are not portable.',
|
|
277
|
+
)
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
const page = Math.max(1, options.page ?? 1)
|
|
281
|
+
const perPage = Math.max(1, options.perPage ?? 20)
|
|
282
|
+
const limit = perPage
|
|
283
|
+
const offset = (page - 1) * perPage
|
|
284
|
+
|
|
285
|
+
const table = this.qualify(indexName)
|
|
286
|
+
const params: unknown[] = []
|
|
287
|
+
const select: string[] = ['"id"', '"document"']
|
|
288
|
+
const whereClauses: string[] = []
|
|
289
|
+
const trimmedQuery = query.trim()
|
|
290
|
+
|
|
291
|
+
if (trimmedQuery.length > 0) {
|
|
292
|
+
params.push(trimmedQuery)
|
|
293
|
+
const qIdx = params.length
|
|
294
|
+
select.push(`ts_rank_cd("fts", websearch_to_tsquery('${resolved.language}', $${qIdx})) AS rank`)
|
|
295
|
+
whereClauses.push(`"fts" @@ websearch_to_tsquery('${resolved.language}', $${qIdx})`)
|
|
89
296
|
} else {
|
|
90
|
-
|
|
91
|
-
const sql = this.resolveSql()
|
|
92
|
-
const { dropIndex } = await import('./engine/schema.ts')
|
|
93
|
-
await dropIndex(sql, this.schemaName, index)
|
|
297
|
+
select.push('0.0 AS rank')
|
|
94
298
|
}
|
|
95
|
-
this.pendingSettings.delete(index)
|
|
96
|
-
}
|
|
97
299
|
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
300
|
+
if (options.filter) {
|
|
301
|
+
for (const [key, value] of Object.entries(options.filter)) {
|
|
302
|
+
if (!resolved.filterable.has(key)) {
|
|
303
|
+
throw new SearchQueryError(
|
|
304
|
+
`PostgresFtsDriver: filter key "${key}" is not in this index's filterableAttributes.`,
|
|
305
|
+
{ context: { index: indexName, key, filterable: [...resolved.filterable] } },
|
|
306
|
+
)
|
|
307
|
+
}
|
|
308
|
+
params.push(typeof value === 'number' || typeof value === 'boolean' ? String(value) : value)
|
|
309
|
+
whereClauses.push(`("document"->>'${key}') = $${params.length}`)
|
|
310
|
+
}
|
|
311
|
+
}
|
|
312
|
+
|
|
313
|
+
// Highlights — one ts_headline column per requested attribute.
|
|
314
|
+
if (options.attributesToHighlight) {
|
|
315
|
+
params.push(trimmedQuery.length > 0 ? trimmedQuery : '')
|
|
316
|
+
const qIdx = params.length
|
|
317
|
+
for (const attr of options.attributesToHighlight) {
|
|
318
|
+
if (!isSafeAttribute(attr)) {
|
|
319
|
+
throw new SearchQueryError(
|
|
320
|
+
`PostgresFtsDriver: highlight attribute "${attr}" contains illegal characters.`,
|
|
321
|
+
)
|
|
322
|
+
}
|
|
323
|
+
select.push(
|
|
324
|
+
`ts_headline(
|
|
325
|
+
'${resolved.language}',
|
|
326
|
+
coalesce("document"->>'${attr}', ''),
|
|
327
|
+
websearch_to_tsquery('${resolved.language}', $${qIdx}),
|
|
328
|
+
'StartSel=<mark>,StopSel=</mark>,MaxFragments=2,MinWords=3,MaxWords=15'
|
|
329
|
+
) AS "hl_${attr}"`,
|
|
330
|
+
)
|
|
331
|
+
}
|
|
332
|
+
}
|
|
103
333
|
|
|
104
|
-
|
|
334
|
+
// Sort — explicit user sort takes precedence; otherwise rank for queried,
|
|
335
|
+
// id for empty queries.
|
|
336
|
+
const orderClauses: string[] = []
|
|
337
|
+
if (options.sort) {
|
|
338
|
+
for (const directive of options.sort) {
|
|
339
|
+
const [field, dirRaw] = directive.split(':') as [string, string | undefined]
|
|
340
|
+
const dir = dirRaw?.toLowerCase() === 'desc' ? 'DESC' : 'ASC'
|
|
341
|
+
if (!resolved.sortable.has(field)) {
|
|
342
|
+
throw new SearchQueryError(
|
|
343
|
+
`PostgresFtsDriver: sort attribute "${field}" is not in this index's sortableAttributes.`,
|
|
344
|
+
{ context: { index: indexName, field, sortable: [...resolved.sortable] } },
|
|
345
|
+
)
|
|
346
|
+
}
|
|
347
|
+
orderClauses.push(`("document"->>'${field}') ${dir}`)
|
|
348
|
+
}
|
|
349
|
+
} else if (trimmedQuery.length > 0) {
|
|
350
|
+
orderClauses.push('rank DESC')
|
|
351
|
+
} else {
|
|
352
|
+
orderClauses.push('"id" ASC')
|
|
353
|
+
}
|
|
105
354
|
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
355
|
+
select.push('COUNT(*) OVER () AS total')
|
|
356
|
+
params.push(limit, offset)
|
|
357
|
+
const sql =
|
|
358
|
+
`SELECT ${select.join(', ')}
|
|
359
|
+
FROM ${table}
|
|
360
|
+
${whereClauses.length > 0 ? `WHERE ${whereClauses.join(' AND ')}` : ''}
|
|
361
|
+
ORDER BY ${orderClauses.join(', ')}
|
|
362
|
+
LIMIT $${params.length - 1} OFFSET $${params.length}`
|
|
109
363
|
|
|
110
|
-
|
|
364
|
+
const start = performance.now()
|
|
365
|
+
let rows: Array<Record<string, unknown>>
|
|
366
|
+
try {
|
|
367
|
+
rows = await this.exec().query<Record<string, unknown>>(sql, params)
|
|
368
|
+
} catch (cause) {
|
|
369
|
+
const msg = (cause as Error).message ?? ''
|
|
370
|
+
if (msg.includes('does not exist')) {
|
|
371
|
+
throw new IndexNotFoundError(indexName, this.name)
|
|
372
|
+
}
|
|
373
|
+
throw new SearchQueryError(`PostgresFtsDriver: search query failed: ${msg}`, { cause })
|
|
374
|
+
}
|
|
111
375
|
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
376
|
+
const totalHits = rows.length === 0 ? 0 : Number(rows[0]!.total ?? 0)
|
|
377
|
+
const hits: SearchHit[] = rows.map((row) => {
|
|
378
|
+
// `bun:sql` returns jsonb columns as text — parse so apps see
|
|
379
|
+
// the original document shape.
|
|
380
|
+
const document =
|
|
381
|
+
typeof row.document === 'string'
|
|
382
|
+
? (JSON.parse(row.document) as Record<string, unknown>)
|
|
383
|
+
: (row.document as Record<string, unknown>)
|
|
384
|
+
const projected = projectAttributes(document, options.attributesToRetrieve)
|
|
385
|
+
const hit: SearchHit = { document: projected }
|
|
386
|
+
if (options.attributesToHighlight) {
|
|
387
|
+
const highlights: Record<string, string> = {}
|
|
388
|
+
for (const attr of options.attributesToHighlight) {
|
|
389
|
+
const value = row[`hl_${attr}`]
|
|
390
|
+
if (typeof value === 'string') highlights[attr] = value
|
|
391
|
+
}
|
|
392
|
+
if (Object.keys(highlights).length > 0) hit.highlights = highlights
|
|
393
|
+
}
|
|
394
|
+
return hit
|
|
395
|
+
})
|
|
396
|
+
|
|
397
|
+
return {
|
|
398
|
+
hits,
|
|
399
|
+
totalHits,
|
|
400
|
+
page,
|
|
401
|
+
perPage,
|
|
402
|
+
processingTimeMs: performance.now() - start,
|
|
117
403
|
}
|
|
118
|
-
|
|
404
|
+
}
|
|
405
|
+
|
|
406
|
+
// ─── Internals ──────────────────────────────────────────────────────────
|
|
407
|
+
|
|
408
|
+
private qualify(index: string): string {
|
|
409
|
+
return `"${this.schema}"."${index}"`
|
|
410
|
+
}
|
|
411
|
+
|
|
412
|
+
private async indexExists(index: string): Promise<boolean> {
|
|
413
|
+
const row = await this.exec().queryOne<{ exists: boolean }>(
|
|
414
|
+
`SELECT EXISTS (
|
|
415
|
+
SELECT 1 FROM pg_tables WHERE schemaname = $1 AND tablename = $2
|
|
416
|
+
) AS "exists"`,
|
|
417
|
+
[this.schema, index],
|
|
418
|
+
)
|
|
419
|
+
return row?.exists === true
|
|
119
420
|
}
|
|
120
421
|
|
|
121
422
|
/**
|
|
122
|
-
*
|
|
123
|
-
*
|
|
124
|
-
*
|
|
423
|
+
* Load `_meta` into the in-memory cache. Called lazily so apps
|
|
424
|
+
* that only ever read from indexes created by another process
|
|
425
|
+
* still get the right `ftsExpression` shape.
|
|
125
426
|
*/
|
|
126
|
-
async
|
|
127
|
-
index
|
|
128
|
-
|
|
129
|
-
): Promise<{ tier: 1 | 2; rows: number; elapsedMs: number }> {
|
|
130
|
-
return (await this.engineFor(index)).rebuild(options)
|
|
131
|
-
}
|
|
132
|
-
|
|
133
|
-
// ── Internals ────────────────────────────────────────────────────────────
|
|
134
|
-
|
|
135
|
-
private async engineFor(index: string): Promise<PgEngine> {
|
|
136
|
-
let engine = this.engines.get(index)
|
|
137
|
-
if (engine) return engine
|
|
138
|
-
|
|
139
|
-
await this.bootstrap()
|
|
140
|
-
const settings = this.pendingSettings.get(index)
|
|
141
|
-
engine = new PgEngine({
|
|
142
|
-
sql: this.resolveSql(),
|
|
143
|
-
schema: this.schemaName,
|
|
144
|
-
index,
|
|
145
|
-
language: settings?.language ?? this.defaultLanguage,
|
|
146
|
-
typoTolerance: this.typo,
|
|
147
|
-
ginFastUpdate: this.ginFastUpdate,
|
|
148
|
-
workMem: this.workMem,
|
|
149
|
-
settings,
|
|
150
|
-
})
|
|
151
|
-
this.engines.set(index, engine)
|
|
152
|
-
this.pendingSettings.delete(index)
|
|
153
|
-
return engine
|
|
154
|
-
}
|
|
427
|
+
private async resolveIndex(index: string): Promise<ResolvedIndex> {
|
|
428
|
+
const cached = this.indexes.get(index)
|
|
429
|
+
if (cached) return cached
|
|
155
430
|
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
431
|
+
const row = await this.exec().queryOne<{ settings: unknown; language: string }>(
|
|
432
|
+
`SELECT "settings", "language" FROM "${this.schema}"."_meta" WHERE "index_name" = $1`,
|
|
433
|
+
[index],
|
|
434
|
+
)
|
|
435
|
+
if (!row) {
|
|
436
|
+
throw new IndexNotFoundError(index, this.name)
|
|
162
437
|
}
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
438
|
+
const settings = (row.settings ?? {}) as IndexSettings
|
|
439
|
+
const resolved = resolveIndexSettings(settings, sanitizeLanguage(row.language))
|
|
440
|
+
this.indexes.set(index, resolved)
|
|
441
|
+
return resolved
|
|
442
|
+
}
|
|
443
|
+
}
|
|
444
|
+
|
|
445
|
+
// ─── Helpers ────────────────────────────────────────────────────────────
|
|
446
|
+
|
|
447
|
+
const FTS_WEIGHTS: readonly ('A' | 'B' | 'C' | 'D')[] = ['A', 'B', 'C', 'D']
|
|
448
|
+
|
|
449
|
+
function resolveIndexSettings(settings: IndexSettings, language: string): ResolvedIndex {
|
|
450
|
+
const searchable = (settings.searchableAttributes ?? []).map((attr) => {
|
|
451
|
+
if (!isSafeAttribute(attr)) {
|
|
452
|
+
throw new SearchError(
|
|
453
|
+
`PostgresFtsDriver: searchable attribute "${attr}" contains illegal characters. ` +
|
|
454
|
+
`Must match /^[a-z_][a-z0-9_]*$/.`,
|
|
455
|
+
{ code: 'search.config' },
|
|
456
|
+
)
|
|
171
457
|
}
|
|
458
|
+
return attr
|
|
459
|
+
})
|
|
460
|
+
const filterable = new Set(
|
|
461
|
+
(settings.filterableAttributes ?? []).filter((a) => {
|
|
462
|
+
if (!isSafeAttribute(a)) {
|
|
463
|
+
throw new SearchError(
|
|
464
|
+
`PostgresFtsDriver: filterable attribute "${a}" contains illegal characters.`,
|
|
465
|
+
{ code: 'search.config' },
|
|
466
|
+
)
|
|
467
|
+
}
|
|
468
|
+
return true
|
|
469
|
+
}),
|
|
470
|
+
)
|
|
471
|
+
const sortable = new Set(
|
|
472
|
+
(settings.sortableAttributes ?? []).filter((a) => {
|
|
473
|
+
if (!isSafeAttribute(a)) {
|
|
474
|
+
throw new SearchError(
|
|
475
|
+
`PostgresFtsDriver: sortable attribute "${a}" contains illegal characters.`,
|
|
476
|
+
{ code: 'search.config' },
|
|
477
|
+
)
|
|
478
|
+
}
|
|
479
|
+
return true
|
|
480
|
+
}),
|
|
481
|
+
)
|
|
482
|
+
|
|
483
|
+
const ftsExpression = buildFtsExpression(searchable, language)
|
|
484
|
+
|
|
485
|
+
return { settings, language, searchable, filterable, sortable, ftsExpression }
|
|
486
|
+
}
|
|
487
|
+
|
|
488
|
+
/**
|
|
489
|
+
* Compose a SQL expression that produces a tsvector for the row
|
|
490
|
+
* being inserted. The JSONB document is bound as `$2::jsonb`.
|
|
491
|
+
*
|
|
492
|
+
* When `searchableAttributes` is set, each one becomes a weighted
|
|
493
|
+
* `setweight(to_tsvector(lang, coalesce(doc->>'<attr>', '')), 'A|B|C|D')`
|
|
494
|
+
* pegged to its position. When the list is empty we fall back to
|
|
495
|
+
* indexing every text leaf in the JSONB via `jsonb_path_query_array`
|
|
496
|
+
* → `array_to_string`, weighted 'A'.
|
|
497
|
+
*/
|
|
498
|
+
function buildFtsExpression(searchable: readonly string[], language: string): string {
|
|
499
|
+
if (searchable.length === 0) {
|
|
500
|
+
return (
|
|
501
|
+
`setweight(to_tsvector('${language}', ` +
|
|
502
|
+
`coalesce((SELECT string_agg(value::text, ' ') FROM jsonb_each_text($2::jsonb)), '')),` +
|
|
503
|
+
` 'A')`
|
|
504
|
+
)
|
|
172
505
|
}
|
|
506
|
+
return searchable
|
|
507
|
+
.map((attr, i) => {
|
|
508
|
+
const weight = FTS_WEIGHTS[Math.min(i, FTS_WEIGHTS.length - 1)]
|
|
509
|
+
return `setweight(to_tsvector('${language}', coalesce($2::jsonb->>'${attr}', '')), '${weight}')`
|
|
510
|
+
})
|
|
511
|
+
.join(' || ')
|
|
512
|
+
}
|
|
513
|
+
|
|
514
|
+
function projectAttributes(
|
|
515
|
+
document: Record<string, unknown>,
|
|
516
|
+
attributes: string[] | undefined,
|
|
517
|
+
): Record<string, unknown> {
|
|
518
|
+
if (!attributes || attributes.length === 0) return { ...document }
|
|
519
|
+
const out: Record<string, unknown> = {}
|
|
520
|
+
for (const attr of attributes) {
|
|
521
|
+
if (attr in document) out[attr] = document[attr]
|
|
522
|
+
}
|
|
523
|
+
if ('id' in document && !('id' in out)) out.id = document.id
|
|
524
|
+
return out
|
|
525
|
+
}
|
|
526
|
+
|
|
527
|
+
function isSafeAttribute(name: string): boolean {
|
|
528
|
+
return /^[a-z_][a-z0-9_]*$/.test(name)
|
|
529
|
+
}
|
|
173
530
|
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
531
|
+
function sanitizeLanguage(language: string): string {
|
|
532
|
+
// Postgres text-search configuration names are user-defined,
|
|
533
|
+
// but the built-ins ship with lowercase ASCII identifiers; we
|
|
534
|
+
// refuse anything else so the value can be spliced directly.
|
|
535
|
+
if (!/^[a-z_][a-z0-9_]*$/.test(language)) {
|
|
536
|
+
throw new SearchError(
|
|
537
|
+
`PostgresFtsDriver: invalid language ${JSON.stringify(language)} — must match /^[a-z_][a-z0-9_]*$/.`,
|
|
538
|
+
{ code: 'search.config' },
|
|
181
539
|
)
|
|
182
|
-
return this.bootstrapped
|
|
183
540
|
}
|
|
541
|
+
return language
|
|
184
542
|
}
|