@strav/search 0.4.30 → 1.0.0-alpha.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. package/package.json +20 -22
  2. package/src/console/index.ts +5 -0
  3. package/src/console/search_console_provider.ts +20 -0
  4. package/src/console/search_flush.ts +49 -0
  5. package/src/console/search_import.ts +103 -0
  6. package/src/console/search_list.ts +46 -0
  7. package/src/console/search_reindex.ts +94 -0
  8. package/src/drivers/meilisearch/meilisearch_driver.ts +304 -0
  9. package/src/drivers/memory/memory_driver.ts +344 -0
  10. package/src/drivers/postgres/apply_search_migration.ts +74 -0
  11. package/src/drivers/postgres/postgres_fts_driver.ts +493 -135
  12. package/src/drivers/typesense/typesense_driver.ts +345 -0
  13. package/src/index.ts +50 -39
  14. package/src/search_engine.ts +40 -25
  15. package/src/search_error.ts +86 -0
  16. package/src/search_manager.ts +112 -94
  17. package/src/search_provider.ts +68 -6
  18. package/src/searchable.ts +173 -160
  19. package/src/searchable_registry.ts +61 -0
  20. package/src/types.ts +59 -49
  21. package/README.md +0 -191
  22. package/src/commands/search_flush.ts +0 -41
  23. package/src/commands/search_import.ts +0 -43
  24. package/src/commands/search_optimize.ts +0 -52
  25. package/src/commands/search_rebuild.ts +0 -73
  26. package/src/drivers/algolia_driver.ts +0 -170
  27. package/src/drivers/embedded/embedded_driver.ts +0 -136
  28. package/src/drivers/embedded/engine/field_registry.ts +0 -97
  29. package/src/drivers/embedded/engine/fts_query_builder.ts +0 -184
  30. package/src/drivers/embedded/engine/query_compiler.ts +0 -134
  31. package/src/drivers/embedded/engine/schema.ts +0 -99
  32. package/src/drivers/embedded/engine/snippet_formatter.ts +0 -29
  33. package/src/drivers/embedded/engine/sqlite_engine.ts +0 -255
  34. package/src/drivers/embedded/engine/typo_expander.ts +0 -138
  35. package/src/drivers/embedded/errors.ts +0 -15
  36. package/src/drivers/embedded/filters/filter_compiler.ts +0 -136
  37. package/src/drivers/embedded/index.ts +0 -3
  38. package/src/drivers/embedded/storage/paths.ts +0 -23
  39. package/src/drivers/embedded/types.ts +0 -34
  40. package/src/drivers/meilisearch_driver.ts +0 -150
  41. package/src/drivers/null_driver.ts +0 -27
  42. package/src/drivers/postgres/engine/field_registry.ts +0 -116
  43. package/src/drivers/postgres/engine/fts_query_builder.ts +0 -105
  44. package/src/drivers/postgres/engine/pg_engine.ts +0 -300
  45. package/src/drivers/postgres/engine/query_compiler.ts +0 -165
  46. package/src/drivers/postgres/engine/schema.ts +0 -187
  47. package/src/drivers/postgres/engine/snippet_formatter.ts +0 -31
  48. package/src/drivers/postgres/engine/typo_expander.ts +0 -131
  49. package/src/drivers/postgres/errors.ts +0 -33
  50. package/src/drivers/postgres/filters/filter_compiler.ts +0 -138
  51. package/src/drivers/postgres/index.ts +0 -14
  52. package/src/drivers/postgres/rebuild/rebuild_inplace.ts +0 -113
  53. package/src/drivers/postgres/storage/identifiers.ts +0 -46
  54. package/src/drivers/postgres/types.ts +0 -53
  55. package/src/drivers/typesense_driver.ts +0 -229
  56. package/src/errors.ts +0 -18
  57. package/src/helpers.ts +0 -120
  58. package/stubs/config/search.ts +0 -57
  59. package/tsconfig.json +0 -5
@@ -1,150 +0,0 @@
1
- import { ExternalServiceError } from '@strav/kernel'
2
- import type { SearchEngine } from '../search_engine.ts'
3
- import type {
4
- SearchDocument,
5
- SearchOptions,
6
- SearchResult,
7
- SearchHit,
8
- IndexSettings,
9
- DriverConfig,
10
- } from '../types.ts'
11
-
12
/**
 * Meilisearch driver — communicates with the Meilisearch REST API via raw `fetch()`.
 *
 * Each method maps to one HTTP call (two for `createIndex` when settings are
 * supplied). Any non-2xx response is raised as an `ExternalServiceError`.
 *
 * @see https://www.meilisearch.com/docs/reference/api/overview
 */
export class MeilisearchDriver implements SearchEngine {
  readonly name = 'meilisearch'
  private readonly baseUrl: string
  private readonly apiKey: string

  /**
   * @param config Connection settings. `protocol`, `host` and `port` default to
   *   `http`, `localhost` and `7700`. Without an `apiKey` no `authorization`
   *   header is sent.
   */
  constructor(config: DriverConfig) {
    const protocol = config.protocol ?? 'http'
    const host = config.host ?? 'localhost'
    const port = config.port ?? 7700
    this.baseUrl = `${protocol}://${host}:${port}`
    this.apiKey = (config.apiKey as string) ?? ''
  }

  // ── Interface ────────────────────────────────────────────────────────────

  /** Add or replace a single document; `id` is merged into the payload. */
  async upsert(
    index: string,
    id: string | number,
    document: Record<string, unknown>
  ): Promise<void> {
    await this.request('POST', this.indexPath(index, '/documents'), [{ id, ...document }])
  }

  /** Add or replace a batch of documents. An empty batch is a no-op. */
  async upsertMany(index: string, documents: SearchDocument[]): Promise<void> {
    if (documents.length === 0) return
    await this.request('POST', this.indexPath(index, '/documents'), documents)
  }

  /** Delete one document by id. */
  async delete(index: string, id: string | number): Promise<void> {
    await this.request(
      'DELETE',
      this.indexPath(index, `/documents/${encodeURIComponent(String(id))}`)
    )
  }

  /** Delete a batch of documents by id. An empty batch is a no-op. */
  async deleteMany(index: string, ids: Array<string | number>): Promise<void> {
    if (ids.length === 0) return
    await this.request('POST', this.indexPath(index, '/documents/delete-batch'), ids)
  }

  /** Remove every document from the index while keeping the index itself. */
  async flush(index: string): Promise<void> {
    await this.request('DELETE', this.indexPath(index, '/documents'))
  }

  /** Delete the index. */
  async deleteIndex(index: string): Promise<void> {
    await this.request('DELETE', this.indexPath(index))
  }

  /**
   * Create the index (primary key defaults to `id`) and, when any attribute
   * lists are supplied, push them in a follow-up settings PATCH.
   */
  async createIndex(index: string, options?: IndexSettings): Promise<void> {
    await this.request('POST', '/indexes', {
      uid: index,
      primaryKey: options?.primaryKey ?? 'id',
    })

    if (!options) return

    const settings: Record<string, unknown> = {}
    if (options.searchableAttributes) settings.searchableAttributes = options.searchableAttributes
    if (options.displayedAttributes) settings.displayedAttributes = options.displayedAttributes
    if (options.filterableAttributes) settings.filterableAttributes = options.filterableAttributes
    if (options.sortableAttributes) settings.sortableAttributes = options.sortableAttributes

    if (Object.keys(settings).length > 0) {
      await this.request('PATCH', this.indexPath(index, '/settings'), settings)
    }
  }

  /**
   * Run a search and normalise the response into a `SearchResult`.
   *
   * `page` and `perPage` are clamped to at least 1 so the computed offset can
   * never go negative. A response without a body yields an empty result
   * instead of throwing.
   */
  async search(index: string, query: string, options?: SearchOptions): Promise<SearchResult> {
    const perPage = Math.max(1, options?.perPage ?? 20)
    const page = Math.max(1, options?.page ?? 1)

    const body: Record<string, unknown> = { q: query, limit: perPage, offset: (page - 1) * perPage }

    if (options?.filter) {
      const filter =
        typeof options.filter === 'string' ? options.filter : this.buildFilter(options.filter)
      // An object filter whose values are all undefined compiles to ''; omit it.
      if (filter) body.filter = filter
    }
    if (options?.sort) body.sort = options.sort
    if (options?.attributesToRetrieve) body.attributesToRetrieve = options.attributesToRetrieve
    if (options?.attributesToHighlight) {
      body.attributesToHighlight = options.attributesToHighlight
    }

    const data = await this.request('POST', this.indexPath(index, '/search'), body)
    // `request` returns null for empty bodies, so every read below is null-safe.
    const rawHits: any[] = data?.hits ?? []

    return {
      hits: rawHits.map(
        (hit: any): SearchHit => ({
          document: hit,
          highlights: hit._formatted,
        })
      ),
      totalHits: data?.estimatedTotalHits ?? data?.totalHits ?? 0,
      page,
      perPage,
      processingTimeMs: data?.processingTimeMs,
    }
  }

  // ── Private ──────────────────────────────────────────────────────────────

  /** Build `/indexes/<encoded index><suffix>`. */
  private indexPath(index: string, suffix = ''): string {
    return `/indexes/${encodeURIComponent(index)}${suffix}`
  }

  /** JSON content type plus a bearer token when an API key is configured. */
  private headers(): Record<string, string> {
    const h: Record<string, string> = { 'content-type': 'application/json' }
    if (this.apiKey) h['authorization'] = `Bearer ${this.apiKey}`
    return h
  }

  /**
   * Perform one HTTP call against the configured base URL.
   *
   * @returns The parsed JSON body, or `null` for 204 / zero-length responses.
   * @throws ExternalServiceError when the response status is not 2xx.
   */
  private async request(method: string, path: string, body?: unknown): Promise<any> {
    const response = await fetch(`${this.baseUrl}${path}`, {
      method,
      headers: this.headers(),
      body: body !== undefined ? JSON.stringify(body) : undefined,
    })

    if (!response.ok) {
      const text = await response.text()
      throw new ExternalServiceError('Meilisearch', response.status, text)
    }

    if (response.status === 204 || response.headers.get('content-length') === '0') return null
    return response.json()
  }

  /**
   * Compile an object filter into a Meilisearch filter expression, ANDing one
   * clause per key: arrays become `IN [...]`, `null` becomes `IS NULL`
   * (Meilisearch >= 1.2), `undefined` entries are skipped, anything else
   * becomes an equality test on its JSON encoding.
   */
  private buildFilter(filter: Record<string, unknown>): string {
    const clauses: string[] = []
    for (const [key, value] of Object.entries(filter)) {
      // JSON.stringify(undefined) is undefined, which would emit the text `undefined`.
      if (value === undefined) continue
      if (value === null) {
        clauses.push(`${key} IS NULL`)
      } else if (Array.isArray(value)) {
        clauses.push(`${key} IN [${value.map(v => JSON.stringify(v)).join(', ')}]`)
      } else {
        clauses.push(`${key} = ${JSON.stringify(value)}`)
      }
    }
    return clauses.join(' AND ')
  }
}
@@ -1,27 +0,0 @@
1
- import type { SearchEngine } from '../search_engine.ts'
2
- import type { SearchDocument, SearchOptions, SearchResult, IndexSettings } from '../types.ts'
3
-
4
/**
 * Search driver that does nothing: every write resolves immediately without
 * storing anything, and every search reports zero hits.
 *
 * Handy as a stand-in when search is switched off or in tests.
 */
export class NullDriver implements SearchEngine {
  readonly name = 'null'

  /** Ignores the document. */
  async upsert(
    _index: string,
    _id: string | number,
    _document: Record<string, unknown>
  ): Promise<void> {
    return
  }

  /** Ignores the batch. */
  async upsertMany(_index: string, _documents: SearchDocument[]): Promise<void> {
    return
  }

  /** Nothing is stored, so there is nothing to delete. */
  async delete(_index: string, _id: string | number): Promise<void> {
    return
  }

  /** Nothing is stored, so there is nothing to delete. */
  async deleteMany(_index: string, _ids: Array<string | number>): Promise<void> {
    return
  }

  /** Nothing is stored, so there is nothing to flush. */
  async flush(_index: string): Promise<void> {
    return
  }

  /** No index exists, so there is nothing to drop. */
  async deleteIndex(_index: string): Promise<void> {
    return
  }

  /** No index is ever created. */
  async createIndex(_index: string, _options?: IndexSettings): Promise<void> {
    return
  }

  /** Always an empty result, echoing the requested paging (defaults: page 1, 20 per page). */
  async search(_index: string, _query: string, options?: SearchOptions): Promise<SearchResult> {
    const page = options?.page ?? 1
    const perPage = options?.perPage ?? 20
    return { hits: [], totalHits: 0, page, perPage }
  }
}
@@ -1,116 +0,0 @@
1
- import type { PgIndexSettings } from '../types.ts'
2
-
3
- /** Default searchable column when no `searchableAttributes` are configured. */
4
- export const DEFAULT_TEXT_COLUMN = '_text'
5
-
6
- /** Postgres tsvector weight labels (A–D) in declaration order. */
7
- const WEIGHT_TIERS = ['A', 'B', 'C', 'D'] as const
8
- type WeightTier = (typeof WEIGHT_TIERS)[number]
9
-
10
- /** Postgres column type derived from a sample value, or `text` as the conservative default. */
11
- type PgType = 'text' | 'integer' | 'bigint' | 'double precision' | 'boolean' | 'timestamptz'
12
-
13
- export interface TypedColumnSpec {
14
- name: string
15
- pgType: PgType
16
- /** JSONB extraction expression: `(doc->>'name')::pgType` (cast suppressed for text). */
17
- expression: string
18
- }
19
-
20
/**
 * The schema layout for one index: which document attributes feed which
 * tsvector segment + weight, and which typed columns exist for filter/sort.
 *
 * Mirrors `embedded/engine/field_registry.ts` so the two drivers project
 * documents identically. Differences:
 *   - Per-attribute weight tier (A/B/C/D) is explicit.
 *   - Typed columns are emitted as `GENERATED ALWAYS AS (...) STORED` SQL.
 */
export class FieldRegistry {
  readonly searchable: string[]
  readonly weights: Map<string, WeightTier>
  readonly filterable: string[]
  readonly sortable: string[]
  readonly typedColumns: TypedColumnSpec[]
  readonly primaryKey: string
  readonly language: string

  /**
   * @param settings Per-index settings; every field is optional.
   * @param language Fallback text-search configuration used when
   *   `settings.language` is not set.
   * @throws Error when a configured weight is not one of A/B/C/D
   *   (case-insensitive). Tiers end up interpolated into SQL, so they are
   *   validated here rather than trusted.
   */
  constructor(settings?: PgIndexSettings, language = 'english') {
    this.primaryKey = settings?.primaryKey ?? 'id'
    this.language = settings?.language ?? language
    this.searchable =
      settings?.searchableAttributes && settings.searchableAttributes.length > 0
        ? [...settings.searchableAttributes]
        : [DEFAULT_TEXT_COLUMN]

    this.weights = new Map()
    this.searchable.forEach((attr, position) => {
      const configured: unknown = settings?.weights?.[attr]
      if (configured === undefined || configured === null) {
        // No explicit weight: first attribute gets A, second B, ... capped at the last tier.
        this.weights.set(attr, WEIGHT_TIERS[Math.min(position, WEIGHT_TIERS.length - 1)]!)
        return
      }
      const tier = String(configured).toUpperCase()
      if (!(WEIGHT_TIERS as readonly string[]).includes(tier)) {
        throw new Error(
          `Invalid search weight ${JSON.stringify(configured)} for attribute "${attr}"; ` +
            `expected one of ${WEIGHT_TIERS.join(', ')}`
        )
      }
      this.weights.set(attr, tier as WeightTier)
    })

    // Copy, like `searchable`, so later mutation of the caller's settings
    // cannot drift away from `typedColumns`.
    this.filterable = [...(settings?.filterableAttributes ?? [])]
    this.sortable = [...(settings?.sortableAttributes ?? [])]

    // One text-typed column per distinct filterable/sortable attribute, in first-seen order.
    const distinct = new Set<string>([...this.filterable, ...this.sortable])
    this.typedColumns = Array.from(distinct, (attr): TypedColumnSpec => ({
      name: attr,
      pgType: 'text',
      expression: `(doc->>${literal(attr)})`,
    }))
  }

  /** True when no searchable attributes were configured and the catch-all column is in use. */
  get usesDefaultTextColumn(): boolean {
    return this.searchable.length === 1 && this.searchable[0] === DEFAULT_TEXT_COLUMN
  }

  /**
   * Project a document into [text, tier] pairs for tsvector construction.
   * Default mode collapses every string into one A-weighted blob.
   */
  projectFtsSegments(document: Record<string, unknown>): Array<{ text: string; tier: WeightTier }> {
    if (this.usesDefaultTextColumn) {
      return [{ text: collectStrings(document), tier: 'A' }]
    }
    return this.searchable.map(attr => ({
      text: coerceText(document[attr]),
      tier: this.weights.get(attr)!,
    }))
  }

  /** Single string spanning all searchable text (for terms-dict tokenization). */
  concatSearchableText(document: Record<string, unknown>): string {
    return this.projectFtsSegments(document)
      .map(s => s.text)
      .filter(Boolean)
      .join(' ')
  }
}
92
-
93
/** Wrap `value` in single quotes as a SQL string literal, doubling any embedded single quote. */
function literal(value: string): string {
  const escaped = value.split("'").join("''")
  return "'" + escaped + "'"
}
96
-
97
/**
 * Flatten a document value to indexable text.
 *
 * Strings pass through, numbers and booleans are stringified, arrays are
 * coerced element by element (recursively) and joined with single spaces,
 * and everything else — null, undefined, plain objects — becomes ''.
 */
function coerceText(value: unknown): string {
  if (value == null) return ''

  if (Array.isArray(value)) {
    const pieces: string[] = []
    for (const item of value) {
      const piece = coerceText(item)
      if (piece) pieces.push(piece)
    }
    return pieces.join(' ')
  }

  switch (typeof value) {
    case 'string':
      return value
    case 'number':
    case 'boolean':
      return String(value)
    default:
      return ''
  }
}
104
-
105
/**
 * Join every non-empty string found among the document's top-level values,
 * looking one level into array values, with single spaces. Non-string values
 * and nested arrays are ignored.
 */
function collectStrings(document: Record<string, unknown>): string {
  const isNonEmptyString = (candidate: unknown): candidate is string =>
    typeof candidate === 'string' && candidate.length > 0

  return Object.values(document)
    .flatMap((value): unknown[] => (Array.isArray(value) ? value : [value]))
    .filter(isNonEmptyString)
    .join(' ')
}
@@ -1,105 +0,0 @@
1
- import { quoteLiteral } from '../storage/identifiers.ts'
2
-
3
/**
 * Translate a user-facing query string into one that's safe for
 * `websearch_to_tsquery`, plus extract positive tokens for typo expansion.
 *
 * websearch_to_tsquery already accepts Google-style syntax:
 *   - `"foo bar"` — phrase
 *   - `-foo`      — exclude
 *   - `OR`/`AND`  — boolean (case-insensitive)
 *
 * It does NOT support prefix matching (`foo*`); we recognise that ourselves
 * and emit a separate `to_tsquery('foo:*')` ORed onto the result.
 */
export interface ParsedQuery {
  /** The raw query, ready to pass to `websearch_to_tsquery`. */
  websearch: string
  /** Positive bare tokens (no quotes/operators) — used for typo expansion. */
  positiveTokens: string[]
  /** Prefix tokens from `foo*` syntax — emitted separately to `to_tsquery`. */
  prefixTokens: string[]
  /** Whether the input was effectively empty. */
  isEmpty: boolean
}

const PHRASE_RE = /"([^"]*)"/g

/**
 * Split a raw query into the pieces the tsquery builder needs.
 *
 * The trimmed input is passed through untouched as `websearch`. Outside of
 * quoted phrases, each whitespace-separated token is classified:
 *   - `-foo` / `-foo*` are exclusions and are skipped. Because prefix and
 *     typo-expanded terms are ORed onto the main query, reporting an excluded
 *     term here would let the documents it excludes match again.
 *   - `foo*` contributes the lower-cased stem to `prefixTokens`.
 *   - `AND` / `OR` are operators and are skipped.
 *   - anything else of at least two characters goes to `positiveTokens`.
 * A leading `+` is stripped. Tokens with no letter or digit left after
 * normalisation (e.g. `__`, `--`) are dropped.
 *
 * @param input Raw user query.
 * @returns The parsed query; `isEmpty` is true only for blank input.
 */
export function parseQuery(input: string): ParsedQuery {
  const trimmed = input.trim()
  if (!trimmed) {
    return { websearch: '', positiveTokens: [], prefixTokens: [], isEmpty: true }
  }

  const normalise = (text: string): string =>
    text.toLowerCase().replace(/[^\p{L}\p{N}_-]/gu, '')
  const hasWordChar = (text: string): boolean => /[\p{L}\p{N}]/u.test(text)

  const positiveTokens: string[] = []
  const prefixTokens: string[] = []

  // Strip phrases first so we don't tokenize their inner whitespace.
  const scratch = trimmed.replace(PHRASE_RE, ' ')
  for (const raw of scratch.split(/\s+/)) {
    if (!raw) continue
    // Excluded term: must not feed prefix matching or typo expansion.
    if (raw.startsWith('-')) continue

    const text = raw.startsWith('+') ? raw.slice(1) : raw

    if (text.endsWith('*')) {
      const stem = normalise(text.slice(0, -1))
      if (hasWordChar(stem)) prefixTokens.push(stem)
      continue
    }

    const upper = text.toUpperCase()
    if (upper === 'AND' || upper === 'OR') continue

    const norm = normalise(text)
    if (norm.length >= 2 && hasWordChar(norm)) positiveTokens.push(norm)
  }

  return { websearch: trimmed, positiveTokens, prefixTokens, isEmpty: false }
}
55
-
56
/**
 * Assemble the tsquery SQL for a parsed query: the websearch query, one
 * `to_tsquery` per prefix token, and one `to_tsquery` per positive token that
 * has typo-expansion candidates, all joined with `||`.
 *
 * Every piece of user text is bound as a parameter; only the language is
 * inlined (as a quoted literal cast to `regconfig`), since it is a per-index
 * server-controlled value rather than user input.
 *
 * @param parsed     Output of `parseQuery`.
 * @param expansions Typo candidates keyed by positive token.
 * @param language   Text-search configuration name.
 * @param startAt    Number of placeholders the caller has already used;
 *                   numbering continues from `startAt + 1`.
 * @returns The SQL expression (empty string when nothing was emitted), its
 *          bindings in placeholder order, and how many placeholders were used.
 */
export function buildTsqueryExpression(
  parsed: ParsedQuery,
  expansions: Map<string, string[]>,
  language: string,
  startAt = 0
): { sql: string; params: string[]; paramCount: number } {
  const regconfig = `${quoteLiteral(language)}::regconfig`
  const bindings: string[] = []
  const pieces: string[] = []

  // Bind `value` and emit `<fn>(<regconfig>, $n)` with the next free placeholder.
  const emit = (fn: 'websearch_to_tsquery' | 'to_tsquery', value: string): void => {
    bindings.push(value)
    pieces.push(`${fn}(${regconfig}, $${startAt + bindings.length})`)
  }

  if (parsed.websearch) emit('websearch_to_tsquery', parsed.websearch)

  parsed.prefixTokens.forEach(stem => emit('to_tsquery', `${stem}:*`))

  parsed.positiveTokens.forEach(token => {
    const candidates = expansions.get(token) ?? []
    const alternatives = candidates
      .map(candidate => sanitiseTsTerm(candidate))
      .filter(Boolean)
      .join(' | ')
    if (alternatives) emit('to_tsquery', alternatives)
  })

  // With nothing emitted this is { sql: '', params: [], paramCount: 0 }.
  return { sql: pieces.join(' || '), params: bindings, paramCount: bindings.length }
}
101
-
102
/**
 * Reduce a term to what may appear in a hand-built tsquery: lower-case it and
 * keep only letters, digits, `_` and `-`.
 */
function sanitiseTsTerm(term: string): string {
  const lowered = term.toLowerCase()
  const allowedRuns = lowered.match(/[\p{L}\p{N}_-]+/gu)
  return allowedRuns === null ? '' : allowedRuns.join('')
}
@@ -1,300 +0,0 @@
1
- import type { SQL } from 'bun'
2
- import type {
3
- SearchDocument,
4
- SearchOptions,
5
- SearchResult,
6
- SearchHit,
7
- } from '../../../types.ts'
8
- import type { PgIndexSettings, ResolvedTypoTolerance } from '../types.ts'
9
- import { FieldRegistry } from './field_registry.ts'
10
- import { ensureIndexTable, dropIndex as dropIndexSchema } from './schema.ts'
11
- import { parseQuery, buildTsqueryExpression } from './fts_query_builder.ts'
12
- import { compileSearch } from './query_compiler.ts'
13
- import { formatSnippet } from './snippet_formatter.ts'
14
- import {
15
- expandTokens,
16
- hasFuzzystrmatch,
17
- recordTerms,
18
- unrecordTerms,
19
- } from './typo_expander.ts'
20
- import {
21
- indexTableName,
22
- termsTableName,
23
- quoteIdent,
24
- quoteLiteral,
25
- } from '../storage/identifiers.ts'
26
- import { rebuildInPlace, type RebuildOptions } from '../rebuild/rebuild_inplace.ts'
27
-
28
- export interface PgEngineOptions {
29
- sql: SQL
30
- schema: string
31
- index: string
32
- language: string
33
- typoTolerance: ResolvedTypoTolerance
34
- ginFastUpdate: boolean
35
- workMem: string | null
36
- settings?: PgIndexSettings
37
- }
38
-
39
- /** A Postgres tsvector must stay under 1MB (lexemes + positions); larger values are rejected, not stored. Truncate inputs to be safe. */
40
- const MAX_TEXT_BYTES = 900_000
41
-
42
/**
 * One PgEngine wraps a single index.
 *
 * It owns the index's `FieldRegistry`, lazily creates the backing table on
 * first use, and keeps the typo-tolerance terms dictionary in step with the
 * documents it writes and deletes.
 */
export class PgEngine {
  readonly registry: FieldRegistry
  private readonly sql: SQL
  private readonly schema: string
  private readonly index: string
  private readonly typo: ResolvedTypoTolerance
  private readonly ginFastUpdate: boolean
  private readonly workMem: string | null
  private readonly tableName: string
  private fuzzyAvailable: boolean | null = null
  private ensured = false

  constructor(opts: PgEngineOptions) {
    this.sql = opts.sql
    this.schema = opts.schema
    this.index = opts.index
    this.typo = opts.typoTolerance
    this.ginFastUpdate = opts.ginFastUpdate
    this.workMem = opts.workMem
    this.registry = new FieldRegistry(opts.settings, opts.language)
    this.tableName = indexTableName(opts.schema, opts.index)
  }

  /** Lazy: ensure the table + indexes + trigger exist. Idempotent. */
  async ensure(): Promise<void> {
    if (this.ensured) return
    await ensureIndexTable(this.sql, this.schema, this.index, this.registry, this.ginFastUpdate)
    // Probe for fuzzystrmatch once, and only when typo tolerance is on.
    if (this.typo.enabled && this.fuzzyAvailable === null) {
      this.fuzzyAvailable = await hasFuzzystrmatch(this.sql)
    }
    this.ensured = true
  }

  // ── Writes ──────────────────────────────────────────────────────────────

  /** Insert or replace one document. */
  async upsert(id: string | number, document: Record<string, unknown>): Promise<void> {
    await this.upsertMany([{ id, ...document }])
  }

  /**
   * Insert or replace a batch of documents inside one transaction.
   *
   * With typo tolerance enabled, the terms of any document being replaced are
   * unrecorded before the new terms are recorded. Both sides use `termText`,
   * so the text removed for a document is derived the same way as the text
   * that was recorded for it.
   */
  async upsertMany(documents: SearchDocument[]): Promise<void> {
    if (documents.length === 0) return
    await this.ensure()

    await this.sql.begin(async (tx: SQL) => {
      for (const raw of documents) {
        const { id, ...rest } = raw
        const fields = rest as Record<string, unknown>
        const idStr = String(id)
        // Bun's SQL treats stringified JSON as a JSONB string value (double-
        // encoding the JSON). Passing the object directly lets it generate
        // proper JSONB so `doc->>'field'` works for the typed generated cols.
        const doc = { id, ...fields }

        // The previous row is only needed to unrecord its terms.
        if (this.typo.enabled) {
          const oldRows = (await tx.unsafe(
            `SELECT doc FROM ${this.tableName} WHERE id = $1`,
            [idStr]
          )) as Array<{ doc: Record<string, unknown> | string }>
          const previous = oldRows[0]
          if (previous) {
            await unrecordTerms(tx, this.schema, this.index, this.termText(parseDoc(previous.doc)))
          }
        }

        const ftsExpr = this.buildFtsExpression(fields)
        const sqlStr =
          `INSERT INTO ${this.tableName} (id, doc, fts) VALUES ($1, $2, ${ftsExpr.sql}) ` +
          `ON CONFLICT (id) DO UPDATE SET doc = EXCLUDED.doc, fts = EXCLUDED.fts`
        await tx.unsafe(sqlStr, [idStr, doc as any, ...ftsExpr.params])

        if (this.typo.enabled) {
          await recordTerms(tx, this.schema, this.index, this.termText(fields))
        }
      }
    })
  }

  /** Delete one document by id. */
  async delete(id: string | number): Promise<void> {
    await this.deleteMany([id])
  }

  /**
   * Delete a batch of documents inside one transaction, unrecording their
   * terms first when typo tolerance is enabled.
   */
  async deleteMany(ids: Array<string | number>): Promise<void> {
    if (ids.length === 0) return
    await this.ensure()

    await this.sql.begin(async (tx: SQL) => {
      const idStrs = ids.map(String)
      const placeholders = idStrs.map((_, i) => `$${i + 1}`).join(', ')

      if (this.typo.enabled) {
        const rows = (await tx.unsafe(
          `SELECT doc FROM ${this.tableName} WHERE id IN (${placeholders})`,
          idStrs
        )) as Array<{ doc: Record<string, unknown> | string }>
        for (const row of rows) {
          await unrecordTerms(tx, this.schema, this.index, this.termText(parseDoc(row.doc)))
        }
      }

      await tx.unsafe(
        `DELETE FROM ${this.tableName} WHERE id IN (${placeholders})`,
        idStrs
      )
    })
  }

  /** Remove every document (and, with typo tolerance on, every recorded term). */
  async flush(): Promise<void> {
    await this.ensure()
    await this.sql.begin(async (tx: SQL) => {
      await tx.unsafe(`TRUNCATE ${this.tableName}`)
      if (this.typo.enabled) {
        await tx.unsafe(`TRUNCATE ${termsTableName(this.schema, this.index)}`)
      }
    })
  }

  /** Drop the index's schema objects; the next operation re-creates them via `ensure()`. */
  async drop(): Promise<void> {
    await dropIndexSchema(this.sql, this.schema, this.index)
    this.ensured = false
  }

  // ── Reads ───────────────────────────────────────────────────────────────

  /**
   * Run a search: parse the query, optionally expand positive tokens for typo
   * tolerance, compile the SQL, then fetch hits and the total count in one
   * transaction (so `SET LOCAL work_mem` applies to both statements).
   */
  async search(query: string, options?: SearchOptions): Promise<SearchResult> {
    await this.ensure()
    const start = performance.now()
    const opts = options ?? {}
    const parsed = parseQuery(query)

    const expansions = await this.maybeExpand(parsed.positiveTokens)
    const tsquery = buildTsqueryExpression(parsed, expansions, this.registry.language)

    const compiled = compileSearch({
      registry: this.registry,
      schema: this.schema,
      index: this.index,
      tsquery: { sql: tsquery.sql, params: tsquery.params },
      search: opts,
    })

    const result = await this.sql.begin(async (tx: SQL) => {
      if (this.workMem) {
        await tx.unsafe(`SET LOCAL work_mem = ${quoteLiteral(this.workMem)}`)
      }
      const rows = (await tx.unsafe(compiled.sql, compiled.params)) as RawHitRow[]
      const totalRows = (await tx.unsafe(compiled.countSql, compiled.countParams)) as Array<{
        n: number | string | bigint
      }>
      // Counts may come back as a string or bigint depending on the SQL type; normalise.
      return { rows, total: Number(totalRows[0]?.n ?? rows.length) }
    })

    const projection = opts.attributesToRetrieve
    const hits: SearchHit[] = result.rows.map(row =>
      projectHit(row, compiled.snippetColumns, projection)
    )

    return {
      hits,
      totalHits: result.total,
      page: Math.max(1, opts.page ?? 1),
      perPage: Math.max(1, opts.perPage ?? 20),
      processingTimeMs: Math.round(performance.now() - start),
    }
  }

  /** REINDEX the GIN index. Periodic maintenance for write-heavy indexes. */
  async optimize(): Promise<void> {
    await this.ensure()
    const ginName = `${quoteIdent(this.schema)}.${quoteIdent(`search_${this.index}_fts_gin`)}`
    await this.sql.unsafe(`REINDEX INDEX ${ginName}`)
  }

  /**
   * Recompute every row's `fts` using the current registry's language + weight
   * scheme. Delegates to `rebuildInPlace`; see that helper for tiering and
   * size limits.
   */
  async rebuild(options?: RebuildOptions) {
    await this.ensure()
    return rebuildInPlace(this.sql, this.schema, this.index, this.registry, options)
  }

  // ── Internals ───────────────────────────────────────────────────────────

  /**
   * Text whose terms are recorded/unrecorded for a document. The `id` key is
   * dropped (stored docs carry it, freshly supplied fields do not) and the
   * result is truncated, so record and unrecord always see the same text for
   * the same document.
   */
  private termText(document: Record<string, unknown>): string {
    const { id: _id, ...fields } = document
    return truncate(this.registry.concatSearchableText(fields))
  }

  /**
   * Build the `fts` column expression: one weighted `to_tsvector` per
   * searchable segment, concatenated with `||`.
   *
   * Placeholders start at `$3`; callers MUST bind id as `$1` and doc as `$2`
   * ahead of the returned params.
   */
  private buildFtsExpression(document: Record<string, unknown>): {
    sql: string
    params: string[]
  } {
    const segments = this.registry.projectFtsSegments(document)
    const lang = `${quoteLiteral(this.registry.language)}::regconfig`
    const params: string[] = []
    const fragments = segments.map(seg => {
      params.push(truncate(seg.text))
      // `+ 2` skips the leading id ($1) and doc ($2) bindings.
      return `setweight(to_tsvector(${lang}, $${params.length + 2}), '${seg.tier}')`
    })
    return { sql: fragments.join(' || '), params }
  }

  /** Typo candidates per token, or an empty map when disabled or there is nothing to expand. */
  private async maybeExpand(tokens: string[]): Promise<Map<string, string[]>> {
    if (!this.typo.enabled || tokens.length === 0) return new Map()
    return expandTokens(
      this.sql,
      this.schema,
      this.index,
      tokens,
      this.typo,
      this.fuzzyAvailable === true
    )
  }
}
253
-
254
- interface RawHitRow {
255
- id: string
256
- doc: Record<string, unknown> | string
257
- score: number
258
- [snippetCol: string]: unknown
259
- }
260
-
261
/**
 * Turn one raw result row into a `SearchHit`.
 *
 * @param row                  Row returned by the compiled search SQL.
 * @param snippetCols          Attributes for which a `__snip_<attr>` column was selected.
 * @param attributesToRetrieve When non-empty, only these attributes are copied
 *                             into the hit's document; attributes the stored
 *                             document does not own are omitted.
 * @returns The hit; `highlights` is set only when at least one snippet is non-empty.
 */
function projectHit(
  row: RawHitRow,
  snippetCols: string[],
  attributesToRetrieve: string[] | undefined
): SearchHit {
  const document = parseDoc(row.doc)

  let projected = document
  if (attributesToRetrieve && attributesToRetrieve.length > 0) {
    const out: Record<string, unknown> = {}
    for (const attr of attributesToRetrieve) {
      // Own-property check: `in` would also match inherited names such as
      // `toString` or `constructor` and leak them into the hit.
      if (Object.prototype.hasOwnProperty.call(document, attr)) out[attr] = document[attr]
    }
    projected = out
  }

  const hit: SearchHit = { document: projected }

  if (snippetCols.length > 0) {
    const highlights: Record<string, string> = {}
    for (const col of snippetCols) {
      const raw = row[`__snip_${col}`] as string | null | undefined
      if (raw) highlights[col] = formatSnippet(raw)
    }
    if (Object.keys(highlights).length > 0) hit.highlights = highlights
  }

  return hit
}
290
-
291
/**
 * Normalise a `doc` column value: a string is JSON-decoded, an
 * already-decoded object is returned as is.
 */
function parseDoc(doc: Record<string, unknown> | string): Record<string, unknown> {
  return typeof doc === 'string' ? (JSON.parse(doc) as Record<string, unknown>) : doc
}
295
-
296
/**
 * Cap `text` at `maxBytes` of UTF-8.
 *
 * The cut is made on the encoded bytes, then moved back to the nearest
 * character boundary, so the result never exceeds the limit and never ends in
 * a partial multi-byte sequence. (Slicing by string length instead would keep
 * up to `maxBytes` UTF-16 code units, which can be several times more bytes.)
 *
 * @param text     Text to cap.
 * @param maxBytes Byte limit; defaults to `MAX_TEXT_BYTES`.
 * @returns `text` unchanged when it already fits, otherwise its longest
 *          prefix of whole characters that fits within `maxBytes`.
 */
function truncate(text: string, maxBytes: number = MAX_TEXT_BYTES): string {
  if (Buffer.byteLength(text, 'utf8') <= maxBytes) return text

  const bytes = Buffer.from(text, 'utf8')
  let end = Math.max(0, Math.floor(maxBytes))
  // A continuation byte (10xxxxxx) at `end` means the cut would split a
  // character; back up to that character's lead byte and cut before it.
  while (end > 0 && (bytes[end]! & 0xc0) === 0x80) end--
  return bytes.toString('utf8', 0, end)
}