@strav/search 0.4.31 → 1.0.0-alpha.33
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +20 -22
- package/src/console/index.ts +5 -0
- package/src/console/search_console_provider.ts +20 -0
- package/src/console/search_flush.ts +49 -0
- package/src/console/search_import.ts +103 -0
- package/src/console/search_list.ts +46 -0
- package/src/console/search_reindex.ts +94 -0
- package/src/drivers/meilisearch/meilisearch_driver.ts +304 -0
- package/src/drivers/memory/memory_driver.ts +344 -0
- package/src/drivers/postgres/apply_search_migration.ts +74 -0
- package/src/drivers/postgres/postgres_fts_driver.ts +493 -135
- package/src/drivers/typesense/typesense_driver.ts +345 -0
- package/src/index.ts +50 -39
- package/src/search_engine.ts +40 -25
- package/src/search_error.ts +86 -0
- package/src/search_manager.ts +112 -94
- package/src/search_provider.ts +68 -6
- package/src/searchable.ts +173 -160
- package/src/searchable_registry.ts +61 -0
- package/src/types.ts +59 -49
- package/README.md +0 -191
- package/src/commands/search_flush.ts +0 -41
- package/src/commands/search_import.ts +0 -43
- package/src/commands/search_optimize.ts +0 -52
- package/src/commands/search_rebuild.ts +0 -73
- package/src/drivers/algolia_driver.ts +0 -170
- package/src/drivers/embedded/embedded_driver.ts +0 -136
- package/src/drivers/embedded/engine/field_registry.ts +0 -97
- package/src/drivers/embedded/engine/fts_query_builder.ts +0 -184
- package/src/drivers/embedded/engine/query_compiler.ts +0 -134
- package/src/drivers/embedded/engine/schema.ts +0 -99
- package/src/drivers/embedded/engine/snippet_formatter.ts +0 -29
- package/src/drivers/embedded/engine/sqlite_engine.ts +0 -255
- package/src/drivers/embedded/engine/typo_expander.ts +0 -138
- package/src/drivers/embedded/errors.ts +0 -15
- package/src/drivers/embedded/filters/filter_compiler.ts +0 -136
- package/src/drivers/embedded/index.ts +0 -3
- package/src/drivers/embedded/storage/paths.ts +0 -23
- package/src/drivers/embedded/types.ts +0 -34
- package/src/drivers/meilisearch_driver.ts +0 -150
- package/src/drivers/null_driver.ts +0 -27
- package/src/drivers/postgres/engine/field_registry.ts +0 -116
- package/src/drivers/postgres/engine/fts_query_builder.ts +0 -105
- package/src/drivers/postgres/engine/pg_engine.ts +0 -300
- package/src/drivers/postgres/engine/query_compiler.ts +0 -165
- package/src/drivers/postgres/engine/schema.ts +0 -187
- package/src/drivers/postgres/engine/snippet_formatter.ts +0 -31
- package/src/drivers/postgres/engine/typo_expander.ts +0 -131
- package/src/drivers/postgres/errors.ts +0 -33
- package/src/drivers/postgres/filters/filter_compiler.ts +0 -138
- package/src/drivers/postgres/index.ts +0 -14
- package/src/drivers/postgres/rebuild/rebuild_inplace.ts +0 -113
- package/src/drivers/postgres/storage/identifiers.ts +0 -46
- package/src/drivers/postgres/types.ts +0 -53
- package/src/drivers/typesense_driver.ts +0 -229
- package/src/errors.ts +0 -18
- package/src/helpers.ts +0 -120
- package/stubs/config/search.ts +0 -57
- package/tsconfig.json +0 -5
|
@@ -1,150 +0,0 @@
|
|
|
1
|
-
import { ExternalServiceError } from '@strav/kernel'
|
|
2
|
-
import type { SearchEngine } from '../search_engine.ts'
|
|
3
|
-
import type {
|
|
4
|
-
SearchDocument,
|
|
5
|
-
SearchOptions,
|
|
6
|
-
SearchResult,
|
|
7
|
-
SearchHit,
|
|
8
|
-
IndexSettings,
|
|
9
|
-
DriverConfig,
|
|
10
|
-
} from '../types.ts'
|
|
11
|
-
|
|
12
|
-
/**
 * Meilisearch driver — communicates with the Meilisearch REST API via raw `fetch()`.
 *
 * Every method issues one HTTP request (two for `createIndex` when settings
 * are supplied) against `{protocol}://{host}:{port}`. Any non-2xx response is
 * raised as an `ExternalServiceError` carrying the status and response body.
 *
 * @see https://www.meilisearch.com/docs/reference/api/overview
 */
export class MeilisearchDriver implements SearchEngine {
  readonly name = 'meilisearch'
  private baseUrl: string
  private apiKey: string

  /**
   * @param config Connection settings. `protocol`, `host` and `port` default
   *   to `http`, `localhost` and `7700`; `apiKey` defaults to no authentication.
   */
  constructor(config: DriverConfig) {
    const protocol = config.protocol ?? 'http'
    const host = config.host ?? 'localhost'
    const port = config.port ?? 7700
    this.baseUrl = `${protocol}://${host}:${port}`
    this.apiKey = (config.apiKey as string) ?? ''
  }

  // ── Interface ────────────────────────────────────────────────────────────

  /** Add or replace a single document; `id` is sent as the document's `id` field. */
  async upsert(
    index: string,
    id: string | number,
    document: Record<string, unknown>
  ): Promise<void> {
    await this.request('POST', `${this.indexPath(index)}/documents`, [{ id, ...document }])
  }

  /** Add or replace a batch of documents in one request. */
  async upsertMany(index: string, documents: SearchDocument[]): Promise<void> {
    await this.request('POST', `${this.indexPath(index)}/documents`, documents)
  }

  /** Delete one document by id. */
  async delete(index: string, id: string | number): Promise<void> {
    await this.request(
      'DELETE',
      `${this.indexPath(index)}/documents/${encodeURIComponent(String(id))}`
    )
  }

  /** Delete several documents by id in one request. */
  async deleteMany(index: string, ids: Array<string | number>): Promise<void> {
    await this.request('POST', `${this.indexPath(index)}/documents/delete-batch`, ids)
  }

  /** Remove every document from the index while keeping the index itself. */
  async flush(index: string): Promise<void> {
    await this.request('DELETE', `${this.indexPath(index)}/documents`)
  }

  /** Delete the index. */
  async deleteIndex(index: string): Promise<void> {
    await this.request('DELETE', this.indexPath(index))
  }

  /**
   * Create an index and, when any attribute lists are supplied, push them as
   * index settings in a follow-up request.
   *
   * @param options Optional primary key (defaults to `id`) and attribute lists.
   */
  async createIndex(index: string, options?: IndexSettings): Promise<void> {
    await this.request('POST', '/indexes', {
      uid: index,
      primaryKey: options?.primaryKey ?? 'id',
    })

    if (!options) return

    const settings: Record<string, unknown> = {}
    if (options.searchableAttributes) settings.searchableAttributes = options.searchableAttributes
    if (options.displayedAttributes) settings.displayedAttributes = options.displayedAttributes
    if (options.filterableAttributes) settings.filterableAttributes = options.filterableAttributes
    if (options.sortableAttributes) settings.sortableAttributes = options.sortableAttributes

    if (Object.keys(settings).length > 0) {
      await this.request('PATCH', `${this.indexPath(index)}/settings`, settings)
    }
  }

  /**
   * Run a search and normalise the response into a `SearchResult`.
   *
   * `page` is clamped to at least 1 and `perPage` to at least 0 so that a
   * caller-supplied `page: 0` (or a negative value) can never be turned into
   * a negative `offset`/`limit`, which the API would reject.
   *
   * @param options Pagination, filter (raw string or key/value object), sort
   *   and attribute selection.
   * @returns Hits (each raw hit is the document; `_formatted` becomes
   *   `highlights`), total hit count, and the effective page / perPage.
   */
  async search(index: string, query: string, options?: SearchOptions): Promise<SearchResult> {
    const perPage = Math.max(0, options?.perPage ?? 20)
    const page = Math.max(1, options?.page ?? 1)

    const body: Record<string, unknown> = { q: query, limit: perPage, offset: (page - 1) * perPage }

    if (options?.filter) {
      const filter =
        typeof options.filter === 'string' ? options.filter : this.buildFilter(options.filter)
      // An object with no usable entries compiles to '' — send no filter at all.
      if (filter) body.filter = filter
    }
    if (options?.sort) body.sort = options.sort
    if (options?.attributesToRetrieve) body.attributesToRetrieve = options.attributesToRetrieve
    if (options?.attributesToHighlight) {
      body.attributesToHighlight = options.attributesToHighlight
    }

    const data = await this.request('POST', `${this.indexPath(index)}/search`, body)

    return {
      hits: (data.hits ?? []).map(
        (hit: any): SearchHit => ({
          document: hit,
          highlights: hit._formatted,
        })
      ),
      totalHits: data.estimatedTotalHits ?? data.totalHits ?? 0,
      page,
      perPage,
      processingTimeMs: data.processingTimeMs,
    }
  }

  // ── Private ──────────────────────────────────────────────────────────────

  /** URL path of an index, with the uid percent-encoded. */
  private indexPath(index: string): string {
    return `/indexes/${encodeURIComponent(index)}`
  }

  /** JSON content type plus a bearer token when an API key is configured. */
  private headers(): Record<string, string> {
    const h: Record<string, string> = { 'content-type': 'application/json' }
    if (this.apiKey) h['authorization'] = `Bearer ${this.apiKey}`
    return h
  }

  /**
   * Perform one HTTP call.
   *
   * @returns The parsed JSON body, or `null` for 204 / empty responses.
   * @throws ExternalServiceError when the response status is not 2xx.
   */
  private async request(method: string, path: string, body?: unknown): Promise<any> {
    const response = await fetch(`${this.baseUrl}${path}`, {
      method,
      headers: this.headers(),
      body: body !== undefined ? JSON.stringify(body) : undefined,
    })

    if (!response.ok) {
      const text = await response.text()
      throw new ExternalServiceError('Meilisearch', response.status, text)
    }

    if (response.status === 204 || response.headers.get('content-length') === '0') return null
    return response.json()
  }

  /**
   * Compile a key/value filter object into a filter expression, AND-ing the
   * entries together:
   * - `undefined` values are skipped (they carry no constraint),
   * - `null` becomes `key IS NULL` (NOTE(review): requires a Meilisearch
   *   version that supports the `IS NULL` operator — confirm the minimum
   *   supported server version),
   * - arrays become `key IN [...]`,
   * - everything else becomes `key = <JSON value>`.
   */
  private buildFilter(filter: Record<string, unknown>): string {
    const clauses: string[] = []
    for (const [key, value] of Object.entries(filter)) {
      if (value === undefined) continue
      if (value === null) {
        clauses.push(`${key} IS NULL`)
      } else if (Array.isArray(value)) {
        const items = value.filter(v => v !== undefined).map(v => JSON.stringify(v))
        clauses.push(`${key} IN [${items.join(', ')}]`)
      } else {
        clauses.push(`${key} = ${JSON.stringify(value)}`)
      }
    }
    return clauses.join(' AND ')
  }
}
|
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
import type { SearchEngine } from '../search_engine.ts'
|
|
2
|
-
import type { SearchDocument, SearchOptions, SearchResult, IndexSettings } from '../types.ts'
|
|
3
|
-
|
|
4
|
-
/**
 * Search driver that does nothing: every write resolves immediately without
 * storing anything, and every search resolves to an empty result set.
 *
 * Intended for running with search switched off, or as a stand-in in tests.
 */
export class NullDriver implements SearchEngine {
  readonly name = 'null'

  /** Ignored. */
  async upsert(
    _index: string,
    _id: string | number,
    _document: Record<string, unknown>
  ): Promise<void> {
    return
  }

  /** Ignored. */
  async upsertMany(_index: string, _documents: SearchDocument[]): Promise<void> {
    return
  }

  /** Ignored. */
  async delete(_index: string, _id: string | number): Promise<void> {
    return
  }

  /** Ignored. */
  async deleteMany(_index: string, _ids: Array<string | number>): Promise<void> {
    return
  }

  /** Ignored. */
  async flush(_index: string): Promise<void> {
    return
  }

  /** Ignored. */
  async deleteIndex(_index: string): Promise<void> {
    return
  }

  /** Ignored. */
  async createIndex(_index: string, _options?: IndexSettings): Promise<void> {
    return
  }

  /**
   * Always yields zero hits, echoing back the requested pagination
   * (page 1 / 20 per page when none was requested).
   */
  async search(_index: string, _query: string, options?: SearchOptions): Promise<SearchResult> {
    const page = options?.page ?? 1
    const perPage = options?.perPage ?? 20
    return { hits: [], totalHits: 0, page, perPage }
  }
}
|
|
@@ -1,116 +0,0 @@
|
|
|
1
|
-
import type { PgIndexSettings } from '../types.ts'
|
|
2
|
-
|
|
3
|
-
/** Default searchable column when no `searchableAttributes` are configured. */
export const DEFAULT_TEXT_COLUMN = '_text'

/** tsvector weight labels in declaration order; attributes default to them by position. */
const WEIGHT_TIERS = ['A', 'B', 'C', 'D'] as const
type WeightTier = (typeof WEIGHT_TIERS)[number]

/** Postgres column types a typed column may carry. This registry currently only emits `text`. */
type PgType = 'text' | 'integer' | 'bigint' | 'double precision' | 'boolean' | 'timestamptz'

export interface TypedColumnSpec {
  name: string
  pgType: PgType
  /** JSONB extraction expression: `(doc->>'name')::pgType` (cast suppressed for text). */
  expression: string
}

/**
 * The schema layout for one index: which document attributes feed which
 * tsvector segment + weight, and which typed columns exist for filter/sort.
 *
 * Mirrors `embedded/engine/field_registry.ts` so the two drivers project
 * documents identically. Differences:
 *   - Per-attribute weight tier (A/B/C/D) is explicit.
 *   - Typed columns are emitted as `GENERATED ALWAYS AS (...) STORED` SQL.
 */
export class FieldRegistry {
  readonly searchable: string[]
  readonly weights: Map<string, WeightTier>
  readonly filterable: string[]
  readonly sortable: string[]
  readonly typedColumns: TypedColumnSpec[]
  readonly primaryKey: string
  readonly language: string

  /**
   * @param settings Per-index settings; every field is optional.
   * @param language Fallback text-search configuration used when
   *   `settings.language` is absent.
   * @throws Error when `settings.weights` names a tier other than A/B/C/D.
   *   Tiers end up inside SQL text, so an unknown value is rejected here
   *   rather than surfacing later as a query failure.
   */
  constructor(settings?: PgIndexSettings, language = 'english') {
    this.primaryKey = settings?.primaryKey ?? 'id'
    this.language = settings?.language ?? language

    const configured = settings?.searchableAttributes
    this.searchable =
      configured && configured.length > 0 ? [...configured] : [DEFAULT_TEXT_COLUMN]

    // Attributes without an explicit weight get A, B, C, D by position; the
    // fifth and later attributes all share the last tier.
    this.weights = new Map()
    this.searchable.forEach((attr, position) => {
      const positional = WEIGHT_TIERS[Math.min(position, WEIGHT_TIERS.length - 1)]!
      this.weights.set(attr, resolveTier(attr, settings?.weights?.[attr], positional))
    })

    // Copy, like `searchable`, so later mutation of the caller's arrays
    // cannot change this registry.
    this.filterable = [...(settings?.filterableAttributes ?? [])]
    this.sortable = [...(settings?.sortableAttributes ?? [])]

    // One typed column per distinct filterable/sortable attribute, in first-seen order.
    const distinct = new Set<string>([...this.filterable, ...this.sortable])
    this.typedColumns = Array.from(distinct, (name): TypedColumnSpec => ({
      name,
      pgType: 'text',
      expression: `(doc->>${literal(name)})`,
    }))
  }

  /** True when no searchable attributes were configured and the catch-all column is in use. */
  get usesDefaultTextColumn(): boolean {
    return this.searchable.length === 1 && this.searchable[0] === DEFAULT_TEXT_COLUMN
  }

  /**
   * Project a document into [text, tier] pairs for tsvector construction.
   * Default mode collapses every string into one A-weighted blob.
   */
  projectFtsSegments(document: Record<string, unknown>): Array<{ text: string; tier: WeightTier }> {
    if (this.usesDefaultTextColumn) {
      return [{ text: collectStrings(document), tier: 'A' }]
    }
    return this.searchable.map(attr => ({
      text: coerceText(document[attr]),
      tier: this.weights.get(attr)!,
    }))
  }

  /** Single string spanning all searchable text (for terms-dict tokenization). */
  concatSearchableText(document: Record<string, unknown>): string {
    return this.projectFtsSegments(document)
      .map(s => s.text)
      .filter(Boolean)
      .join(' ')
  }
}

/**
 * Pick the weight tier for an attribute: the configured value when present
 * (case-insensitive), otherwise `fallback`.
 *
 * @throws Error when a configured value is not one of the known tiers.
 */
function resolveTier(attr: string, configured: unknown, fallback: WeightTier): WeightTier {
  if (configured === undefined || configured === null) return fallback
  const tier = String(configured).toUpperCase()
  if ((WEIGHT_TIERS as readonly string[]).includes(tier)) return tier as WeightTier
  throw new Error(
    `Invalid search weight ${JSON.stringify(configured)} for attribute "${attr}"; ` +
      `expected one of ${WEIGHT_TIERS.join(', ')}`
  )
}

/** Wrap a value in single quotes, doubling any embedded single quote. */
function literal(value: string): string {
  return `'${value.replace(/'/g, "''")}'`
}

/**
 * Flatten an attribute value to text: strings as-is, numbers and booleans
 * stringified, arrays joined with spaces (recursively, empty parts dropped),
 * anything else — including `null`, `undefined` and plain objects — as ''.
 */
function coerceText(value: unknown): string {
  if (value === null || value === undefined) return ''
  if (typeof value === 'string') return value
  if (Array.isArray(value)) return value.map(v => coerceText(v)).filter(Boolean).join(' ')
  if (typeof value === 'number' || typeof value === 'boolean') return String(value)
  return ''
}

/**
 * Join every non-empty string found at the top level of the document, or
 * directly inside a top-level array, with single spaces. Non-string values
 * and deeper nesting are ignored.
 */
function collectStrings(document: Record<string, unknown>): string {
  const parts: string[] = []
  for (const value of Object.values(document)) {
    if (typeof value === 'string' && value.length > 0) parts.push(value)
    else if (Array.isArray(value)) {
      for (const item of value) {
        if (typeof item === 'string' && item.length > 0) parts.push(item)
      }
    }
  }
  return parts.join(' ')
}
|
|
@@ -1,105 +0,0 @@
|
|
|
1
|
-
import { quoteLiteral } from '../storage/identifiers.ts'
|
|
2
|
-
|
|
3
|
-
/**
|
|
4
|
-
* Translate a user-facing query string into one that's safe for
|
|
5
|
-
* `websearch_to_tsquery`, plus extract positive tokens for typo expansion.
|
|
6
|
-
*
|
|
7
|
-
* websearch_to_tsquery already accepts Google-style syntax:
|
|
8
|
-
* - `"foo bar"` — phrase
|
|
9
|
-
* - `-foo` — exclude
|
|
10
|
-
* - `OR`/`AND` — boolean (case-insensitive)
|
|
11
|
-
*
|
|
12
|
-
* It does NOT support prefix matching (`foo*`); we recognise that ourselves
|
|
13
|
-
* and emit a separate `to_tsquery('foo:*')` ORed onto the result.
|
|
14
|
-
*/
|
|
15
|
-
export interface ParsedQuery {
|
|
16
|
-
/** The raw query, ready to pass to `websearch_to_tsquery`. */
|
|
17
|
-
websearch: string
|
|
18
|
-
/** Positive bare tokens (no quotes/operators) — used for typo expansion. */
|
|
19
|
-
positiveTokens: string[]
|
|
20
|
-
/** Prefix tokens from `foo*` syntax — emitted separately to `to_tsquery`. */
|
|
21
|
-
prefixTokens: string[]
|
|
22
|
-
/** Whether the input was effectively empty. */
|
|
23
|
-
isEmpty: boolean
|
|
24
|
-
}
|
|
25
|
-
|
|
26
|
-
/** Matches one double-quoted phrase, e.g. `"foo bar"`. */
const PHRASE_RE = /"([^"]*)"/g

/**
 * Split a user query into the pieces the tsquery builder needs.
 *
 * The trimmed input is passed through untouched as `websearch`. Separately,
 * the bare words outside quoted phrases are classified:
 *   - `foo*`       → `prefixTokens` (lower-cased, punctuation stripped)
 *   - `AND` / `OR` → ignored (boolean operators, any case)
 *   - `-foo`       → ignored: an excluded term must never be fed back in as
 *                    a positive or prefix token, otherwise the expansion
 *                    OR-ed onto the query would re-admit what the user
 *                    asked to exclude
 *   - anything else of two or more characters after normalisation
 *                  → `positiveTokens`
 * A leading `+` is dropped before classification.
 *
 * @param input Raw query text as typed by the user.
 * @returns The parsed query; `isEmpty` is true only for blank input.
 */
export function parseQuery(input: string): ParsedQuery {
  const trimmed = input.trim()
  if (!trimmed) {
    return { websearch: '', positiveTokens: [], prefixTokens: [], isEmpty: true }
  }

  const positiveTokens: string[] = []
  const prefixTokens: string[] = []
  const normalise = (word: string): string =>
    word.toLowerCase().replace(/[^\p{L}\p{N}_-]/gu, '')

  // Strip phrases first so we don't tokenize their inner whitespace.
  const scratch = trimmed.replace(PHRASE_RE, ' ')
  for (const raw of scratch.split(/\s+/)) {
    if (!raw) continue
    // Negated term (`-foo`, `-foo*`): exclusion is handled by `websearch` alone.
    if (raw.startsWith('-')) continue

    const text = raw.startsWith('+') ? raw.slice(1) : raw
    if (text.endsWith('*')) {
      const stem = normalise(text.slice(0, -1))
      if (stem) prefixTokens.push(stem)
      continue
    }

    const upper = text.toUpperCase()
    if (upper === 'AND' || upper === 'OR') continue

    const norm = normalise(text)
    if (norm.length >= 2) positiveTokens.push(norm)
  }

  return { websearch: trimmed, positiveTokens, prefixTokens, isEmpty: false }
}
|
|
55
|
-
|
|
56
|
-
/**
 * Assemble the tsquery SQL for a parsed query.
 *
 * Up to three kinds of fragment are produced, in this order, and joined with
 * the tsquery `||` operator:
 *   1. `websearch_to_tsquery` over the raw user query,
 *   2. one `to_tsquery` per prefix token (`stem:*`),
 *   3. one `to_tsquery` per positive token that has typo expansions, with the
 *      sanitised candidates joined by ` | `.
 *
 * User text is always passed as a bound parameter. The language is embedded
 * as a quoted literal because it is a per-index, server-controlled value.
 *
 * @param parsed Output of `parseQuery`.
 * @param expansions Typo candidates keyed by positive token.
 * @param language Text-search configuration name.
 * @param startAt Number of placeholders the caller has already used; the
 *   first placeholder emitted here is `$startAt+1`.
 * @returns The SQL expression, its parameters in placeholder order, and how
 *   many placeholders were consumed. All three are empty/zero when there is
 *   nothing to search for.
 */
export function buildTsqueryExpression(
  parsed: ParsedQuery,
  expansions: Map<string, string[]>,
  language: string,
  startAt = 0
): { sql: string; params: string[]; paramCount: number } {
  const regconfig = `${quoteLiteral(language)}::regconfig`
  const bindings: string[] = []
  const clauses: string[] = []

  // Bind one value and emit the call that consumes its placeholder.
  const bind = (fn: string, value: string): void => {
    bindings.push(value)
    clauses.push(`${fn}(${regconfig}, $${startAt + bindings.length})`)
  }

  if (parsed.websearch) bind('websearch_to_tsquery', parsed.websearch)

  parsed.prefixTokens.forEach(stem => bind('to_tsquery', `${stem}:*`))

  for (const token of parsed.positiveTokens) {
    const candidates = expansions.get(token) ?? []
    const alternatives = candidates
      .map(candidate => sanitiseTsTerm(candidate))
      .filter(Boolean)
      .join(' | ')
    if (alternatives) bind('to_tsquery', alternatives)
  }

  return clauses.length > 0
    ? { sql: clauses.join(' || '), params: bindings, paramCount: bindings.length }
    : { sql: '', params: [], paramCount: 0 }
}
|
|
101
|
-
|
|
102
|
-
/**
 * Make a single term safe to splice into a hand-built tsquery: lower-case
 * it, then drop every character that is not a letter, a digit, `_` or `-`.
 */
function sanitiseTsTerm(term: string): string {
  const lowered = term.toLowerCase()
  const cleaned = lowered.replace(/[^\p{L}\p{N}_-]/gu, '')
  return cleaned
}
|
|
@@ -1,300 +0,0 @@
|
|
|
1
|
-
import type { SQL } from 'bun'
|
|
2
|
-
import type {
|
|
3
|
-
SearchDocument,
|
|
4
|
-
SearchOptions,
|
|
5
|
-
SearchResult,
|
|
6
|
-
SearchHit,
|
|
7
|
-
} from '../../../types.ts'
|
|
8
|
-
import type { PgIndexSettings, ResolvedTypoTolerance } from '../types.ts'
|
|
9
|
-
import { FieldRegistry } from './field_registry.ts'
|
|
10
|
-
import { ensureIndexTable, dropIndex as dropIndexSchema } from './schema.ts'
|
|
11
|
-
import { parseQuery, buildTsqueryExpression } from './fts_query_builder.ts'
|
|
12
|
-
import { compileSearch } from './query_compiler.ts'
|
|
13
|
-
import { formatSnippet } from './snippet_formatter.ts'
|
|
14
|
-
import {
|
|
15
|
-
expandTokens,
|
|
16
|
-
hasFuzzystrmatch,
|
|
17
|
-
recordTerms,
|
|
18
|
-
unrecordTerms,
|
|
19
|
-
} from './typo_expander.ts'
|
|
20
|
-
import {
|
|
21
|
-
indexTableName,
|
|
22
|
-
termsTableName,
|
|
23
|
-
quoteIdent,
|
|
24
|
-
quoteLiteral,
|
|
25
|
-
} from '../storage/identifiers.ts'
|
|
26
|
-
import { rebuildInPlace, type RebuildOptions } from '../rebuild/rebuild_inplace.ts'
|
|
27
|
-
|
|
28
|
-
export interface PgEngineOptions {
|
|
29
|
-
sql: SQL
|
|
30
|
-
schema: string
|
|
31
|
-
index: string
|
|
32
|
-
language: string
|
|
33
|
-
typoTolerance: ResolvedTypoTolerance
|
|
34
|
-
ginFastUpdate: boolean
|
|
35
|
-
workMem: string | null
|
|
36
|
-
settings?: PgIndexSettings
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
/** A Postgres tsvector (lexemes + positions) must be smaller than 1 MB. Cap the input text below that to be safe. */
|
|
40
|
-
const MAX_TEXT_BYTES = 900_000
|
|
41
|
-
|
|
42
|
-
/**
 * One PgEngine wraps a single index: it owns that index's table, keeps the
 * typo-tolerance terms dictionary in step with the stored documents, and
 * runs searches against it.
 */
export class PgEngine {
  readonly registry: FieldRegistry
  private readonly sql: SQL
  private readonly schema: string
  private readonly index: string
  private readonly typo: ResolvedTypoTolerance
  private readonly ginFastUpdate: boolean
  private readonly workMem: string | null
  private readonly tableName: string
  // null = not probed yet; set on the first ensure() when typo tolerance is on.
  private fuzzyAvailable: boolean | null = null
  private ensured = false

  constructor(opts: PgEngineOptions) {
    this.sql = opts.sql
    this.schema = opts.schema
    this.index = opts.index
    this.typo = opts.typoTolerance
    this.ginFastUpdate = opts.ginFastUpdate
    this.workMem = opts.workMem
    this.registry = new FieldRegistry(opts.settings, opts.language)
    this.tableName = indexTableName(opts.schema, opts.index)
  }

  /** Lazy: ensure the table + indexes + trigger exist. Idempotent. */
  async ensure(): Promise<void> {
    if (this.ensured) return
    await ensureIndexTable(this.sql, this.schema, this.index, this.registry, this.ginFastUpdate)
    if (this.typo.enabled && this.fuzzyAvailable === null) {
      this.fuzzyAvailable = await hasFuzzystrmatch(this.sql)
    }
    this.ensured = true
  }

  // ── Writes ──────────────────────────────────────────────────────────────

  /** Insert or replace one document. */
  async upsert(id: string | number, document: Record<string, unknown>): Promise<void> {
    await this.upsertMany([{ id, ...document }])
  }

  /**
   * Insert or replace a batch of documents inside one transaction.
   *
   * With typo tolerance enabled, the terms of the row being replaced are
   * unrecorded before the new terms are recorded. The previous row is only
   * read in that case — it is not needed otherwise.
   */
  async upsertMany(documents: SearchDocument[]): Promise<void> {
    if (documents.length === 0) return
    await this.ensure()

    await this.sql.begin(async (tx: SQL) => {
      for (const raw of documents) {
        const { id, ...rest } = raw
        const idStr = String(id)
        const fields = rest as Record<string, unknown>
        // Bun's SQL treats stringified JSON as a JSONB string value (double-
        // encoding the JSON). Passing the object directly lets it generate
        // proper JSONB so `doc->>'field'` works for the typed generated cols.
        const doc = { id, ...fields }

        if (this.typo.enabled) {
          const oldRows = (await tx.unsafe(
            `SELECT doc FROM ${this.tableName} WHERE id = $1`,
            [idStr]
          )) as Array<{ doc: Record<string, unknown> | string }>
          const previous = oldRows[0]
          if (previous) {
            await unrecordTerms(tx, this.schema, this.index, this.storedText(previous.doc))
          }
        }

        const ftsExpr = this.buildFtsExpression(fields)
        const sqlStr =
          `INSERT INTO ${this.tableName} (id, doc, fts) VALUES ($1, $2, ${ftsExpr.sql}) ` +
          `ON CONFLICT (id) DO UPDATE SET doc = EXCLUDED.doc, fts = EXCLUDED.fts`
        await tx.unsafe(sqlStr, [idStr, doc as any, ...ftsExpr.params])

        if (this.typo.enabled) {
          await recordTerms(tx, this.schema, this.index, this.indexedText(fields))
        }
      }
    })
  }

  /** Delete one document by id. */
  async delete(id: string | number): Promise<void> {
    await this.deleteMany([id])
  }

  /**
   * Delete a batch of documents inside one transaction, unrecording their
   * terms first when typo tolerance is enabled.
   */
  async deleteMany(ids: Array<string | number>): Promise<void> {
    if (ids.length === 0) return
    await this.ensure()

    await this.sql.begin(async (tx: SQL) => {
      const idStrs = ids.map(String)
      const placeholders = idStrs.map((_, i) => `$${i + 1}`).join(', ')

      if (this.typo.enabled) {
        const rows = (await tx.unsafe(
          `SELECT doc FROM ${this.tableName} WHERE id IN (${placeholders})`,
          idStrs
        )) as Array<{ doc: Record<string, unknown> | string }>
        for (const r of rows) {
          await unrecordTerms(tx, this.schema, this.index, this.storedText(r.doc))
        }
      }

      await tx.unsafe(`DELETE FROM ${this.tableName} WHERE id IN (${placeholders})`, idStrs)
    })
  }

  /** Remove every document (and, with typo tolerance, every recorded term). */
  async flush(): Promise<void> {
    await this.ensure()
    await this.sql.begin(async (tx: SQL) => {
      await tx.unsafe(`TRUNCATE ${this.tableName}`)
      if (this.typo.enabled) {
        await tx.unsafe(`TRUNCATE ${termsTableName(this.schema, this.index)}`)
      }
    })
  }

  /** Drop the index's schema objects; the next operation re-creates them. */
  async drop(): Promise<void> {
    await dropIndexSchema(this.sql, this.schema, this.index)
    this.ensured = false
  }

  // ── Reads ───────────────────────────────────────────────────────────────

  /**
   * Run a search.
   *
   * The query is parsed, positive tokens are typo-expanded when enabled, and
   * the compiled page + count queries run in one transaction so an optional
   * `SET LOCAL work_mem` applies to both.
   *
   * @returns Hits, total hit count, the effective page / perPage (each at
   *   least 1) and the wall-clock time spent, in milliseconds.
   */
  async search(query: string, options?: SearchOptions): Promise<SearchResult> {
    await this.ensure()
    const start = performance.now()
    const opts = options ?? {}
    const parsed = parseQuery(query)

    const expansions = await this.maybeExpand(parsed.positiveTokens)
    const tsquery = buildTsqueryExpression(parsed, expansions, this.registry.language)

    const compiled = compileSearch({
      registry: this.registry,
      schema: this.schema,
      index: this.index,
      tsquery: { sql: tsquery.sql, params: tsquery.params },
      search: opts,
    })

    const result = await this.sql.begin(async (tx: SQL) => {
      if (this.workMem) {
        await tx.unsafe(`SET LOCAL work_mem = ${quoteLiteral(this.workMem)}`)
      }
      const rows = (await tx.unsafe(compiled.sql, compiled.params)) as RawHitRow[]
      const totalRows = (await tx.unsafe(compiled.countSql, compiled.countParams)) as Array<{
        n: number | string | bigint
      }>
      // NOTE(review): depending on how the count query is written, the driver
      // may hand back a 64-bit count as a string or bigint — normalise so
      // `totalHits` is always a number. Confirm against the query compiler.
      return { rows, total: Number(totalRows[0]?.n ?? rows.length) }
    })

    const projection = opts.attributesToRetrieve
    const hits: SearchHit[] = result.rows.map(row =>
      projectHit(row, compiled.snippetColumns, projection)
    )

    return {
      hits,
      totalHits: result.total,
      page: Math.max(1, opts.page ?? 1),
      perPage: Math.max(1, opts.perPage ?? 20),
      processingTimeMs: Math.round(performance.now() - start),
    }
  }

  /** REINDEX the GIN index. Periodic maintenance for write-heavy indexes. */
  async optimize(): Promise<void> {
    await this.ensure()
    const ginName = `${quoteIdent(this.schema)}.${quoteIdent(`search_${this.index}_fts_gin`)}`
    await this.sql.unsafe(`REINDEX INDEX ${ginName}`)
  }

  /**
   * Recompute every row's `fts` using the current registry's language + weight
   * scheme. Auto-picks tier (in-place vs batched) by row count; throws on
   * tables larger than the supported tier-2 ceiling.
   */
  async rebuild(options?: RebuildOptions) {
    await this.ensure()
    return rebuildInPlace(this.sql, this.schema, this.index, this.registry, options)
  }

  // ── Internals ───────────────────────────────────────────────────────────

  /**
   * Text recorded in the terms dictionary for a document's fields (the
   * document without its `id`), capped to the same size as the indexed text.
   */
  private indexedText(fields: Record<string, unknown>): string {
    return truncate(this.registry.concatSearchableText(fields))
  }

  /**
   * Text that was recorded for a stored row. Stored docs carry `id`, but the
   * write path records terms from the document without it, so strip it here
   * to unrecord exactly what was recorded.
   */
  private storedText(stored: Record<string, unknown> | string): string {
    const { id: _id, ...fields } = parseDoc(stored)
    return this.indexedText(fields)
  }

  /**
   * Build the `fts` value expression for an INSERT: one weighted
   * `to_tsvector` per segment, concatenated with `||`.
   *
   * Placeholders start at `$3` because callers prepend the id (`$1`) and doc
   * (`$2`) bindings. Caller MUST keep those positions stable.
   */
  private buildFtsExpression(document: Record<string, unknown>): {
    sql: string
    params: string[]
  } {
    const segments = this.registry.projectFtsSegments(document)
    const lang = `${quoteLiteral(this.registry.language)}::regconfig`
    const params: string[] = []
    const fragments = segments.map(seg => {
      params.push(truncate(seg.text))
      // The tier originates from index settings; quote it like every other
      // literal instead of splicing it in raw.
      return `setweight(to_tsvector(${lang}, $${params.length + 2}), ${quoteLiteral(seg.tier)})`
    })
    return { sql: fragments.join(' || '), params }
  }

  /** Typo-expand tokens when tolerance is enabled; otherwise an empty map. */
  private async maybeExpand(tokens: string[]): Promise<Map<string, string[]>> {
    if (!this.typo.enabled || tokens.length === 0) return new Map()
    return expandTokens(
      this.sql,
      this.schema,
      this.index,
      tokens,
      this.typo,
      this.fuzzyAvailable === true
    )
  }
}
|
|
253
|
-
|
|
254
|
-
interface RawHitRow {
|
|
255
|
-
id: string
|
|
256
|
-
doc: Record<string, unknown> | string
|
|
257
|
-
score: number
|
|
258
|
-
[snippetCol: string]: unknown
|
|
259
|
-
}
|
|
260
|
-
|
|
261
|
-
/**
 * Turn one raw result row into a `SearchHit`.
 *
 * @param row Raw row; `doc` may arrive as an object or as a JSON string, and
 *   snippet text is read from columns named `__snip_<col>`.
 * @param snippetCols Columns for which a snippet column was selected.
 * @param attributesToRetrieve When non-empty, the document is reduced to
 *   these attributes. Only the document's own properties are copied, so
 *   names such as `constructor` or `toString` cannot pull inherited members
 *   into the result.
 * @returns The hit; `highlights` is present only when at least one snippet
 *   column produced text.
 */
function projectHit(
  row: RawHitRow,
  snippetCols: string[],
  attributesToRetrieve: string[] | undefined
): SearchHit {
  const document = parseDoc(row.doc)

  let projected = document
  if (attributesToRetrieve && attributesToRetrieve.length > 0) {
    projected = Object.fromEntries(
      attributesToRetrieve
        .filter(attr => Object.prototype.hasOwnProperty.call(document, attr))
        .map(attr => [attr, document[attr]])
    )
  }

  const hit: SearchHit = { document: projected }

  if (snippetCols.length > 0) {
    const highlights: Record<string, string> = {}
    for (const col of snippetCols) {
      const raw = row[`__snip_${col}`] as string | null | undefined
      if (raw) highlights[col] = formatSnippet(raw)
    }
    if (Object.keys(highlights).length > 0) hit.highlights = highlights
  }

  return hit
}

/** Return the stored document as an object, parsing it first when it arrives as JSON text. */
function parseDoc(doc: Record<string, unknown> | string): Record<string, unknown> {
  if (typeof doc === 'string') return JSON.parse(doc) as Record<string, unknown>
  return doc
}
|
|
295
|
-
|
|
296
|
-
/**
 * Cap `text` at `maxBytes` bytes of UTF-8.
 *
 * The limit is measured in bytes, so the cut is made on the encoded bytes
 * rather than on string length: slicing by character count would leave
 * multi-byte text well above the cap. The cut is moved back to the nearest
 * character boundary so a multi-byte character is never split.
 *
 * @param text Text to cap.
 * @param maxBytes Byte budget; defaults to `MAX_TEXT_BYTES`.
 * @returns `text` unchanged when it already fits, otherwise its longest
 *   prefix of whole characters that fits in `maxBytes` bytes.
 */
function truncate(text: string, maxBytes: number = MAX_TEXT_BYTES): string {
  if (Buffer.byteLength(text, 'utf8') <= maxBytes) return text

  const bytes = Buffer.from(text, 'utf8')
  let end = Math.max(0, Math.floor(maxBytes))
  // UTF-8 continuation bytes look like 0b10xxxxxx. If the first excluded byte
  // is one, the cut falls inside a character — back up to its lead byte.
  while (end > 0 && (bytes[end]! & 0xc0) === 0x80) end--
  return bytes.subarray(0, end).toString('utf8')
}
|