@strav/search 0.4.30 → 1.0.0-alpha.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +20 -22
- package/src/console/index.ts +5 -0
- package/src/console/search_console_provider.ts +20 -0
- package/src/console/search_flush.ts +49 -0
- package/src/console/search_import.ts +103 -0
- package/src/console/search_list.ts +46 -0
- package/src/console/search_reindex.ts +94 -0
- package/src/drivers/meilisearch/meilisearch_driver.ts +304 -0
- package/src/drivers/memory/memory_driver.ts +344 -0
- package/src/drivers/postgres/apply_search_migration.ts +74 -0
- package/src/drivers/postgres/postgres_fts_driver.ts +493 -135
- package/src/drivers/typesense/typesense_driver.ts +345 -0
- package/src/index.ts +50 -39
- package/src/search_engine.ts +40 -25
- package/src/search_error.ts +86 -0
- package/src/search_manager.ts +112 -94
- package/src/search_provider.ts +68 -6
- package/src/searchable.ts +173 -160
- package/src/searchable_registry.ts +61 -0
- package/src/types.ts +59 -49
- package/README.md +0 -191
- package/src/commands/search_flush.ts +0 -41
- package/src/commands/search_import.ts +0 -43
- package/src/commands/search_optimize.ts +0 -52
- package/src/commands/search_rebuild.ts +0 -73
- package/src/drivers/algolia_driver.ts +0 -170
- package/src/drivers/embedded/embedded_driver.ts +0 -136
- package/src/drivers/embedded/engine/field_registry.ts +0 -97
- package/src/drivers/embedded/engine/fts_query_builder.ts +0 -184
- package/src/drivers/embedded/engine/query_compiler.ts +0 -134
- package/src/drivers/embedded/engine/schema.ts +0 -99
- package/src/drivers/embedded/engine/snippet_formatter.ts +0 -29
- package/src/drivers/embedded/engine/sqlite_engine.ts +0 -255
- package/src/drivers/embedded/engine/typo_expander.ts +0 -138
- package/src/drivers/embedded/errors.ts +0 -15
- package/src/drivers/embedded/filters/filter_compiler.ts +0 -136
- package/src/drivers/embedded/index.ts +0 -3
- package/src/drivers/embedded/storage/paths.ts +0 -23
- package/src/drivers/embedded/types.ts +0 -34
- package/src/drivers/meilisearch_driver.ts +0 -150
- package/src/drivers/null_driver.ts +0 -27
- package/src/drivers/postgres/engine/field_registry.ts +0 -116
- package/src/drivers/postgres/engine/fts_query_builder.ts +0 -105
- package/src/drivers/postgres/engine/pg_engine.ts +0 -300
- package/src/drivers/postgres/engine/query_compiler.ts +0 -165
- package/src/drivers/postgres/engine/schema.ts +0 -187
- package/src/drivers/postgres/engine/snippet_formatter.ts +0 -31
- package/src/drivers/postgres/engine/typo_expander.ts +0 -131
- package/src/drivers/postgres/errors.ts +0 -33
- package/src/drivers/postgres/filters/filter_compiler.ts +0 -138
- package/src/drivers/postgres/index.ts +0 -14
- package/src/drivers/postgres/rebuild/rebuild_inplace.ts +0 -113
- package/src/drivers/postgres/storage/identifiers.ts +0 -46
- package/src/drivers/postgres/types.ts +0 -53
- package/src/drivers/typesense_driver.ts +0 -229
- package/src/errors.ts +0 -18
- package/src/helpers.ts +0 -120
- package/stubs/config/search.ts +0 -57
- package/tsconfig.json +0 -5
|
@@ -1,255 +0,0 @@
|
|
|
1
|
-
import { Database } from 'bun:sqlite'
|
|
2
|
-
import type {
|
|
3
|
-
SearchDocument,
|
|
4
|
-
SearchOptions,
|
|
5
|
-
SearchResult,
|
|
6
|
-
SearchHit,
|
|
7
|
-
IndexSettings,
|
|
8
|
-
} from '../../../types.ts'
|
|
9
|
-
import type { ResolvedTypoTolerance } from '../types.ts'
|
|
10
|
-
import { FieldRegistry } from './field_registry.ts'
|
|
11
|
-
import { applyConnectionPragmas, createSchema, quoteIdent } from './schema.ts'
|
|
12
|
-
import { compileQuery, compileQueryWithExpansions } from './fts_query_builder.ts'
|
|
13
|
-
import { compileSearch } from './query_compiler.ts'
|
|
14
|
-
import { formatSnippet } from './snippet_formatter.ts'
|
|
15
|
-
import { recordTerms, unrecordTerms, expandTokens } from './typo_expander.ts'
|
|
16
|
-
|
|
17
|
-
/** Everything a {@link SqliteEngine} needs to open and configure one index. */
export interface SqliteEngineOptions {
  /** Location handed to the `bun:sqlite` `Database` constructor. */
  path: string

  /** Value forwarded to `applyConnectionPragmas` when the connection is opened. */
  synchronous: 'OFF' | 'NORMAL' | 'FULL'

  /** Resolved typo-tolerance settings consulted when a search expression is built. */
  typoTolerance: ResolvedTypoTolerance

  /** Name of the index this engine wraps. */
  indexName: string

  /** Optional index settings used to construct the engine's `FieldRegistry`. */
  settings?: IndexSettings
}
|
|
24
|
-
|
|
25
|
-
/**
 * One SqliteEngine wraps a single index (a single SQLite file). The driver
 * holds a Map<indexName, SqliteEngine> and lazily instantiates per index.
 */
export class SqliteEngine {
  readonly db: Database
  readonly registry: FieldRegistry
  private readonly typo: ResolvedTypoTolerance
  private readonly indexName: string

  /**
   * Open the database at `opts.path`, then run `applyConnectionPragmas` and
   * `createSchema` against it.
   *
   * If any setup step throws, the freshly opened connection is closed before
   * the error is rethrown, so a failed construction does not leak the handle.
   */
  constructor(opts: SqliteEngineOptions) {
    const db = new Database(opts.path)
    let registry: FieldRegistry
    try {
      applyConnectionPragmas(db, opts.synchronous)
      registry = new FieldRegistry(opts.settings)
      createSchema(db, registry)
    } catch (error) {
      try {
        db.close()
      } catch {
        // Best effort — the setup error rethrown below is the one worth reporting
      }
      throw error
    }

    this.db = db
    this.registry = registry
    this.typo = opts.typoTolerance
    this.indexName = opts.indexName
  }

  // ── Writes ──────────────────────────────────────────────────────────────

  /** Insert the document, or replace the stored one that has the same id. */
  upsert(id: string | number, document: Record<string, unknown>): void {
    this.runUpsertBatch([{ id, document }])
  }

  /** Upsert several documents inside one transaction. A no-op for an empty list. */
  upsertMany(documents: SearchDocument[]): void {
    if (documents.length === 0) return
    const batch = documents.map(d => {
      const { id, ...rest } = d
      return { id, document: rest as Record<string, unknown> }
    })
    this.runUpsertBatch(batch)
  }

  /** Delete the document with the given id, if it exists. */
  delete(id: string | number): void {
    this.runDeleteBatch([id])
  }

  /** Delete several documents inside one transaction. A no-op for an empty list. */
  deleteMany(ids: Array<string | number>): void {
    if (ids.length === 0) return
    this.runDeleteBatch(ids)
  }

  /** Remove all documents from the index, leaving the schema in place. */
  flush(): void {
    const tx = this.db.transaction(() => {
      this.db.exec('DELETE FROM fts')
      this.db.exec('DELETE FROM documents')
      this.db.exec('DELETE FROM terms_dict')
    })
    tx()
  }

  /** Force-merge FTS5 segments into one. Run periodically (e.g. nightly via CLI). */
  optimize(): void {
    this.db.exec("INSERT INTO fts(fts) VALUES('optimize')")
  }

  /** Attempt a truncating WAL checkpoint, then close the database connection. */
  close(): void {
    try {
      this.db.exec('PRAGMA wal_checkpoint(TRUNCATE)')
    } catch {
      // Ignore — closing should never throw on a checkpoint failure
    }
    this.db.close()
  }

  // ── Reads ───────────────────────────────────────────────────────────────

  /**
   * Run a search and return the projected hits together with paging metadata.
   *
   * @param query Raw user query, compiled (and optionally typo-expanded) first.
   * @param options Paging / projection options; defaults to `{}`.
   * @returns Hits, total count (falls back to `hits.length` when the count query
   *   yields no row), the clamped page / perPage values and the elapsed time in ms.
   */
  search(query: string, options?: SearchOptions): SearchResult {
    const start = performance.now()
    const opts = options ?? {}
    const expression = this.buildExpression(query)

    const compiled = compileSearch({
      registry: this.registry,
      expression,
      search: opts,
    })

    const rows = this.db
      .prepare<RawHitRow, any[]>(compiled.sql)
      .all(...(compiled.params as any[]))
    const totalRow = this.db
      .prepare<{ n: number }, any[]>(compiled.countSql)
      .get(...(compiled.countParams as any[]))

    const projection = opts.attributesToRetrieve
    const hits: SearchHit[] = rows.map(row => projectHit(row, compiled.snippetColumns, projection))

    return {
      hits,
      totalHits: totalRow?.n ?? hits.length,
      page: Math.max(1, opts.page ?? 1),
      perPage: Math.max(1, opts.perPage ?? 20),
      processingTimeMs: Math.round(performance.now() - start),
    }
  }

  // ── Internals ───────────────────────────────────────────────────────────

  /**
   * Compile the query; when typo tolerance is enabled and the query has positive
   * tokens, recompile it with whatever near-miss expansions the dictionary offers.
   */
  private buildExpression(query: string) {
    const base = compileQuery(query)
    if (!this.typo.enabled || base.isEmpty || base.positiveTokens.length === 0) return base

    const expansions = expandTokens(this.db, base.positiveTokens, this.typo)
    if (expansions.size === 0) return base
    return compileQueryWithExpansions(query, expansions)
  }

  /**
   * Insert or update every item inside a single transaction, keeping the
   * `documents` table, the `fts` table and the terms dictionary in step.
   */
  private runUpsertBatch(items: Array<{ id: string | number; document: Record<string, unknown> }>) {
    // All statements are prepared once per batch rather than once per row.
    const insertDoc = this.prepareInsertDoc()
    const updateDoc = this.prepareUpdateDoc()
    const fetchExisting = this.db.prepare<
      { rowid: number; doc: string },
      [string]
    >('SELECT rowid, doc FROM documents WHERE id = ?')
    const insertFts = this.prepareInsertFts()
    const deleteFts = this.db.prepare('DELETE FROM fts WHERE rowid = ?')

    const tx = this.db.transaction(() => {
      for (const { id, document: doc } of items) {
        const idStr = String(id)
        const docJson = JSON.stringify({ id, ...doc })
        const ftsValues = this.registry.projectFtsValues(doc)
        const typedValues = this.registry.projectTypedValues(doc)
        const newText = this.registry.concatSearchableText(doc)

        const existing = fetchExisting.get(idStr)
        if (existing) {
          // Update path: retire the old document's terms, then rewrite the FTS
          // row and the stored document under the existing rowid.
          const oldDoc = JSON.parse(existing.doc) as Record<string, unknown>
          const oldText = this.registry.concatSearchableText(oldDoc)
          unrecordTerms(this.db, oldText)

          deleteFts.run(existing.rowid)
          insertFts.run(existing.rowid as any, ...(ftsValues as any[]))
          updateDoc.run(docJson as any, ...(typedValues as any[]), existing.rowid as any)
        } else {
          // Insert path: the FTS row reuses the rowid assigned to the new document.
          const result = insertDoc.run(idStr as any, docJson as any, ...(typedValues as any[]))
          const rowid = Number(result.lastInsertRowid)
          insertFts.run(rowid as any, ...(ftsValues as any[]))
        }

        recordTerms(this.db, newText)
      }
    })
    // No-op read kept from the original implementation; `indexName` has no other
    // reader in this class (presumably this keeps unused-member checks quiet).
    void this.indexName
    tx()
  }

  /**
   * Delete every listed id inside a single transaction. Ids without a stored
   * document are skipped; for the rest the terms dictionary is decremented
   * before the FTS row and the document row are removed.
   */
  private runDeleteBatch(ids: Array<string | number>) {
    const fetchExisting = this.db.prepare<
      { rowid: number; doc: string },
      [string]
    >('SELECT rowid, doc FROM documents WHERE id = ?')
    const deleteDoc = this.db.prepare('DELETE FROM documents WHERE id = ?')
    const deleteFts = this.db.prepare('DELETE FROM fts WHERE rowid = ?')

    const tx = this.db.transaction(() => {
      for (const id of ids) {
        const idStr = String(id)
        const existing = fetchExisting.get(idStr)
        if (!existing) continue
        const oldDoc = JSON.parse(existing.doc) as Record<string, unknown>
        unrecordTerms(this.db, this.registry.concatSearchableText(oldDoc))
        deleteFts.run(existing.rowid)
        deleteDoc.run(idStr)
      }
    })
    tx()
  }

  /** Prepare the INSERT for `documents`: id, doc, then one column per typed column. */
  private prepareInsertDoc() {
    const cols = ['id', 'doc', ...this.registry.typedColumns.map(quoteIdent)]
    const placeholders = cols.map(() => '?').join(', ')
    return this.db.prepare(
      `INSERT INTO documents (${cols.join(', ')}) VALUES (${placeholders})`
    )
  }

  /**
   * Prepare the UPDATE for `documents`. Parameters are bound in this order:
   * doc JSON, one value per typed column, then the target rowid.
   */
  private prepareUpdateDoc() {
    const sets = ['doc = ?']
    for (const col of this.registry.typedColumns) sets.push(`${quoteIdent(col)} = ?`)
    return this.db.prepare(`UPDATE documents SET ${sets.join(', ')} WHERE rowid = ?`)
  }

  /** Prepare the INSERT for `fts`: rowid, then one column per searchable field. */
  private prepareInsertFts() {
    const cols = ['rowid', ...this.registry.searchable.map(quoteIdent)]
    const placeholders = cols.map(() => '?').join(', ')
    return this.db.prepare(`INSERT INTO fts (${cols.join(', ')}) VALUES (${placeholders})`)
  }
}
|
|
219
|
-
|
|
220
|
-
/** Row shape produced by the compiled search SQL, before projection into a hit. */
interface RawHitRow {
  /** Document id as stored in the `documents` table. */
  id: string
  /** JSON-serialised document; parsed when the hit is projected. */
  doc: string
  /** Score column selected by the compiled search query. */
  score: number
  /** Any further selected columns; snippets are read from keys named `__snip_<column>`. */
  [snippetCol: string]: unknown
}
|
|
226
|
-
|
|
227
|
-
/**
 * Turn a raw result row into a {@link SearchHit}.
 *
 * @param row Raw row whose `doc` column holds the JSON-serialised document.
 * @param snippetCols Columns for which a `__snip_<column>` value may be present.
 * @param attributesToRetrieve When non-empty, only these attributes are copied
 *   into the returned document; attributes the stored document does not own are
 *   skipped. When empty or undefined the whole document is returned.
 * @returns The hit, with `highlights` set only if at least one snippet is non-empty.
 */
function projectHit(
  row: RawHitRow,
  snippetCols: string[],
  attributesToRetrieve: string[] | undefined
): SearchHit {
  const document = JSON.parse(row.doc) as Record<string, unknown>

  let projected = document
  if (attributesToRetrieve && attributesToRetrieve.length > 0) {
    const out: Record<string, unknown> = {}
    for (const attr of attributesToRetrieve) {
      // Own-property check: `attr in document` would also match inherited keys
      // such as "toString" or "constructor" and leak prototype members into hits.
      if (Object.prototype.hasOwnProperty.call(document, attr)) out[attr] = document[attr]
    }
    projected = out
  }

  const hit: SearchHit = { document: projected }

  if (snippetCols.length > 0) {
    const highlights: Record<string, string> = {}
    for (const col of snippetCols) {
      const raw = row[`__snip_${col}`] as string | null | undefined
      if (raw) highlights[col] = formatSnippet(raw)
    }
    if (Object.keys(highlights).length > 0) hit.highlights = highlights
  }

  return hit
}
|
|
@@ -1,138 +0,0 @@
|
|
|
1
|
-
import type { Database } from 'bun:sqlite'
|
|
2
|
-
import type { ResolvedTypoTolerance } from '../types.ts'
|
|
3
|
-
|
|
4
|
-
/**
 * Plain-text tokeniser used for the terms dictionary.
 *
 * The input is lowercased and cut at every run of characters that are neither
 * letters nor digits; pieces shorter than 2 characters are discarded.
 *
 * Porter stemming is intentionally NOT applied:
 *   - Typos mostly hit rare words and proper nouns (customer names, product
 *     SKUs), which Porter leaves untouched anyway.
 *   - Reproducing SQLite's stemmer in JS would mean shipping a Porter
 *     implementation only for the dictionary — a lot of code for a marginal
 *     gain on common-word typos.
 *
 * Candidate terms fed back into FTS5 are re-stemmed by FTS5 itself, so the
 * lookup still works.
 *
 * @param text Text to tokenise; an empty value yields an empty list.
 * @returns Lowercased tokens in order of appearance (duplicates kept).
 */
export function tokenize(text: string): string[] {
  if (!text) return []
  return text
    .toLowerCase()
    .split(/[^\p{L}\p{N}]+/u)
    .filter(piece => piece.length >= 2)
}
|
|
27
|
-
|
|
28
|
-
/**
 * Register a document's tokens in the terms dictionary.
 *
 * Each distinct token of `text` is inserted with a document frequency of 1, or
 * has its existing frequency incremented by 1. Text without tokens is a no-op.
 */
export function recordTerms(db: Database, text: string): void {
  const distinctTerms = new Set(tokenize(text))
  if (distinctTerms.size === 0) return

  const upsertTerm = db.prepare(
    'INSERT INTO terms_dict (term, doc_freq) VALUES (?, 1) ' +
      'ON CONFLICT(term) DO UPDATE SET doc_freq = doc_freq + 1'
  )
  distinctTerms.forEach(term => {
    upsertTerm.run(term)
  })
}
|
|
39
|
-
|
|
40
|
-
/**
 * Withdraw a document's tokens from the terms dictionary.
 *
 * Each distinct token of `text` has its document frequency decremented by 1;
 * afterwards every dictionary row whose frequency is zero or below is deleted.
 * Text without tokens is a no-op.
 */
export function unrecordTerms(db: Database, text: string): void {
  const distinctTerms = new Set(tokenize(text))
  if (distinctTerms.size === 0) return

  const decrement = db.prepare('UPDATE terms_dict SET doc_freq = doc_freq - 1 WHERE term = ?')
  const removeExhausted = db.prepare('DELETE FROM terms_dict WHERE doc_freq <= 0')

  distinctTerms.forEach(term => {
    decrement.run(term)
  })
  removeExhausted.run()
}
|
|
50
|
-
|
|
51
|
-
/**
 * Look up near-miss spellings for query tokens in the terms dictionary.
 *
 * Tokens shorter than `settings.minTokenLength` are skipped. For every other
 * token, dictionary terms whose length lies within `settings.maxDistance` of
 * the token's length are compared using bounded Levenshtein distance; terms
 * within `settings.maxDistance` (the token itself excluded) are collected in
 * the order the dictionary query returns them, stopping once `maxCandidates`
 * have been found.
 *
 * @param db Database holding the `terms_dict` table.
 * @param tokens Query tokens to expand.
 * @param settings Resolved typo-tolerance settings; nothing is expanded when disabled.
 * @param maxCandidates Upper bound on candidates collected per token.
 * @returns Map from token to its candidates; tokens without candidates are absent.
 */
export function expandTokens(
  db: Database,
  tokens: string[],
  settings: ResolvedTypoTolerance,
  maxCandidates = 8
): Map<string, string[]> {
  const expansions = new Map<string, string[]>()
  if (!settings.enabled) return expansions

  const termsByLength = db.prepare<{ term: string }, [number, number]>(
    'SELECT term FROM terms_dict WHERE length(term) BETWEEN ? AND ?'
  )

  for (const token of tokens) {
    if (token.length < settings.minTokenLength) continue

    // Only terms whose length differs by at most maxDistance can be in range.
    const shortest = Math.max(1, token.length - settings.maxDistance)
    const longest = token.length + settings.maxDistance

    const nearMisses: string[] = []
    for (const { term } of termsByLength.all(shortest, longest)) {
      if (term === token) continue
      const withinRange =
        levenshtein(token, term, settings.maxDistance) <= settings.maxDistance
      if (!withinRange) continue

      nearMisses.push(term)
      if (nearMisses.length >= maxCandidates) break
    }

    if (nearMisses.length > 0) expansions.set(token, nearMisses)
  }

  return expansions
}
|
|
87
|
-
|
|
88
|
-
/**
 * Turn the user-facing typo-tolerance setting into concrete numbers.
 *
 * - `'off'` → disabled, with the default numbers filled in
 * - `'auto'` or `undefined` → enabled with the defaults
 * - settings object → enabled; any omitted field falls back to its default
 *
 * Defaults: `minTokenLength` 4, `maxDistance` 1.
 */
export function resolveTypoTolerance(
  setting:
    | 'off'
    | 'auto'
    | { minTokenLength?: number; maxDistance?: number }
    | undefined
): ResolvedTypoTolerance {
  switch (setting) {
    case 'off':
      return { enabled: false, minTokenLength: 4, maxDistance: 1 }
    case undefined:
    case 'auto':
      return { enabled: true, minTokenLength: 4, maxDistance: 1 }
    default: {
      const minTokenLength = setting.minTokenLength ?? 4
      const maxDistance = setting.maxDistance ?? 1
      return { enabled: true, minTokenLength, maxDistance }
    }
  }
}
|
|
108
|
-
|
|
109
|
-
/**
 * Bounded Levenshtein distance.
 *
 * Returns the exact edit distance when it is at most `max`, and `max + 1`
 * otherwise — either as soon as the bound is provably exceeded (length gap too
 * large, or every cell of a DP row above `max`) or at the end, where the final
 * value is clamped so the result never exceeds `max + 1`.
 *
 * Operates on UTF-16 code units, which is fine for our supported (ASCII-ish)
 * corpora.
 *
 * @param a First string.
 * @param b Second string.
 * @param max Largest distance the caller is interested in.
 */
function levenshtein(a: string, b: string, max: number): number {
  if (a === b) return 0

  const aLen = a.length
  const bLen = b.length
  // The length gap is a lower bound on the distance.
  if (Math.abs(aLen - bLen) > max) return max + 1
  // Past the gap check, an empty side means the other length is already ≤ max.
  if (aLen === 0) return bLen
  if (bLen === 0) return aLen

  // Two-row dynamic programme: `prev` is row i-1, `curr` is row i.
  let prev = Array.from({ length: bLen + 1 }, (_, j) => j)
  let curr = new Array<number>(bLen + 1).fill(0)

  for (let i = 1; i <= aLen; i++) {
    curr[0] = i
    let rowMin = i
    for (let j = 1; j <= bLen; j++) {
      const cost = a.charCodeAt(i - 1) === b.charCodeAt(j - 1) ? 0 : 1
      const cell = Math.min(prev[j]! + 1, curr[j - 1]! + 1, prev[j - 1]! + cost)
      curr[j] = cell
      if (cell < rowMin) rowMin = cell
    }
    // Row minima never decrease, so once a whole row is above `max` the
    // final distance must be too.
    if (rowMin > max) return max + 1
    ;[prev, curr] = [curr, prev]
  }

  // The last cell can still exceed `max` even when the row minimum did not;
  // clamp it so the documented `max + 1` ceiling always holds.
  return Math.min(prev[bLen]!, max + 1)
}
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
import { SearchError } from '../../errors.ts'
|
|
2
|
-
|
|
3
|
-
/** Common base class for the embedded driver's error types (`IndexCorruptError`, `UnsupportedFilterError`). */
export class EmbeddedSearchError extends SearchError {}
|
|
4
|
-
|
|
5
|
-
/** Error reporting that an embedded search index is corrupt. */
export class IndexCorruptError extends EmbeddedSearchError {
  /**
   * @param index Name of the affected index; quoted in the message.
   * @param cause Description of the corruption; appended to the message.
   */
  constructor(index: string, cause: string) {
    const message = `Embedded search index "${index}" is corrupt: ${cause}`
    super(message)
  }
}
|
|
10
|
-
|
|
11
|
-
/** Error reporting a filter that the embedded driver cannot compile. */
export class UnsupportedFilterError extends EmbeddedSearchError {
  /** @param message Detail appended after the fixed "unsupported filter" prefix. */
  constructor(message: string) {
    const fullMessage = `Embedded driver filter is unsupported: ${message}`
    super(fullMessage)
  }
}
|
|
@@ -1,136 +0,0 @@
|
|
|
1
|
-
import { UnsupportedFilterError } from '../errors.ts'
|
|
2
|
-
import { quoteIdent } from '../engine/schema.ts'
|
|
3
|
-
|
|
4
|
-
/** Result of compiling a filter: a SQL fragment plus the values to bind to it. */
export interface CompiledFilter {
  /**
   * Fragment meant to be spliced into a WHERE clause; it carries no leading
   * 'WHERE'. An empty string means there is nothing to filter on.
   */
  sql: string

  /** Values for the `?` placeholders in `sql`, in placeholder order. */
  params: unknown[]
}
|
|
10
|
-
|
|
11
|
-
/**
 * Compile a filter object into a parameterized SQL WHERE fragment.
 *
 * Every key must be listed in `filterableAttributes`. The value decides which
 * clause is generated:
 *   - `undefined`                         → the key is skipped
 *   - `null`                              → `key IS NULL`
 *   - array                               → `key IN (?, ?, ?)`; an empty array → `1 = 0`
 *   - operator object, keys drawn from
 *     eq, neq, gt, gte, lt, lte, in, nin  → one clause per operator
 *   - string / number / boolean           → `key = ?`
 *
 * All clauses are joined with AND.
 *
 * @param filter Filter object; a falsy value compiles to an empty fragment.
 * @param filterableAttributes Field names that may be filtered on.
 * @returns The SQL fragment and its bound parameters, in placeholder order.
 * @throws UnsupportedFilterError for a raw string filter, for a key that is not
 *   in `filterableAttributes`, and for a value matching none of the shapes above.
 */
export function compileFilter(
  filter: Record<string, unknown> | string | undefined,
  filterableAttributes: ReadonlySet<string>
): CompiledFilter {
  if (!filter) return { sql: '', params: [] }

  if (typeof filter === 'string') {
    throw new UnsupportedFilterError(
      'Raw string filters are not supported by the embedded driver. ' +
        'Pass an object like `{ status: "published" }` instead.'
    )
  }

  const fragments: string[] = []
  const bindings: unknown[] = []
  const addClause = (sql: string, values: unknown[] = []): void => {
    fragments.push(sql)
    bindings.push(...values)
  }

  for (const [key, value] of Object.entries(filter)) {
    if (value === undefined) continue

    if (!filterableAttributes.has(key)) {
      throw new UnsupportedFilterError(
        `Field "${key}" is not in filterableAttributes. ` +
          'Add it to the index settings before filtering on it.'
      )
    }

    const col = quoteIdent(key)

    if (value === null) {
      addClause(`${col} IS NULL`)
    } else if (Array.isArray(value)) {
      if (value.length === 0) {
        // An empty IN-list can never match anything.
        addClause('1 = 0')
      } else {
        const placeholders = value.map(() => '?').join(', ')
        addClause(`${col} IN (${placeholders})`, value.map(coerce))
      }
    } else if (isOperatorObject(value)) {
      for (const [op, operand] of Object.entries(value)) {
        const part = compileOperator(col, op, operand)
        addClause(part.sql, part.params)
      }
    } else if (isPrimitive(value)) {
      addClause(`${col} = ?`, [coerce(value)])
    } else {
      throw new UnsupportedFilterError(
        `Unsupported filter value for key "${key}": ${JSON.stringify(value)}`
      )
    }
  }

  return { sql: fragments.join(' AND '), params: bindings }
}
|
|
89
|
-
|
|
90
|
-
/**
 * Type guard for operator objects such as `{ gte: 1, lt: 10 }`.
 *
 * A value qualifies when it is a plain object (prototype `Object.prototype` or
 * `null`) whose keys are all known operators. Restricting to plain objects
 * matters: a `Date`, `Map` or class instance has no enumerable own keys, so
 * the "every key is an operator" test alone would accept it vacuously and the
 * filter on that field would silently compile to nothing.
 */
function isOperatorObject(value: unknown): value is Record<string, unknown> {
  if (value === null || typeof value !== 'object' || Array.isArray(value)) return false

  const proto: unknown = Object.getPrototypeOf(value)
  if (proto !== Object.prototype && proto !== null) return false

  return Object.keys(value).every(k => OPERATORS.has(k))
}

/** Operator names accepted inside an operator object. */
const OPERATORS = new Set(['eq', 'neq', 'gt', 'gte', 'lt', 'lte', 'in', 'nin'])
|
|
96
|
-
|
|
97
|
-
/**
 * Compile one `{ operator: operand }` pair for an already-quoted column.
 *
 * @param col Quoted column identifier, spliced into the SQL as-is.
 * @param op Operator name: eq, neq, gt, gte, lt, lte, in or nin.
 * @param value Operand; `in` / `nin` expect an array.
 * @returns The clause and its bound parameters.
 * @throws UnsupportedFilterError when `op` is not a known operator.
 */
function compileOperator(col: string, op: string, value: unknown): CompiledFilter {
  switch (op) {
    case 'eq':
      // `col = NULL` is never true in SQL; test for NULL explicitly, matching
      // how a bare `null` filter value is compiled elsewhere in this file.
      if (value === null || value === undefined) return { sql: `${col} IS NULL`, params: [] }
      return { sql: `${col} = ?`, params: [coerce(value)] }
    case 'neq':
      // Likewise `col <> NULL` is never true.
      if (value === null || value === undefined) return { sql: `${col} IS NOT NULL`, params: [] }
      return { sql: `${col} <> ?`, params: [coerce(value)] }
    case 'gt':
      return { sql: `${col} > ?`, params: [coerce(value)] }
    case 'gte':
      return { sql: `${col} >= ?`, params: [coerce(value)] }
    case 'lt':
      return { sql: `${col} < ?`, params: [coerce(value)] }
    case 'lte':
      return { sql: `${col} <= ?`, params: [coerce(value)] }
    case 'in': {
      // A missing or empty list matches nothing.
      if (!Array.isArray(value) || value.length === 0) return { sql: '1 = 0', params: [] }
      const ph = value.map(() => '?').join(', ')
      return { sql: `${col} IN (${ph})`, params: value.map(coerce) }
    }
    case 'nin': {
      // A missing or empty exclusion list excludes nothing.
      if (!Array.isArray(value) || value.length === 0) return { sql: '1 = 1', params: [] }
      const ph = value.map(() => '?').join(', ')
      return { sql: `${col} NOT IN (${ph})`, params: value.map(coerce) }
    }
    default:
      throw new UnsupportedFilterError(`Unknown operator "${op}"`)
  }
}
|
|
125
|
-
|
|
126
|
-
/** True when `value` is a string, a number or a boolean. */
function isPrimitive(value: unknown): boolean {
  switch (typeof value) {
    case 'string':
    case 'number':
    case 'boolean':
      return true
    default:
      return false
  }
}
|
|
131
|
-
|
|
132
|
-
/**
 * Convert a filter value into the form that is bound as a parameter:
 * `null` / `undefined` become `null`, booleans become 1 or 0, and every other
 * value is passed through unchanged.
 */
function coerce(value: unknown): unknown {
  if (typeof value === 'boolean') return Number(value)
  return value ?? null
}
|
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
import { join, isAbsolute, resolve } from 'node:path'
|
|
2
|
-
import { mkdirSync } from 'node:fs'
|
|
3
|
-
import type { EmbeddedConfig } from '../types.ts'
|
|
4
|
-
|
|
5
|
-
/** Special `path` value that is returned verbatim instead of a file location. */
const MEMORY = ':memory:'

/**
 * Work out where the SQLite file for `index` lives.
 *
 * The root directory is `config.path`, defaulting to './storage/search'. When
 * the root is ':memory:' that value is returned as-is and nothing touches the
 * file system. Otherwise a relative root is resolved against the current
 * working directory, the directory is created (recursively) if needed, and
 * the result is `<dir>/<name>.sqlite`, where `<name>` is the index name with
 * every character outside `a-z A-Z 0-9 _ . -` replaced by an underscore.
 *
 * @param config Embedded driver configuration; only `path` is read.
 * @param index Index name used to derive the file name.
 * @returns ':memory:' or the path of the index's `.sqlite` file.
 */
export function resolveIndexPath(config: EmbeddedConfig, index: string): string {
  const root = config.path ?? './storage/search'
  if (root === MEMORY) {
    return MEMORY
  }

  let directory = root
  if (!isAbsolute(root)) {
    directory = resolve(process.cwd(), root)
  }
  mkdirSync(directory, { recursive: true })

  const fileName = index.replace(/[^a-zA-Z0-9_.-]/g, '_') + '.sqlite'
  return join(directory, fileName)
}

/** Public alias of the ':memory:' sentinel. */
export const MEMORY_PATH = MEMORY
|
|
@@ -1,34 +0,0 @@
|
|
|
1
|
-
import type { DriverConfig } from '../../types.ts'
|
|
2
|
-
|
|
3
|
-
/** Shorthand typo-tolerance modes: 'off' disables it, 'auto' uses the defaults. */
export type TypoToleranceMode = 'off' | 'auto'
|
|
4
|
-
|
|
5
|
-
/** Fine-grained typo-tolerance controls; every omitted field falls back to its default. */
export interface TypoToleranceSettings {
  /** Largest Levenshtein distance to tolerate. Default 1; 2 is supported but slower. */
  maxDistance?: number

  /** Tokens shorter than this are not considered for fuzzy expansion. Default 4. */
  minTokenLength?: number
}
|
|
11
|
-
|
|
12
|
-
/** Configuration accepted by the embedded (SQLite-backed) search driver. */
export interface EmbeddedConfig extends DriverConfig {
  /** Driver identifier. */
  driver: string

  /**
   * Directory that holds one `.sqlite` file per index.
   * Use `:memory:` for tests.
   */
  path?: string

  /**
   * SQLite `synchronous` pragma.
   * Default 'NORMAL' (crash-safe, sub-second write loss possible).
   */
  synchronous?: 'OFF' | 'NORMAL' | 'FULL'

  /**
   * Typo tolerance: 'off' disables it, 'auto' uses the defaults, and a
   * settings object gives fine-grained control.
   */
  typoTolerance?: TypoToleranceMode | TypoToleranceSettings
}
|
|
21
|
-
|
|
22
|
-
/** Typo-tolerance settings after defaults have been applied; every field is concrete. */
export interface ResolvedTypoTolerance {
  /** Whether fuzzy expansion runs at all. */
  enabled: boolean

  /** Tokens shorter than this are never expanded. */
  minTokenLength: number

  /** Largest Levenshtein distance accepted for a candidate term. */
  maxDistance: number
}
|
|
28
|
-
|
|
29
|
-
/** Shape of a row read from the internal `documents` table. */
export interface DocumentRow {
  /** SQLite rowid of the row. */
  rowid: number

  /** Document id, stored as a string. */
  id: string

  /** JSON-serialised document. */
  doc: string
}
|