@strav/search 0.3.19 → 0.3.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +74 -3
- package/package.json +4 -4
- package/src/commands/search_optimize.ts +52 -0
- package/src/drivers/embedded/embedded_driver.ts +136 -0
- package/src/drivers/embedded/engine/field_registry.ts +97 -0
- package/src/drivers/embedded/engine/fts_query_builder.ts +184 -0
- package/src/drivers/embedded/engine/query_compiler.ts +134 -0
- package/src/drivers/embedded/engine/schema.ts +99 -0
- package/src/drivers/embedded/engine/snippet_formatter.ts +29 -0
- package/src/drivers/embedded/engine/sqlite_engine.ts +255 -0
- package/src/drivers/embedded/engine/typo_expander.ts +138 -0
- package/src/drivers/embedded/errors.ts +15 -0
- package/src/drivers/embedded/filters/filter_compiler.ts +136 -0
- package/src/drivers/embedded/index.ts +3 -0
- package/src/drivers/embedded/storage/paths.ts +23 -0
- package/src/drivers/embedded/types.ts +34 -0
- package/src/index.ts +6 -0
- package/src/search_manager.ts +3 -0
- package/stubs/config/search.ts +10 -0
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
import { UnsupportedFilterError } from '../errors.ts'
|
|
2
|
+
import { quoteIdent } from '../engine/schema.ts'
|
|
3
|
+
|
|
4
|
+
/**
 * Outcome of compiling a filter: a SQL fragment together with the values
 * that must be bound to its placeholders.
 */
export interface CompiledFilter {
  /**
   * Fragment intended to be spliced into a WHERE clause. It never carries a
   * leading `WHERE` keyword and is the empty string when there is no filter.
   */
  sql: string
  /** Values for the `?` placeholders of `sql`, listed in placeholder order. */
  params: Array<unknown>
}
|
|
10
|
+
|
|
11
|
+
/**
 * Compile a filter object into a parameterized SQL WHERE fragment.
 *
 * Supported value shapes for each key:
 *   - `null`                              → `key IS NULL`
 *   - primitive (string/number/boolean)   → `key = ?`
 *   - array of primitives                 → `key IN (?, ?, ?)` (empty array → `1 = 0`)
 *   - operator object `{ eq, neq, gt, gte, lt, lte, in, nin }`
 *                                         → one clause per operator
 *
 * All produced clauses are joined with `AND`. Bound values go through
 * `coerce` first, so booleans are bound as `1` / `0`.
 *
 * Entries whose value is `undefined` are skipped, both at the top level and
 * inside an operator object, so `{ price: { gte: 10, lte: undefined } }`
 * compiles to a single `price >= ?` clause.
 *
 * Every key that is not skipped must be present in `filterableAttributes`;
 * the column name is then passed through `quoteIdent` before it is placed in
 * the SQL text.
 *
 * @param filter Filter object, or `undefined`/empty for "no filter". Raw
 *   string filters are rejected.
 * @param filterableAttributes Names of the fields that may be filtered on.
 * @returns The SQL fragment (without a leading `WHERE`) and its bound
 *   parameters; `sql` is `''` when nothing was compiled.
 * @throws UnsupportedFilterError When `filter` is a non-empty string, when a
 *   key is not in `filterableAttributes`, or when a value has an unsupported
 *   shape.
 */
export function compileFilter(
  filter: Record<string, unknown> | string | undefined,
  filterableAttributes: ReadonlySet<string>
): CompiledFilter {
  if (!filter) return { sql: '', params: [] }

  if (typeof filter === 'string') {
    throw new UnsupportedFilterError(
      'Raw string filters are not supported by the embedded driver. ' +
        'Pass an object like `{ status: "published" }` instead.'
    )
  }

  const clauses: string[] = []
  const params: unknown[] = []

  for (const [key, value] of Object.entries(filter)) {
    // An undefined value means "criterion not set": contribute nothing.
    if (value === undefined) continue

    if (!filterableAttributes.has(key)) {
      throw new UnsupportedFilterError(
        `Field "${key}" is not in filterableAttributes. ` +
          'Add it to the index settings before filtering on it.'
      )
    }

    const col = quoteIdent(key)

    if (value === null) {
      clauses.push(`${col} IS NULL`)
      continue
    }

    if (Array.isArray(value)) {
      if (value.length === 0) {
        // Membership in an empty list can never hold.
        clauses.push('1 = 0')
      } else {
        const placeholders = value.map(() => '?').join(', ')
        clauses.push(`${col} IN (${placeholders})`)
        params.push(...value.map(coerce))
      }
      continue
    }

    if (isOperatorObject(value)) {
      for (const [op, opValue] of Object.entries(value)) {
        // Same rule as the top level: an undefined operand is "not set".
        // Binding it would send NULL (see `coerce`), and a comparison against
        // NULL matches no rows, silently emptying the whole result set.
        if (opValue === undefined) continue

        const compiled = compileOperator(col, op, opValue)
        clauses.push(compiled.sql)
        params.push(...compiled.params)
      }
      continue
    }

    if (isPrimitive(value)) {
      clauses.push(`${col} = ?`)
      params.push(coerce(value))
      continue
    }

    throw new UnsupportedFilterError(
      `Unsupported filter value for key "${key}": ${JSON.stringify(value)}`
    )
  }

  return { sql: clauses.join(' AND '), params }
}
|
|
89
|
+
|
|
90
|
+
/**
 * Type guard for operator objects: a non-null, non-array object whose own
 * enumerable keys are all listed in `OPERATORS`. An object without any keys
 * passes vacuously.
 */
function isOperatorObject(value: unknown): value is Record<string, unknown> {
  if (typeof value !== 'object' || value === null || Array.isArray(value)) {
    return false
  }

  for (const name of Object.keys(value)) {
    if (!OPERATORS.has(name)) return false
  }
  return true
}
|
|
94
|
+
|
|
95
|
+
/** Operator names accepted inside an operator object; `compileOperator` has one case per name. */
const OPERATORS = new Set<string>([
  'eq',
  'neq',
  'gt',
  'gte',
  'lt',
  'lte',
  'in',
  'nin',
])
|
|
96
|
+
|
|
97
|
+
/**
 * Compile one `{ operator: operand }` pair into a SQL clause for `col`.
 *
 * NULL handling: in SQL, `col = NULL`, `col <> NULL` and `col IN (NULL)` never
 * match a row, and `col NOT IN (..., NULL)` rejects every row. `eq`/`neq`
 * with a `null` operand are therefore emitted as `IS NULL` / `IS NOT NULL`,
 * which matches how a top-level `null` filter value is compiled, and
 * null/undefined list elements are split out of `in` / `nin` lists into an
 * explicit NULL test.
 *
 * The ordering operators (`gt`, `gte`, `lt`, `lte`) bind their operand as-is
 * (after `coerce`); a `null` operand there still matches no rows.
 *
 * @param col Column reference to place in the SQL text (already quoted by the caller).
 * @param op Operator name: `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `in` or `nin`.
 * @param value Operand; `in` / `nin` expect an array.
 * @returns The clause and its bound parameters. A non-array or empty operand
 *   yields `1 = 0` for `in` and `1 = 1` for `nin`.
 * @throws UnsupportedFilterError When `op` is not a known operator.
 */
function compileOperator(col: string, op: string, value: unknown): CompiledFilter {
  switch (op) {
    case 'eq':
      if (value === null) return { sql: `${col} IS NULL`, params: [] }
      return { sql: `${col} = ?`, params: [coerce(value)] }
    case 'neq':
      if (value === null) return { sql: `${col} IS NOT NULL`, params: [] }
      return { sql: `${col} <> ?`, params: [coerce(value)] }
    case 'gt':
      return { sql: `${col} > ?`, params: [coerce(value)] }
    case 'gte':
      return { sql: `${col} >= ?`, params: [coerce(value)] }
    case 'lt':
      return { sql: `${col} < ?`, params: [coerce(value)] }
    case 'lte':
      return { sql: `${col} <= ?`, params: [coerce(value)] }
    case 'in': {
      // Nothing can be a member of an empty (or malformed) list.
      if (!Array.isArray(value) || value.length === 0) return { sql: '1 = 0', params: [] }
      return compileListOperator(col, value, false)
    }
    case 'nin': {
      // Excluding nothing keeps every row.
      if (!Array.isArray(value) || value.length === 0) return { sql: '1 = 1', params: [] }
      return compileListOperator(col, value, true)
    }
    default:
      throw new UnsupportedFilterError(`Unknown operator "${op}"`)
  }
}

/**
 * Build an `IN` / `NOT IN` clause for a non-empty list, moving null/undefined
 * elements out of the placeholder list into an `IS [NOT] NULL` test.
 */
function compileListOperator(
  col: string,
  values: readonly unknown[],
  negated: boolean
): CompiledFilter {
  const bound = values.filter(v => v !== null && v !== undefined).map(coerce)
  const hasNull = bound.length !== values.length
  const nullTest = `${col} IS ${negated ? 'NOT ' : ''}NULL`

  // The list held nothing but NULLs: only the NULL test remains.
  if (bound.length === 0) return { sql: nullTest, params: [] }

  const placeholders = bound.map(() => '?').join(', ')
  const membership = `${col} ${negated ? 'NOT IN' : 'IN'} (${placeholders})`
  if (!hasNull) return { sql: membership, params: bound }

  // Parenthesized so the OR/AND cannot leak into clauses it is later joined with.
  return { sql: `(${membership} ${negated ? 'AND' : 'OR'} ${nullTest})`, params: bound }
}
|
|
125
|
+
|
|
126
|
+
/**
 * Report whether `value` is a string, a number or a boolean. Everything else
 * (including `null`, `undefined`, bigint, symbols and objects) yields `false`.
 */
function isPrimitive(value: unknown): boolean {
  switch (typeof value) {
    case 'string':
    case 'number':
    case 'boolean':
      return true
    default:
      return false
  }
}
|
|
131
|
+
|
|
132
|
+
/**
 * Normalize a filter value before it is bound as a SQL parameter:
 * `null` and `undefined` become `null`, booleans become `1` / `0`, and any
 * other value is returned unchanged.
 */
function coerce(value: unknown): unknown {
  if (value == null) return null
  return typeof value === 'boolean' ? Number(value) : value
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import { join, isAbsolute, resolve } from 'node:path'
|
|
2
|
+
import { mkdirSync } from 'node:fs'
|
|
3
|
+
import type { EmbeddedConfig } from '../types.ts'
|
|
4
|
+
|
|
5
|
+
/** Marker value for a config that asks for `:memory:` instead of an on-disk directory. */
const MEMORY = ':memory:'

/**
 * Work out where the database file of `index` lives.
 *
 * The root directory is `config.path`, falling back to `./storage/search`.
 * When the root is the `:memory:` marker it is returned as-is and nothing is
 * touched on disk. Otherwise a relative root is resolved against the current
 * working directory, the directory is created (recursively) if missing, and
 * the result is `<dir>/<index>.sqlite`, where every character of `index`
 * outside `[a-zA-Z0-9_.-]` is replaced with `_`.
 *
 * @param config Driver configuration; only `path` is read here.
 * @param index Index name used to derive the file name.
 * @returns `':memory:'` or the path of the index's `.sqlite` file.
 */
export function resolveIndexPath(config: EmbeddedConfig, index: string): string {
  const configured = config.path ?? './storage/search'
  if (configured === MEMORY) {
    return MEMORY
  }

  let directory = configured
  if (!isAbsolute(directory)) {
    directory = resolve(process.cwd(), directory)
  }
  mkdirSync(directory, { recursive: true })

  // Keep the index name from introducing path separators or other odd characters.
  const fileName = `${index.replace(/[^a-zA-Z0-9_.-]/g, '_')}.sqlite`
  return join(directory, fileName)
}

/** Exported alias of the `:memory:` marker. */
export const MEMORY_PATH = MEMORY
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import type { DriverConfig } from '../../types.ts'
|
|
2
|
+
|
|
3
|
+
/** Shorthand typo-tolerance switch: `'off'` or `'auto'`. */
export type TypoToleranceMode = 'auto' | 'off'
|
|
4
|
+
|
|
5
|
+
/** Fine-grained typo-tolerance options; every field is optional. */
export interface TypoToleranceSettings {
  /**
   * Shortest token length that is considered for fuzzy expansion.
   * Defaults to 4.
   */
  minTokenLength?: number
  /**
   * Largest Levenshtein distance that is tolerated.
   * Defaults to 1; 2 is supported but slower.
   */
  maxDistance?: number
}
|
|
11
|
+
|
|
12
|
+
/** Configuration accepted by the embedded driver. */
export interface EmbeddedConfig extends DriverConfig {
  driver: string
  /**
   * Directory that holds the per-index `.sqlite` files.
   * Use `:memory:` for tests.
   */
  path?: string
  /**
   * SQLite `synchronous` pragma. Defaults to `'NORMAL'`, which is crash-safe
   * but may lose a sub-second window of writes.
   */
  synchronous?: 'OFF' | 'NORMAL' | 'FULL'
  /**
   * Typo tolerance: `'off'` disables it, `'auto'` uses the defaults, and a
   * settings object gives fine-grained control.
   */
  typoTolerance?: TypoToleranceMode | TypoToleranceSettings
}
|
|
21
|
+
|
|
22
|
+
/**
 * Typo-tolerance settings once defaults have been applied: every field is
 * present.
 */
export interface ResolvedTypoTolerance {
  /** Whether typo tolerance is switched on. */
  enabled: boolean
  /** Resolved minimum token length. */
  minTokenLength: number
  /** Resolved maximum distance. */
  maxDistance: number
}
|
|
28
|
+
|
|
29
|
+
/**
 * Shape of a row read from the documents table (internal to the driver).
 */
export interface DocumentRow {
  /** Numeric row identifier. */
  rowid: number
  /** Document identifier. */
  id: string
  /** Stored document as a string (presumably serialized; confirm against the engine code). */
  doc: string
}
|
package/src/index.ts
CHANGED
|
@@ -12,6 +12,12 @@ export { MeilisearchDriver } from './drivers/meilisearch_driver.ts'
|
|
|
12
12
|
export { TypesenseDriver } from './drivers/typesense_driver.ts'
|
|
13
13
|
export { AlgoliaDriver } from './drivers/algolia_driver.ts'
|
|
14
14
|
export { NullDriver } from './drivers/null_driver.ts'
|
|
15
|
+
export { EmbeddedDriver } from './drivers/embedded/index.ts'
|
|
16
|
+
export type {
|
|
17
|
+
EmbeddedConfig,
|
|
18
|
+
TypoToleranceMode,
|
|
19
|
+
TypoToleranceSettings,
|
|
20
|
+
} from './drivers/embedded/index.ts'
|
|
15
21
|
|
|
16
22
|
// Mixin
|
|
17
23
|
export { searchable } from './searchable.ts'
|
package/src/search_manager.ts
CHANGED
|
@@ -5,6 +5,7 @@ import { MeilisearchDriver } from './drivers/meilisearch_driver.ts'
|
|
|
5
5
|
import { TypesenseDriver } from './drivers/typesense_driver.ts'
|
|
6
6
|
import { AlgoliaDriver } from './drivers/algolia_driver.ts'
|
|
7
7
|
import { NullDriver } from './drivers/null_driver.ts'
|
|
8
|
+
import { EmbeddedDriver } from './drivers/embedded/index.ts'
|
|
8
9
|
|
|
9
10
|
@inject
|
|
10
11
|
export default class SearchManager {
|
|
@@ -86,6 +87,8 @@ export default class SearchManager {
|
|
|
86
87
|
return new TypesenseDriver(config)
|
|
87
88
|
case 'algolia':
|
|
88
89
|
return new AlgoliaDriver(config)
|
|
90
|
+
case 'embedded':
|
|
91
|
+
return new EmbeddedDriver(config)
|
|
89
92
|
case 'null':
|
|
90
93
|
return new NullDriver()
|
|
91
94
|
default:
|
package/stubs/config/search.ts
CHANGED
|
@@ -28,5 +28,15 @@ export default {
|
|
|
28
28
|
appId: env('ALGOLIA_APP_ID', ''),
|
|
29
29
|
apiKey: env('ALGOLIA_SECRET', ''),
|
|
30
30
|
},
|
|
31
|
+
|
|
32
|
+
embedded: {
|
|
33
|
+
driver: 'embedded',
|
|
34
|
+
/** Directory holding per-index `.sqlite` files. Use ':memory:' for tests. */
|
|
35
|
+
path: env('SEARCH_PATH', './storage/search'),
|
|
36
|
+
/** SQLite synchronous pragma. NORMAL is crash-safe with sub-second-of-writes loss. */
|
|
37
|
+
synchronous: env('SEARCH_SYNCHRONOUS', 'NORMAL'),
|
|
38
|
+
/** Typo tolerance: 'off' to disable, 'auto' for defaults, or { minTokenLength, maxDistance }. */
|
|
39
|
+
typoTolerance: env('SEARCH_TYPO_TOLERANCE', 'auto'),
|
|
40
|
+
},
|
|
31
41
|
},
|
|
32
42
|
}
|