@rekal/mem 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. package/dist/db-BMh1OP4b.mjs +294 -0
  2. package/dist/doc-DnYN4jAU.mjs +116 -0
  3. package/dist/embed-rUMZxqed.mjs +100 -0
  4. package/dist/fs-DMp26Byo.mjs +32 -0
  5. package/dist/glob.d.mts +27 -0
  6. package/dist/glob.mjs +132 -0
  7. package/dist/index.d.mts +1465 -0
  8. package/dist/index.mjs +351 -0
  9. package/dist/llama-CT3dc9Cn.mjs +75 -0
  10. package/dist/models-DFQSgBNr.mjs +77 -0
  11. package/dist/openai-j2_2GM4J.mjs +76 -0
  12. package/dist/progress-B1JdNapX.mjs +263 -0
  13. package/dist/query-VFSpErTB.mjs +125 -0
  14. package/dist/runtime.node-DlQPaGrV.mjs +35 -0
  15. package/dist/search-BllHWtZF.mjs +166 -0
  16. package/dist/store-DE7S35SS.mjs +137 -0
  17. package/dist/transformers-CJ3QA2PK.mjs +55 -0
  18. package/dist/uri-CehXVDGB.mjs +28 -0
  19. package/dist/util-DNyrmcA3.mjs +11 -0
  20. package/dist/vfs-CNQbkhsf.mjs +222 -0
  21. package/foo.ts +3 -0
  22. package/foo2.ts +20 -0
  23. package/package.json +61 -0
  24. package/src/context.ts +77 -0
  25. package/src/db.ts +464 -0
  26. package/src/doc.ts +163 -0
  27. package/src/embed/base.ts +122 -0
  28. package/src/embed/index.ts +67 -0
  29. package/src/embed/llama.ts +111 -0
  30. package/src/embed/models.ts +104 -0
  31. package/src/embed/openai.ts +95 -0
  32. package/src/embed/transformers.ts +81 -0
  33. package/src/frecency.ts +58 -0
  34. package/src/fs.ts +36 -0
  35. package/src/glob.ts +163 -0
  36. package/src/index.ts +15 -0
  37. package/src/log.ts +60 -0
  38. package/src/md.ts +204 -0
  39. package/src/progress.ts +121 -0
  40. package/src/query.ts +131 -0
  41. package/src/runtime.bun.ts +33 -0
  42. package/src/runtime.node.ts +47 -0
  43. package/src/search.ts +230 -0
  44. package/src/snippet.ts +248 -0
  45. package/src/sqlite.ts +1 -0
  46. package/src/store.ts +180 -0
  47. package/src/uri.ts +28 -0
  48. package/src/util.ts +21 -0
  49. package/src/vfs.ts +257 -0
  50. package/test/doc.test.ts +61 -0
  51. package/test/fixtures/ignore-test/keep.md +0 -0
  52. package/test/fixtures/ignore-test/skip.log +0 -0
  53. package/test/fixtures/ignore-test/sub/keep.md +0 -0
  54. package/test/fixtures/store/agent/index.md +9 -0
  55. package/test/fixtures/store/agent/lessons.md +21 -0
  56. package/test/fixtures/store/agent/soul.md +28 -0
  57. package/test/fixtures/store/agent/tools.md +25 -0
  58. package/test/fixtures/store/concepts/frecency.md +30 -0
  59. package/test/fixtures/store/concepts/index.md +9 -0
  60. package/test/fixtures/store/concepts/memory-coherence.md +33 -0
  61. package/test/fixtures/store/concepts/rag.md +27 -0
  62. package/test/fixtures/store/index.md +9 -0
  63. package/test/fixtures/store/projects/index.md +9 -0
  64. package/test/fixtures/store/projects/rekall-inc/architecture.md +41 -0
  65. package/test/fixtures/store/projects/rekall-inc/decisions/index.md +9 -0
  66. package/test/fixtures/store/projects/rekall-inc/decisions/no-military.md +20 -0
  67. package/test/fixtures/store/projects/rekall-inc/index.md +28 -0
  68. package/test/fixtures/store/user/family.md +13 -0
  69. package/test/fixtures/store/user/index.md +9 -0
  70. package/test/fixtures/store/user/preferences.md +29 -0
  71. package/test/fixtures/store/user/profile.md +29 -0
  72. package/test/fs.test.ts +15 -0
  73. package/test/glob.test.ts +190 -0
  74. package/test/md.test.ts +177 -0
  75. package/test/query.test.ts +105 -0
  76. package/test/uri.test.ts +46 -0
  77. package/test/util.test.ts +62 -0
  78. package/test/vfs.test.ts +164 -0
  79. package/tsconfig.json +3 -0
  80. package/tsdown.config.ts +8 -0
@@ -0,0 +1,33 @@
1
+ import type { Database } from "bun:sqlite"
2
+
3
// Set on the first openDatabase() call so the custom-SQLite probing below runs at most once.
let didInit = false

/**
 * Opens a SQLite database with Bun's built-in driver and loads the sqlite-vec
 * extension into it.
 *
 * On the first call on macOS, the function tries to switch Bun to a Homebrew
 * build of SQLite before any database is created. Probing failures are
 * ignored on purpose: if no candidate library can be used, Bun's default
 * SQLite stays in effect.
 *
 * @param path Database location handed to `new Database(...)`.
 * @returns The opened database (created with `strict: true`).
 */
export async function openDatabase(path: string) {
  const [vecModule, bunSqlite] = await Promise.all([import("sqlite-vec"), import("bun:sqlite")])
  const { load: sqliteVec } = vecModule
  const { Database } = bunSqlite

  if (!didInit) {
    didInit = true
    // See: https://bun.com/docs/runtime/sqlite#setcustomsqlite
    // NOTE(review): presumably required because the SQLite shipped with macOS
    // cannot load extensions — confirm against the linked docs.
    if (process.platform === "darwin") {
      const candidates = [
        "/opt/homebrew/opt/sqlite/lib/libsqlite3.dylib", // Apple Silicon
        "/usr/local/opt/sqlite/lib/libsqlite3.dylib", // Intel
      ]
      for (const candidate of candidates) {
        try {
          Database.setCustomSQLite(candidate)
        } catch {
          // This candidate could not be used; try the next one.
          continue
        }
        // First library that is accepted wins.
        break
      }
    }
  }

  const db = new Database(path, { strict: true })
  sqliteVec(db)
  return db
}
28
+
29
+ export type { Database }
30
+
31
/**
 * Parses a YAML document with Bun's built-in YAML parser.
 *
 * @param content YAML source text.
 * @returns Whatever `Bun.YAML.parse` produces, typed as `unknown` so callers must narrow it.
 */
export function parseYaml(content: string): unknown {
  const parsed: unknown = Bun.YAML.parse(content)
  return parsed
}
@@ -0,0 +1,47 @@
1
+ import type { Database as BetterDB } from "better-sqlite3"
2
+ import type { Database } from "bun:sqlite"
3
+
4
+ let DB: undefined | typeof Database
5
+
6
/**
 * Lazily imports better-sqlite3 and returns a subclass of its `Database` that
 * exposes the `run` and `query` methods of Bun's `Database` API. The class is
 * cast to Bun's `Database` type so the rest of the code can stay
 * runtime-agnostic.
 */
async function dbInit() {
  const betterSqlite = await import("better-sqlite3")
  const BetterDatabase = betterSqlite.default

  // Extend better-sqlite3 to mimic Bun's Database API
  return class extends BetterDatabase {
    // One prepared statement per distinct SQL string, reused by query().
    private prepareCache = new Map<string, ReturnType<BetterDB["prepare"]>>()

    // Only the filename is forwarded to better-sqlite3; any further
    // constructor arguments passed by callers are dropped here.
    // oxlint-disable-next-line no-useless-constructor
    constructor(filename?: string) {
      super(filename)
    }

    /** Bun-style `run`, implemented by delegating to better-sqlite3's `exec`. */
    run(...args: Parameters<BetterDB["exec"]>) {
      return this.exec(...args)
    }

    /** Bun-style `query`: returns a prepared statement, memoized per SQL source string. */
    query(source: string) {
      const cached = this.prepareCache.get(source)
      if (cached) return cached
      const statement = this.prepare(source)
      this.prepareCache.set(source, statement)
      return statement
    }
  } as unknown as typeof Database
}
32
+
33
/**
 * Opens a SQLite database through the better-sqlite3 compatibility class and
 * loads the sqlite-vec extension into it.
 *
 * The compatibility class is built on first use and kept in the module-level
 * `DB` variable for later calls.
 *
 * @param path Database location handed to the database constructor.
 * @returns The opened database, typed as Bun's `Database`.
 */
export async function openDatabase(path: string) {
  if (!DB) DB = await dbInit()
  const vecModule = await import("sqlite-vec")
  const { load: sqliteVec } = vecModule
  const db = new DB(path, { strict: true })
  sqliteVec(db)
  return db
}
40
+
41
+ export type { Database }
42
+
43
// js-yaml is imported dynamically, once, while this module is being evaluated (top-level await).
const jsYaml = await import("js-yaml")
const { load: loadYaml } = jsYaml

/**
 * Parses a YAML document with js-yaml's `load`.
 *
 * @param content YAML source text.
 * @returns The parsed value, typed as `unknown` so callers must narrow it.
 */
export function parseYaml(content: string): unknown {
  const parsed: unknown = loadYaml(content)
  return parsed
}
package/src/search.ts ADDED
@@ -0,0 +1,230 @@
1
+ import type { Context } from "./context.ts"
2
+ import type { Db, DocRow, FTSResult, VecResult } from "./db.ts"
3
+ import type { VfsEntry } from "./vfs.ts"
4
+
5
+ import { toFts } from "./query.ts"
6
+ import { parentUri } from "./uri.ts"
7
+ import { hash } from "./util.ts"
8
+
9
/** Retrieval strategy: full-text only, vector only, or both fused together. */
export type SearchMode = "hybrid" | "vec" | "fts"

/** Score of one result under one search mode. */
export type SearchScore = {
  /** Position after sorting by this mode's score; `Search.rank` assigns it starting at 1. */
  rank: number
  /** Mode-specific relevance score; higher sorts first. */
  score: number
  /** Secondary score, used as a tie-breaker when sorting; only hybrid results set it. */
  display_score?: number
}

/** One matched document together with its per-mode scores and raw match rows. */
export type SearchResult = {
  /** URI the document path maps to within the search scope. */
  uri: string
  /** Path of the document as stored in the database. */
  path: string
  /** The document row the match belongs to. */
  doc: DocRow
  /** Scores keyed by the search mode that produced them. */
  scores: Partial<Record<SearchMode, SearchScore>>
  /** Raw match rows from the full-text and/or vector search. */
  match: { fts?: FTSResult; vec?: VecResult }
} & VfsEntry

/** Hybrid result: the fused `hybrid` score is always present. */
export type HybridSR = SearchResult & { scores: { hybrid: SearchScore } }
/** Vector result: the `vec` score and match row are always present. */
export type VecSR = SearchResult & { scores: { vec: SearchScore }; match: { vec: VecResult } }
/** Full-text result: the `fts` score and match row are always present. */
export type FtsSR = SearchResult & { scores: { fts: SearchScore }; match: { fts: FTSResult } }

// Maps each search mode to the result type that mode guarantees.
type SearchResultMap = {
  hybrid: HybridSR
  vec: VecSR
  fts: FtsSR
}

/** Options accepted by `Search.search`. */
export type SearchOptions = {
  /** Strategy to use; `Search.search` falls back to "hybrid" when omitted. */
  mode?: SearchMode
  /** Maximum number of results; the search methods fall back to 20 when omitted. */
  limit?: number
  /** URI used to build the search scope. */
  uri?: string
}

/** Options accepted by `Search.searchFts`. */
export type FtsSearchOptions = Omit<SearchOptions, "mode"> & {
  /** Operator handed to the FTS query builder; `searchFts` falls back to "OR". */
  op?: "AND" | "OR"
}
44
+
45
// --- Scoring and fusion tuning knobs ---

// Description chunks (seq=0) get boosted in vector scoring:
// score + DESC_BOOST * (1 - score)
const DESC_BOOST = 0.2

// How much to boost a chunk based on its parent's score, vs its own score, in [0, 1]
const PARENT_BOOST = 0.3

// Constant k of Reciprocal Rank Fusion: each ranked list contributes 1 / (RRF_K + rank)
const RRF_K = 60

// Lower bound on the candidates requested from each sub-search
const RRF_LIMIT = 50

// Multiplier on the number of vector hits fetched, to leave room for post-filtering
const VEC_OVERSAMPLE = 4
51
+
52
/**
 * Document search over the store's database: full-text (FTS), vector, or a
 * hybrid of both fused with Reciprocal Rank Fusion.
 *
 * Instances are created through `Search.load`; the constructor is private.
 */
export class Search {
  private constructor(
    public db: Db,
    public ctx: Context
  ) {}

  /** Creates a Search bound to the database handle obtained from `ctx.db()`. */
  static async load(ctx: Context) {
    const db = await ctx.db()
    return new Search(db, ctx)
  }

  /**
   * Runs a search in the requested mode (default "hybrid").
   *
   * "fts" and "vec" delegate straight to `searchFts` / `searchVec`. Hybrid
   * mode runs both with an enlarged candidate limit and fuses the two lists.
   *
   * @param query Free-text query.
   * @param opts Search options; `limit` defaults to 20.
   */
  async search(query: string, opts: SearchOptions = {}): Promise<SearchResult[]> {
    switch (opts.mode ?? "hybrid") {
      case "fts":
        return this.searchFts(query, opts)
      case "vec":
        return this.searchVec(query, opts)
      default:
        break
    }

    const limit = opts.limit ?? 20

    // Hybrid: run both, fuse with RRF — need enough candidates for good fusion
    const subLimit = Math.max(RRF_LIMIT, limit * 2)
    const ftsPending = this.searchFts(query, { ...opts, limit: subLimit })
    const vecPending = this.searchVec(query, { ...opts, limit: subLimit, slice: false })
    const [fts, vec] = await Promise.all([ftsPending, vecPending])

    return this.fuse(fts, vec, limit)
  }

  /**
   * Vector search: embeds the query (reusing a cached embedding when one
   * exists), keeps the best-scoring chunk per document, boosts scores by chunk
   * position and by parent-document score, then ranks the documents.
   *
   * @param query Free-text query to embed.
   * @param opts `limit` defaults to 20; pass `slice: false` to return every
   *   candidate instead of only the top `limit`.
   */
  async searchVec(
    query: string,
    opts: Omit<SearchOptions, "mode"> & { slice?: boolean } = {}
  ): Promise<VecSR[]> {
    const cacheKey = hash(`embed:${query}`)
    const embedder = await this.ctx.embedder()
    const vfs = await this.ctx.vfs()

    // The embedder is only invoked on a cache miss.
    const cached = this.db.cacheGet<number[]>(cacheKey)
    const embedding = cached ?? this.db.cacheSet(cacheKey, await embedder.embed(query))

    const scope = vfs.getScope(opts.uri)
    const limit = opts.limit ?? 20

    // Oversample for post-filtering when scoped
    const candidateCount = Math.max(limit, RRF_LIMIT) * VEC_OVERSAMPLE
    const hits = this.db.searchVec(embedding, { limit: candidateCount })

    // Group by doc, take best chunk per doc
    const best = new Map<number, VecResult & { uri: string; hiscore: number }>()
    for (const hit of hits) {
      const uri = scope.map(hit.path)
      if (!uri) continue // path is outside the requested scope
      if (hit.seq === 0) hit.score += DESC_BOOST * (1 - hit.score)
      const incumbent = best.get(hit.doc_id)?.score ?? -Infinity
      if (!(hit.score > incumbent)) continue
      best.set(hit.doc_id, Object.assign(hit, { hiscore: 0, uri }))
    }

    // Snapshot of the pre-boost scores, keyed by URI, for the parent lookups below.
    const scores = new Map<string, number>()
    for (const entry of best.values()) scores.set(entry.uri, entry.score)

    // Memoized ancestor score: half the parent's own score plus half of the
    // parent's ancestor score, applied recursively up the URI hierarchy.
    const parentScores = new Map<string, number>()
    const getParentScore = (uri: string): number => {
      const parent = parentUri(uri)
      if (!parent) return 0
      const memo = parentScores.get(parent)
      if (memo !== undefined) return memo
      const blended = (scores.get(parent) ?? 0) * 0.5 + getParentScore(parent) * 0.5
      parentScores.set(parent, blended)
      return blended
    }

    for (const entry of best.values()) {
      const parentScore = getParentScore(entry.uri)
      entry.score += PARENT_BOOST * parentScore * (1 - entry.score)
    }

    const ordered = [...best.values()].sort((a, b) => b.score - a.score)
    const kept = opts.slice === false ? ordered : ordered.slice(0, limit)

    const docs = this.db.getDocs(kept.map((r) => r.doc_id))
    const ret: VecSR[] = []
    for (const vec of kept) {
      const doc = docs.get(vec.doc_id)
      if (!doc) continue // hit without a document row
      ret.push({
        doc,
        match: { vec },
        path: vec.path,
        scores: { vec: { rank: 0, score: vec.score } },
        uri: vec.uri,
      })
    }

    return this.rank("vec", ret)
  }

  /**
   * Full-text search restricted to the scope derived from `opts.uri`.
   *
   * Raw FTS scores are squashed into [0, 1) with |s| / (1 + |s|) before ranking.
   *
   * @param query Free-text query, converted with `toFts`.
   * @param opts `limit` defaults to 20, `op` defaults to "OR".
   */
  async searchFts(query: string, opts: FtsSearchOptions = {}): Promise<FtsSR[]> {
    const vfs = await this.ctx.vfs()
    const scope = vfs.getScope(opts.uri)

    const ftsQuery = toFts(query, opts.op ?? "OR")
    const rows = this.db.searchFts(ftsQuery, {
      limit: opts.limit ?? 20,
      scope: scope.paths.map((p) => p.path),
    })

    const docs = this.db.getDocs(rows.map((r) => r.rowid))
    const ret: FtsSR[] = []
    for (const fts of rows) {
      const magnitude = Math.abs(fts.score)
      fts.score = magnitude / (1 + Math.abs(fts.score))
      const doc = docs.get(fts.rowid)
      const uri = scope.map(doc?.path ?? "")
      if (!doc || !uri) continue
      ret.push({
        doc,
        match: { fts },
        path: doc.path,
        scores: { fts: { rank: 0, score: fts.score } },
        uri,
      })
    }
    return this.rank("fts", ret)
  }

  /**
   * Returns a sorted copy of `results` (best score first, `display_score` as
   * tie-breaker) and writes each result's 1-based position into its `rank`
   * for the given mode. The result objects themselves are updated in place.
   */
  rank<M extends SearchMode>(mode: M, results: SearchResultMap[M][]): SearchResultMap[M][] {
    const scoreOf = (r: SearchResult) => (r.scores as Record<string, SearchScore>)[mode]
    const ordered = [...results].sort(
      (a, b) =>
        scoreOf(b).score - scoreOf(a).score ||
        (scoreOf(b).display_score ?? 0) - (scoreOf(a).display_score ?? 0)
    )
    ordered.forEach((r, i) => {
      scoreOf(r).rank = i + 1
    })
    return ordered
  }

  /** Reciprocal Rank Fusion: merge FTS and vector results */
  private fuse(ftsResults: FtsSR[], vecResults: VecSR[], limit: number): HybridSR[] {
    const merged = new Map<number, { uri: string; vec?: VecSR; fts?: FtsSR }>()

    // Both lists arrive ranked, so the last element carries the lowest score.
    // That floor stands in for the missing side in display_score below.
    const minVecScore = vecResults.at(-1)?.scores.vec.score
    const minFtsScore = ftsResults.at(-1)?.scores.fts.score
    const minScore = Math.min(minVecScore ?? 1, minFtsScore ?? 1)

    for (const fts of ftsResults) merged.set(fts.doc.id, { fts, uri: fts.uri })
    for (const vec of vecResults) {
      const prior = merged.get(vec.doc.id)
      merged.set(vec.doc.id, { ...prior, uri: vec.uri, vec })
    }

    // Contribution of one ranked list; a document missing from the list adds 0.
    const reciprocal = (rank?: number) => (rank !== undefined ? 1 / (RRF_K + rank) : 0)

    const fused: HybridSR[] = []
    for (const { uri, fts, vec } of merged.values()) {
      const ftsScore = fts?.scores.fts
      const vecScore = vec?.scores.vec
      const score = reciprocal(ftsScore?.rank) + reciprocal(vecScore?.rank)
      const display_score =
        0.6 * (vecScore?.score ?? minScore) + 0.4 * (ftsScore?.score ?? minScore)
      const doc = (fts?.doc ?? vec?.doc)!
      fused.push({
        doc,
        match: { ...fts?.match, ...vec?.match },
        path: doc.path,
        scores: {
          ...fts?.scores,
          ...vec?.scores,
          hybrid: { display_score, rank: 0, score },
        },
        uri,
      })
    }

    const ret = this.rank("hybrid", fused).slice(0, limit)

    // Normalize scores to [0, 1]
    const bestScore = ret[0]?.scores.hybrid.score ?? 1
    for (const r of ret) r.scores.hybrid.score /= bestScore

    return ret
  }
}
package/src/snippet.ts ADDED
@@ -0,0 +1,248 @@
1
+ // oxfmt-ignore
2
export type Token = {
  // A single word: `text` is the word as tokenized, `lower` its lowercased form.
  lower: string
  text: string
}

/** A token together with the relevance score assigned to it. */
export type TokenWithScore = Token & { score: number }

/** Options for constructing a `Snippet`. */
export type SnippetOptions = {
  /** Search query whose terms the snippet should surface. */
  query: string
  /** Requested number of lines in the extracted snippet. */
  lines?: number
  /** Per-word weights, keyed by word, layered over the built-in stop-word list. */
  stopWords?: Map<string, number>
}

/** One candidate window of consecutive lines considered by `Snippet.extract`. */
export type SnippetWindow = {
  /** Index of the window's first line. */
  start: number
  /** Fraction of query terms found inside the window. */
  coverage: number
  /** Sum of the per-line heat inside the window. */
  heat: number
  /** `heat * coverage`; the window with the highest score wins. */
  score: number
}

/** Everything `Snippet.extract` computes, including intermediate data for debugging. */
export type SnippetResult = {
  /** The input text split into lines. */
  lines: string[]
  /** Tokens per line. */
  tokens: Token[][]
  /** Raw match score per line. */
  scores: number[]
  /** Per-line heat after spreading match scores to neighbouring lines. */
  heat: number[]
  /** All candidate windows that were evaluated. */
  windows: SnippetWindow[]
  /** The winning window. */
  best: SnippetWindow
  /** The lines of the winning window. */
  snippet: string[]
}
31
+
32
/** Matches words: runs of unicode letters and numbers. */
export const WORD_REGEX = /[\p{L}\p{N}]+/gu

// --- Token match scores (doc token vs. query token) ---
const SCORE_EXACT = 3 // identical, including case
const SCORE_LOWER = 2 // identical ignoring case
const SCORE_QUERY_PREFIX = 1.5 // query "child" matches doc "children" — query is prefix of doc
const SCORE_DOC_PREFIX = 1 // query "children" matches doc "child" — doc is prefix of query
const SCORE_OVERLAP = 1 // scaled by the shared-prefix fraction of the longer token
const MIN_PREFIX_LENGTH = 3 // minimum prefix length to consider for scoring

// --- Stop-word weights ---
const STOPWORD_MIN = 0 // min stopword score for a term
const STOPWORD_MAX = 1 // max stopword score for a term

// --- Heatmap ---
// Lines to spread the heat on each side of a match. The heatmap uses the
// larger of this value and half the requested snippet length.
const HEAT_SPREAD = 5
const WEIGHT_EMPTY = 0.1 // score multiplier for empty lines
const WEIGHT_NONWORD = 0.3 // score multiplier for lines without any word characters
const WEIGHT_REPETITION = 0.7 // score multiplier for repeated terms in the same line (to de-emphasize boilerplate)

// Linear decay: 1 at distance 0, falling to 0 at distance r and beyond.
const decayLinear = (d: number, r: number) => Math.max(0, 1 - d / r)
// Alternatives kept for reference:
// const decayHyperbolic = (d: number, _r: number) => 1 / (1 + d) // hyperbolic decay function
// const decayExponential = (d: number, r: number) => Math.exp(-d / r) // exponential decay function

// Common English stop words — used for post-processing snippets.
// Kept minimal: only the highest-frequency words that add no search value.
// oxfmt-ignore
const STOP_WORDS = new Set([
  "a", "an", "and", "are", "as", "at", "be", "but", "by", "do", "for", "from",
  "had", "has", "have", "he", "her", "his", "how", "i", "if", "in", "is", "it",
  "its", "my", "no", "not", "of", "on", "or", "our", "she", "so", "than",
  "that", "the", "their", "them", "then", "there", "these", "they", "this",
  "to", "up", "us", "was", "we", "what", "when", "which", "who", "will",
  "with", "you", "your",
])
61
+
62
/**
 * Reports whether `word` is one of the built-in English stop words.
 * The comparison is case-insensitive.
 */
export function isStopWord(word: string): boolean {
  const key = word.toLowerCase()
  return STOP_WORDS.has(key)
}
65
+
66
/**
 * Picks the most query-relevant window of lines out of a text.
 *
 * The query is tokenized once in the constructor. `extract` then scores every
 * line against the query tokens, spreads each line's score onto its
 * neighbours ("heat"), and selects the window of consecutive lines with the
 * highest `heat * query-term coverage`.
 */
export class Snippet {
  /** Query tokens with their weight; words whose weight is 0 are dropped. */
  query: (Token & { score: number })[] = []
  /** First two characters of every kept query token (lowercased). */
  prefixes = new Set<string>()
  /** Cheap pre-filter: matches text containing any of `prefixes`. */
  prefixRegex: RegExp
  /** Options with defaults filled in. */
  opts: Required<SnippetOptions>

  /**
   * @param opts `query` is required. `lines` defaults to 5. `stopWords`
   *   overrides or extends the built-in stop-word weights; its values are
   *   clamped to [STOPWORD_MIN, STOPWORD_MAX] and its keys are compared
   *   against lowercased query tokens.
   */
  constructor(opts: SnippetOptions) {
    // Defaults must only fill in omitted options. Listing them after the
    // spread (as before) discarded the caller's `lines` and `stopWords`.
    this.opts = {
      ...opts,
      lines: opts.lines ?? 5,
      stopWords: opts.stopWords ?? new Map(),
    }

    // Built-in stop words weigh 0; caller-supplied weights take precedence.
    const stopwords = new Map<string, number>([...STOP_WORDS].map((w) => [w, 0]))
    this.opts.stopWords.forEach((s, w) =>
      stopwords.set(w, Math.max(STOPWORD_MIN, Math.min(STOPWORD_MAX, s)))
    )

    const tokens = this.tokenize(opts.query, false)
    for (const tok of tokens) {
      // Words without an entry in the stop-word map get weight 2.
      const score = stopwords.get(tok.lower) ?? 2
      if (score === 0) continue
      this.prefixes.add(tok.lower.slice(0, 2))
      this.query.push({ ...tok, score })
    }
    // Prefixes consist of letters and digits only (see WORD_REGEX), so they
    // need no regex escaping.
    this.prefixRegex =
      this.prefixes.size > 0 ? new RegExp(`(${[...this.prefixes].join("|")})`, "i") : /(?!)/ // never matches
  }

  /** Strips diacritics and apostrophes so that tokens compare on base letters. */
  normalize(text: string): string {
    // Handle diacritics
    text = text.normalize("NFD").replace(/\p{M}/gu, "")
    // Drop possessive 's (and smart quote ’s) completely to avoid orphaned "s" tokens
    text = text.replace(/['’]s\b/gi, "")
    // Globally replace any remaining single quotes/apostrophes with a space
    text = text.replace(/['’]/g, " ")
    return text
  }

  /**
   * Splits `text` into word tokens.
   *
   * @param queryOnly When true (default), returns no tokens at all for text
   *   that contains none of the query prefixes, which skips tokenizing
   *   lines that cannot match.
   */
  tokenize(text: string, queryOnly = true): Token[] {
    // The prefixes come from the normalized query, so the pre-filter has to
    // look at normalized text as well; testing the raw text dropped lines
    // whose only matches carried diacritics (e.g. "Élan" for query "elan").
    const normalized = this.normalize(text)
    if (queryOnly && !this.prefixRegex.test(normalized)) return []
    // only keep unicode letters and numbers
    const words = normalized.match(WORD_REGEX) ?? []
    return words.map((word) => ({ lower: word.toLowerCase(), text: word }))
  }

  /**
   * Scores how well a document token matches a query token: exact match,
   * case-insensitive match, one being a prefix of the other, or a shared
   * prefix of at least MIN_PREFIX_LENGTH characters. Returns 0 for no match.
   */
  score(token: Token, queryToken: Token): number {
    const tl = token.lower.length
    if (token.text === queryToken.text) return SCORE_EXACT
    if (token.lower === queryToken.lower) return SCORE_LOWER
    if (token.lower.startsWith(queryToken.lower)) return SCORE_QUERY_PREFIX
    if (queryToken.lower.startsWith(token.lower) && tl >= MIN_PREFIX_LENGTH) return SCORE_DOC_PREFIX
    // Length of the common prefix.
    let prefix = 0
    for (let i = 0; i < token.lower.length; i++) {
      if (token.lower[i] !== queryToken.lower[i]) break
      prefix++
    }
    return prefix >= MIN_PREFIX_LENGTH
      ? SCORE_OVERLAP * (prefix / Math.max(token.lower.length, queryToken.lower.length))
      : 0
  }

  /**
   * Finds the query token that best matches `input`.
   *
   * @returns The best query token with `score` set to its weight multiplied
   *   by the match score, or `undefined` when nothing matches.
   */
  match(input: Token | string): TokenWithScore | undefined {
    let token: Token
    if (typeof input === "string") {
      const tok = this.normalize(input)
      token = { lower: tok.toLowerCase(), text: input }
    } else token = input

    let [bestScore, bestTok] = [0, this.query[0]]
    // oxlint-disable-next-line typescript/prefer-for-of
    for (let t = 0; t < this.query.length; t++) {
      const queryTok = this.query[t]
      const s = this.score(token, queryTok)
      if (s > bestScore) {
        ;[bestScore, bestTok] = [s, queryTok]
      }
    }
    if (bestScore > 0) return { ...bestTok, score: bestTok.score * bestScore }
  }

  // get initial scores/coverage for each line
  scores(tokens: Token[][]) {
    // coverage[l] holds the (lowercased) query terms matched on line l.
    const coverage: Set<string>[] = tokens.map(() => new Set())
    const scores = tokens.map((line, l) => {
      let lineScore = 0
      for (const token of line) {
        const queryTok = this.match(token)
        if (!queryTok) continue
        let score = queryTok.score
        // A term that already matched on this line counts for less.
        if (coverage[l].has(queryTok.lower)) score *= WEIGHT_REPETITION
        coverage[l].add(queryTok.lower)
        lineScore += score
      }
      return lineScore
    })
    return { coverage, scores }
  }

  // Build heatmap using a bounded spread (O(N * radius))
  heat(lines: string[], scores: number[]): number[] {
    const spread = Math.max(HEAT_SPREAD, Math.ceil(this.opts.lines / 2))

    // The per-line weight depends on the receiving line only, so compute it
    // once instead of once per (source line, target line) pair.
    // NOTE: de-emphasize lines without any word characters (e.g. code blocks, separators)
    const hasLetter = /\p{L}/u
    const lineWeight = scores.map((_, j) => {
      if (!lines[j].trim()) return 1 * WEIGHT_EMPTY
      if (!hasLetter.test(lines[j])) return 1 * WEIGHT_NONWORD
      return 1
    })

    const heat = new Float64Array(scores.length)
    for (let i = 0; i < scores.length; i++) {
      if (scores[i] === 0) continue
      const spreadStart = Math.max(0, i - spread)
      const spreadEnd = Math.min(scores.length - 1, i + spread)
      for (let j = spreadStart; j <= spreadEnd; j++) {
        const weight = lineWeight[j] * decayLinear(Math.abs(i - j), spread)
        heat[j] += scores[i] * weight
      }
    }
    return [...heat]
  }

  /**
   * Scores `text` line by line and returns the best window of lines together
   * with all intermediate data.
   */
  extract(text: string): SnippetResult {
    const lines = text.split("\n")
    // Window size: a whole number of at least 1, never larger than the text.
    // (`|| 1` covers NaN and 0; `Math.max` covers negative values.)
    const wanted = Math.max(1, Math.trunc(this.opts.lines) || 1)
    const radius = Math.min(wanted, lines.length)
    const tokens = lines.map((line) => this.tokenize(line))

    const { scores, coverage } = this.scores(tokens)
    const heat = this.heat(lines, scores)

    // Find the window with highest heat × term coverage
    const windows: SnippetWindow[] = []

    for (let i = 0; i <= scores.length - radius; i++) {
      if (heat[i] === 0 && windows.length > 0) continue // skip windows that don't start with any heat to save computation
      let heatSum = 0
      // Count how many distinct query terms appear in this window
      const covered = new Set<string>()
      for (let j = i; j < i + radius; j++) {
        heatSum += heat[j]
        coverage[j].forEach((t) => covered.add(t))
      }
      const cov = this.query.length === 0 ? 1 : covered.size / this.query.length
      windows.push({ coverage: cov, heat: heatSum, score: heatSum * cov, start: i })
    }

    // The earliest window wins ties.
    let best = windows[0] ?? { coverage: 0, heat: 0, score: 0, start: 0 }
    for (let i = 1; i < windows.length; i++) {
      if (windows[i].score > best.score) best = windows[i]
    }

    const snippet = lines.slice(best.start, best.start + radius)
    return { best, heat: [...heat], lines, scores, snippet, tokens, windows }
  }

  /** Prints a per-line table of scores, heat and window statistics to the console. */
  debug(result: SnippetResult) {
    // oxlint-disable-next-line no-console
    console.info("Options:", this.opts)
    // oxlint-disable-next-line no-console
    console.info("Query :", this.query)
    // oxlint-disable-next-line unicorn/consistent-function-scoping
    const score = (n?: number, f = 1) => (n !== undefined ? n.toFixed(f).padEnd(4) : " ".repeat(4))

    // extract() skips some window starts, so `windows` is not indexed by line
    // number; look windows up by their start line instead.
    const windowAt = new Map<number, SnippetWindow>()
    for (const w of result.windows) windowAt.set(w.start, w)
    const bestEnd = result.best.start + result.snippet.length

    result.lines.forEach((line, i) => {
      const isBest = i >= result.best.start && i < bestEnd
      const win = windowAt.get(i)

      const lineScore = score(result.scores[i])
      const lineHeat = score(result.heat[i])
      const windowHeat = score(win?.heat ?? 0)
      const windowScore = score(win?.score ?? 0)
      const coverage = ((win?.coverage ?? 0) * 100).toFixed(0).padStart(3)

      // oxlint-disable-next-line no-console
      console.log(
        `s:${lineScore} h:${lineHeat} wh:${windowHeat} ws:${windowScore} ${coverage}% ${isBest ? ">" : " "} ${line}`
      )
    })
  }

  /**
   * Wraps every word of `text` that matches the query using `hl`.
   *
   * @param hl Receives the matched word and its offset in `text`; its return
   *   value replaces the word.
   */
  highlight(text: string, hl: (word: string, offset: number) => string): string {
    return text.replace(WORD_REGEX, (word, offset) => (this.match(word) ? hl(word, offset) : word))
  }
}
package/src/sqlite.ts ADDED
@@ -0,0 +1 @@
1
+ export { openDatabase, type Database } from "#runtime"