@rekal/mem 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/db-BMh1OP4b.mjs +294 -0
- package/dist/doc-DnYN4jAU.mjs +116 -0
- package/dist/embed-rUMZxqed.mjs +100 -0
- package/dist/fs-DMp26Byo.mjs +32 -0
- package/dist/glob.d.mts +27 -0
- package/dist/glob.mjs +132 -0
- package/dist/index.d.mts +1465 -0
- package/dist/index.mjs +351 -0
- package/dist/llama-CT3dc9Cn.mjs +75 -0
- package/dist/models-DFQSgBNr.mjs +77 -0
- package/dist/openai-j2_2GM4J.mjs +76 -0
- package/dist/progress-B1JdNapX.mjs +263 -0
- package/dist/query-VFSpErTB.mjs +125 -0
- package/dist/runtime.node-DlQPaGrV.mjs +35 -0
- package/dist/search-BllHWtZF.mjs +166 -0
- package/dist/store-DE7S35SS.mjs +137 -0
- package/dist/transformers-CJ3QA2PK.mjs +55 -0
- package/dist/uri-CehXVDGB.mjs +28 -0
- package/dist/util-DNyrmcA3.mjs +11 -0
- package/dist/vfs-CNQbkhsf.mjs +222 -0
- package/foo.ts +3 -0
- package/foo2.ts +20 -0
- package/package.json +61 -0
- package/src/context.ts +77 -0
- package/src/db.ts +464 -0
- package/src/doc.ts +163 -0
- package/src/embed/base.ts +122 -0
- package/src/embed/index.ts +67 -0
- package/src/embed/llama.ts +111 -0
- package/src/embed/models.ts +104 -0
- package/src/embed/openai.ts +95 -0
- package/src/embed/transformers.ts +81 -0
- package/src/frecency.ts +58 -0
- package/src/fs.ts +36 -0
- package/src/glob.ts +163 -0
- package/src/index.ts +15 -0
- package/src/log.ts +60 -0
- package/src/md.ts +204 -0
- package/src/progress.ts +121 -0
- package/src/query.ts +131 -0
- package/src/runtime.bun.ts +33 -0
- package/src/runtime.node.ts +47 -0
- package/src/search.ts +230 -0
- package/src/snippet.ts +248 -0
- package/src/sqlite.ts +1 -0
- package/src/store.ts +180 -0
- package/src/uri.ts +28 -0
- package/src/util.ts +21 -0
- package/src/vfs.ts +257 -0
- package/test/doc.test.ts +61 -0
- package/test/fixtures/ignore-test/keep.md +0 -0
- package/test/fixtures/ignore-test/skip.log +0 -0
- package/test/fixtures/ignore-test/sub/keep.md +0 -0
- package/test/fixtures/store/agent/index.md +9 -0
- package/test/fixtures/store/agent/lessons.md +21 -0
- package/test/fixtures/store/agent/soul.md +28 -0
- package/test/fixtures/store/agent/tools.md +25 -0
- package/test/fixtures/store/concepts/frecency.md +30 -0
- package/test/fixtures/store/concepts/index.md +9 -0
- package/test/fixtures/store/concepts/memory-coherence.md +33 -0
- package/test/fixtures/store/concepts/rag.md +27 -0
- package/test/fixtures/store/index.md +9 -0
- package/test/fixtures/store/projects/index.md +9 -0
- package/test/fixtures/store/projects/rekall-inc/architecture.md +41 -0
- package/test/fixtures/store/projects/rekall-inc/decisions/index.md +9 -0
- package/test/fixtures/store/projects/rekall-inc/decisions/no-military.md +20 -0
- package/test/fixtures/store/projects/rekall-inc/index.md +28 -0
- package/test/fixtures/store/user/family.md +13 -0
- package/test/fixtures/store/user/index.md +9 -0
- package/test/fixtures/store/user/preferences.md +29 -0
- package/test/fixtures/store/user/profile.md +29 -0
- package/test/fs.test.ts +15 -0
- package/test/glob.test.ts +190 -0
- package/test/md.test.ts +177 -0
- package/test/query.test.ts +105 -0
- package/test/uri.test.ts +46 -0
- package/test/util.test.ts +62 -0
- package/test/vfs.test.ts +164 -0
- package/tsconfig.json +3 -0
- package/tsdown.config.ts +8 -0
package/src/glob.ts
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
import type { Ignore } from "ignore"
|
|
2
|
+
import type { Dirent } from "node:fs"
|
|
3
|
+
|
|
4
|
+
import { readFileSync } from "node:fs"
|
|
5
|
+
import { readdir } from "node:fs/promises"
|
|
6
|
+
import { join } from "pathe"
|
|
7
|
+
import { findUp, normPath, sstat } from "./fs.ts"
|
|
8
|
+
import { toError } from "./util.ts"
|
|
9
|
+
|
|
10
|
+
export type GlobSort = (a: Dirent, b: Dirent) => number
|
|
11
|
+
|
|
12
|
+
const sorters = {
|
|
13
|
+
name: (a, b) => a.name.localeCompare(b.name),
|
|
14
|
+
none: () => 0,
|
|
15
|
+
type: (a, b) => {
|
|
16
|
+
if (a.isDirectory() && !b.isDirectory()) return -1
|
|
17
|
+
if (!a.isDirectory() && b.isDirectory()) return 1
|
|
18
|
+
return a.name.localeCompare(b.name)
|
|
19
|
+
},
|
|
20
|
+
} satisfies Record<string, GlobSort>
|
|
21
|
+
|
|
22
|
+
/** Options accepted by {@link glob}. Every required field has a default (see `defaults`). */
export type GlobOptions = {
  cwd: string | string[]
  glob?: string | string[] // optional glob patterns to filter files (e.g. "*.js")
  follow: boolean // follow symlinks
  hidden: boolean // include hidden files (those starting with a dot)
  ignore: boolean // respect ignore files
  type?: "file" | "directory" // filter by type
  empty: boolean // include empty directories
  depth: number // maximum depth to traverse
  ignoreFiles: string[] // names of ignore files to look for in each directory
  exclude: string[] // additional ignore rules to apply globally
  onVisit?: (rel: string) => void // called for each candidate entry, before ignore/glob filtering
  onError?: (path: string, error: Error) => void // called when a directory cannot be read
  sort?: GlobSort | keyof typeof sorters // comparator, or the name of a built-in sorter
}

// Default option values; caller's partial options are merged on top.
const defaults: GlobOptions = {
  cwd: ".",
  depth: Infinity,
  empty: false,
  exclude: [".git", "node_modules/"], // skipped unless overridden
  follow: false,
  hidden: false,
  ignore: true,
  ignoreFiles: [".gitignore", ".ignore"],
  sort: "name",
}
|
|
49
|
+
|
|
50
|
+
// One item on the traversal stack.
type GlobEntry = {
  path: string // filesystem path of the entry
  rel: string // path relative to the traversal root; directories end with "/"
  ignore?: IgnoreTree // ignore rules in effect for this entry's directory
  depth: number // 0 for the root, +1 per directory level
  dir: boolean // true when the entry is (or, with follow, resolves to) a directory
}

// Linked chain of ignore matchers, one node per directory that contributed
// rules. The deepest node is consulted first; an explicit match (ignored or
// unignored) short-circuits, otherwise the parent chain is asked.
class IgnoreTree {
  parent?: IgnoreTree

  constructor(
    public ig: Ignore,
    public rel = "" // root-relative prefix of the directory these rules were loaded from
  ) {}

  // Layer a child matcher on top of this node.
  extend(ig: Ignore, rel: string) {
    const child = new IgnoreTree(ig, rel)
    child.parent = this
    return child
  }

  // True if `rel` (root-relative) is ignored by this node or any ancestor.
  ignores(rel: string): boolean {
    // Test the path relative to this node's own directory.
    const test = this.ig.test(rel.slice(this.rel.length))
    if (test.ignored) return true
    if (test.unignored) return false // explicit un-ignore wins over ancestors
    return this.parent?.ignores(rel) ?? false
  }
}
|
|
79
|
+
|
|
80
|
+
/**
 * Iteratively walk `cwd` (or several roots), yielding root-relative paths;
 * directory paths end with "/". Respects gitignore-style ignore files,
 * supports glob filtering, symlink following, depth limits and type
 * filtering. Unreadable directories are reported via `onError` rather than
 * thrown.
 */
export async function* glob(opts: Partial<GlobOptions> = {}): AsyncGenerator<string> {
  if (opts.depth && opts.depth < 1) return // fast path for zero results

  const { default: ignore } = await import("ignore")
  const o: GlobOptions = { ...defaults, ...opts }
  if (Array.isArray(o.cwd)) {
    // Multiple roots: traverse each one independently.
    for (const cwd of o.cwd) yield* glob({ ...o, cwd })
    return
  }
  const root = normPath(o.cwd)
  const ignoreFiles = new Set(o.ignoreFiles)
  // Root rules: the global excludes plus the ignore-file names themselves,
  // so e.g. .gitignore files are never yielded.
  const rootIgnore = ignore().add([...o.exclude, ...ignoreFiles])
  // Glob patterns reuse ignore-style matching: a file "matches" when the
  // pattern set would ignore it (hence the inverted check below).
  const globIgnore = ignore().add(o.glob ?? [])
  const sorter = (typeof o.sort === "string" ? sorters[o.sort] : o.sort) ?? sorters.name
  const visited = new Set<string>() // guards against revisits / symlink cycles

  // Seed root rules from ignore files in ancestor directories
  // (findUp presumably stops at a .git boundary — see ./fs.ts).
  if (o.ignore)
    for (const igf of ignoreFiles) {
      const igPath = findUp(root, igf, ".git")
      if (igPath) rootIgnore.add(readFileSync(igPath, "utf8"))
    }

  // Read one directory and push its eligible children onto the stack.
  async function ls(dir: GlobEntry) {
    if (visited.has(dir.path)) return
    visited.add(dir.path)
    let entries
    try {
      const dirents = await readdir(dir.path, { withFileTypes: true })
      // Reversed so that stack.pop() processes entries in sorted order.
      entries = dirents.toSorted(sorter).toReversed()
    } catch (error) {
      return o.onError?.(dir.path, toError(error))
    }

    let ig = dir.ignore
    const children: GlobEntry[] = []

    for (const entry of entries) {
      const path = join(entry.parentPath, entry.name)
      if (o.ignore && entry.isFile() && ignoreFiles.has(entry.name)) {
        // Layer this directory's ignore file onto the inherited rules.
        const fig = ignore().add(readFileSync(path, "utf8"))
        ig = ig ? ig.extend(fig, dir.rel) : new IgnoreTree(fig, dir.rel)
      } else if (!o.hidden && entry.name.startsWith(".")) {
        continue
      } else {
        let isDirectory = entry.isDirectory()
        // A symlink counts as a directory only when following links and the
        // link target stats as a directory.
        isDirectory ||= o.follow && entry.isSymbolicLink() && (sstat(path)?.isDirectory() ?? false)
        const rel = dir.rel + entry.name + (isDirectory ? "/" : "")
        const depth = dir.depth + 1
        children.push({ depth, dir: isDirectory, path, rel })
      }
    }

    // Filter only after scanning the whole directory, so an ignore file found
    // anywhere in it applies to all of its siblings.
    for (const child of children) {
      o.onVisit?.(child.rel)
      if (o.ignore && ig?.ignores(child.rel)) continue
      // Directories always descend; only files must match the glob patterns.
      if (o.glob && !child.dir && !globIgnore.ignores(child.rel)) continue
      stack.push({ ...child, ignore: ig })
    }
  }

  const stack: GlobEntry[] = [
    { depth: 0, dir: true, ignore: new IgnoreTree(rootIgnore), path: root, rel: "" },
  ]
  // Directories entered but not yet yielded — flushed only when a descendant
  // leaf appears, so empty directories are skipped unless o.empty is set.
  const parents: GlobEntry[] = []

  while (stack.length > 0) {
    const entry = stack.pop()!

    if (o.type !== "file" && entry.depth !== 0) {
      // Discard pending directories that are not ancestors of this entry
      // (they produced no yielded descendants, i.e. were empty).
      while (!o.empty && parents.length > 0 && parents[parents.length - 1].depth >= entry.depth)
        parents.pop()
      if (entry.dir && entry.depth < o.depth) {
        // Defer the directory: yield it only once it proves non-empty.
        parents.push(entry)
      } else {
        // Leaf reached: flush its pending ancestors, then the leaf itself.
        for (const p of parents) yield p.rel
        parents.length = 0
        if (o.type !== "directory") yield entry.rel
      }
    } else if (!entry.dir) yield entry.rel

    // oxlint-disable-next-line no-await-in-loop
    if (entry.dir && entry.depth < o.depth) await ls(entry)
  }
}
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
// Package entry point. The heavyweight modules (db, embed, search, store,
// vfs) are re-exported type-only — erased from the emitted JS — while the
// lightweight utility modules are re-exported in full.
export type * from "./db.ts"
export type * from "./embed/index.ts"
export type * from "./search.ts"
export type * from "./store.ts"
export type * from "./vfs.ts"
export * from "./context.ts"
export * from "./doc.ts"
export * from "./fs.ts"
export * from "./log.ts"
export * from "./progress.ts"
export * from "./query.ts"
export * from "./uri.ts"
export * from "./util.ts"
export * from "./md.ts"
export * from "./snippet.ts"
|
package/src/log.ts
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
export type LogLevel = (typeof LOG_LEVELS)[number]
|
|
2
|
+
export type LogFn<T = void> = (...msg: unknown[]) => T
|
|
3
|
+
export type Logger<T = void> = Record<LogLevel, LogFn<T>>
|
|
4
|
+
|
|
5
|
+
export const LOG_LEVELS = [
|
|
6
|
+
"cancel",
|
|
7
|
+
"info",
|
|
8
|
+
"success",
|
|
9
|
+
"warn",
|
|
10
|
+
"error",
|
|
11
|
+
"debug",
|
|
12
|
+
"fatal",
|
|
13
|
+
"prompt",
|
|
14
|
+
"log",
|
|
15
|
+
"trace",
|
|
16
|
+
] as const
|
|
17
|
+
|
|
18
|
+
// oxlint-disable-next-line sort-keys
|
|
19
|
+
const LOG_PRIORITY: Record<LogLevel, number> = {
|
|
20
|
+
trace: 0,
|
|
21
|
+
debug: 1,
|
|
22
|
+
log: 2,
|
|
23
|
+
info: 2,
|
|
24
|
+
prompt: 2,
|
|
25
|
+
success: 2,
|
|
26
|
+
cancel: 2,
|
|
27
|
+
warn: 3,
|
|
28
|
+
error: 4,
|
|
29
|
+
fatal: 5,
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
export function isLogLevel(level: string): level is LogLevel {
|
|
33
|
+
return LOG_LEVELS.includes(level as LogLevel)
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
export function shouldLog(level: string, minLevel?: LogLevel): boolean {
|
|
37
|
+
if (!isLogLevel(level)) return true
|
|
38
|
+
return LOG_PRIORITY[level] >= LOG_PRIORITY[minLevel ?? "log"]
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
export abstract class LoggerBase<T = void> implements Logger<T> {
|
|
42
|
+
cancel!: LogFn<T>
|
|
43
|
+
info!: LogFn<T>
|
|
44
|
+
success!: LogFn<T>
|
|
45
|
+
warn!: LogFn<T>
|
|
46
|
+
error!: LogFn<T>
|
|
47
|
+
debug!: LogFn<T>
|
|
48
|
+
fatal!: LogFn<T>
|
|
49
|
+
prompt!: LogFn<T>
|
|
50
|
+
log!: LogFn<T>
|
|
51
|
+
trace!: LogFn<T>
|
|
52
|
+
|
|
53
|
+
constructor() {
|
|
54
|
+
for (const level of LOG_LEVELS) {
|
|
55
|
+
this[level] = (...msg: unknown[]) => this._log(level, ...msg)
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
protected abstract _log(level: LogLevel, ...msg: unknown[]): T
|
|
60
|
+
}
|
package/src/md.ts
ADDED
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
import type { TokenCounter } from "./embed/index.ts"
|
|
2
|
+
|
|
3
|
+
import { parseYaml } from "#runtime"
|
|
4
|
+
|
|
5
|
+
// NOTE: all markdown parsing expects normalized line endings (\n)

// average chars per token across common models,
// used for estimating token counts without actual tokenization
const CHARS_PER_TOKEN = 3

export type MarkdownSection = {
  content: string[] // raw lines of the section, beginning with its own heading line
  context: string[] // parent headings for context, e.g. ["# Chapter 1", "## Section 1.2"]
  /** full heading text with markdown syntax, e.g. "## Section 1.2" */
  headingText: string
  /** heading without markdown syntax, e.g. "Section 1.2" */
  heading: string
  level: number // number of "#" marks; 0 for the preamble before any heading
  offset: number // 0-indexed line offset of the section in the original markdown body, used for mapping back to source
}

export type MarkdownDoc = {
  body: string // markdown text with the frontmatter stripped
  bodyOffset: number // line offset of the body start (after frontmatter) in the original markdown
  frontmatter: Frontmatter // parsed YAML frontmatter; {} when absent
  frontmatterText?: string // raw frontmatter block including "---" delimiters, when present
  sections: MarkdownSection[]
  text: string // the original, unmodified input
}

// Parsed YAML frontmatter key/value pairs.
export type Frontmatter = Record<string, unknown>

/** Split a leading `---` frontmatter block off `text` and parse it as YAML. */
export function parseFrontmatter(text: string): Omit<MarkdownDoc, "sections"> {
  // Non-greedy match of the first ---…--- block at the very start of the text.
  const match = text.match(/^---\n([\s\S]*?)\n---\n?/)
  const body = match ? text.slice(match[0].length) : text
  return {
    body,
    // Lines consumed by the frontmatter (trim drops the trailing newline).
    bodyOffset: match?.[0].trim().split("\n").length ?? 0,
    frontmatter: match ? (parseYaml(match[1]) as Record<string, unknown>) : {},
    frontmatterText: match?.[0],
    text,
  }
}

/** Parse frontmatter, then split the remaining body into heading sections. */
export function parseMarkdown(text: string): MarkdownDoc {
  const ret = parseFrontmatter(text)
  return { ...ret, sections: parseSections(ret.body) }
}
|
|
49
|
+
|
|
50
|
+
/**
 * Split markdown into sections, one per heading plus a level-0 preamble.
 * Headings inside fenced code blocks are ignored; sections that collected no
 * lines are dropped. A second pass fills each section's `context` with its
 * ancestor headings.
 */
export function parseSections(md: string): MarkdownSection[] {
  const lines = md.split(/\n/)
  // Level-0 preamble collects everything before the first heading.
  let current: MarkdownSection = {
    content: [],
    context: [],
    heading: "",
    headingText: "",
    level: 0,
    offset: 0,
  }
  const sections: MarkdownSection[] = [current]
  // Holds the opening fence characters (``` or ~~~) while inside a code block.
  let codeBlock: string | undefined = undefined
  for (const [i, line] of lines.entries()) {
    const match = line.match(/^(#+)\s+(.*)/)
    const fenceMatch = line.match(/^\s*(`{3,}|~{3,})/)

    // NOTE(review): the closing-fence check requires the fence at column 0,
    // while the opening fence may be indented — confirm this asymmetry is intended.
    if (codeBlock && line.startsWith(codeBlock)) {
      codeBlock = undefined // end of code block
    } else if (!codeBlock && fenceMatch) {
      codeBlock = fenceMatch[1] // start of code block
    }

    if (!codeBlock && match) {
      if (current.content.length === 0) sections.pop() // discard empty sections.
      const level = match[1].length
      current = {
        content: [line], // a section's content starts with its own heading line
        context: [],
        heading: match[2].trim(),
        headingText: match[0].trim(),
        level,
        offset: i,
      }
      sections.push(current)
    } else current.content.push(line)
  }

  // Second pass: compute the chain of ancestor headings for every section.
  const stack: MarkdownSection[] = []
  for (const section of sections) {
    // Track parent sections
    while ((stack.at(-1)?.level ?? -1) >= section.level) stack.pop()
    section.context = stack.map((s) => s.headingText)
    if (section.level > 0) stack.push(section)
  }
  return sections
}
|
|
96
|
+
|
|
97
|
+
function findSplit(slice: string) {
|
|
98
|
+
for (const sub of ["\n\n", "\n", " ", "\t", " "]) {
|
|
99
|
+
const i = slice.lastIndexOf(sub)
|
|
100
|
+
if (i > slice.length * 0.8) {
|
|
101
|
+
return i
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
return slice.length
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
// Wraps a TokenCounter with a chars-per-token estimate so very long texts
// can be bounded without invoking the (potentially slow) real tokenizer.
// NOTE(review): the running totals are static, i.e. shared across ALL
// SafeCounter instances and tokenizers — confirm that is intentional.
class SafeCounter {
  static #chars = 0 // total characters fed to the real tokenizer so far
  static #toks = 0 // total tokens the real tokenizer reported so far

  constructor(
    public tok: TokenCounter,
    public maxTokens = 500
  ) {}

  // Observed chars-per-token ratio once enough samples were collected,
  // otherwise the generic CHARS_PER_TOKEN fallback.
  get charsPerToken() {
    return SafeCounter.#toks > this.maxTokens * 2
      ? SafeCounter.#chars / SafeCounter.#toks
      : CHARS_PER_TOKEN
  }

  // Cheap character-based token estimate; no tokenizer call.
  estimate(text: string) {
    return Math.ceil(text.length / this.charsPerToken)
  }

  // Returns the actual token count, unless the estimated token count based
  // on character length is much higher than the maxTokens
  toks(text: string) {
    if (text.length === 0) return { count: 0, estimated: false }
    let count = this.estimate(text) * 0.9 // add 10% buffer to account for variance in chars per token
    // Estimate alone already exceeds the budget: skip real tokenization.
    if (count > this.maxTokens) return { count, estimated: true }
    count = this.tok.toks(text)
    // Feed the real measurement into the shared ratio.
    SafeCounter.#chars += text.length
    SafeCounter.#toks += count
    return { count, estimated: false }
  }
}
|
|
138
|
+
|
|
139
|
+
/**
 * Split plain text into chunks of at most `size` tokens (as measured by
 * `tok`), preferring to cut at paragraph/line/word boundaries near each
 * chunk's end (see findSplit).
 */
export function chunkText(text: string, tok: TokenCounter, size = 500): string[] {
  const counter = new SafeCounter(tok, size)
  const chunks: string[] = []
  while (text.length) {
    let next = text
    let toks = counter.toks(next)

    // Remainder already fits — emit it and stop.
    if (toks.count <= size) {
      chunks.push(next)
      break
    }
    // Initial character budget; the 0.8 factor leaves slack for estimate error.
    let maxChars = size * counter.charsPerToken * 0.8
    // oxlint-disable-next-line typescript/no-unnecessary-condition
    while (true) {
      maxChars = Math.min(maxChars, next.length)
      // Trim the candidate at a natural boundary, then re-measure.
      const split = findSplit(next.slice(0, maxChars))
      next = next.slice(0, split)
      toks = counter.toks(next)
      if (toks.count <= size) break
      // Still too large: shrink the budget proportionally and retry.
      maxChars *= (size / toks.count) * 0.8
    }

    chunks.push(next)
    text = text.slice(next.length)
  }
  return chunks
}
|
|
166
|
+
|
|
167
|
+
/**
 * Split markdown into chunks of at most `size` tokens, keeping heading
 * sections together where possible and repeating parent headings so each
 * chunk carries its own context. Oversized sections fall back to chunkText.
 */
export function chunkMarkdown(md: string, tok: TokenCounter, size = 500): string[] {
  const sections = parseSections(md)
  type Chunk = { content: string[]; tokens: number; context: string[] }
  const chunks: Chunk[] = [{ content: [], context: [], tokens: 0 }]
  const counter = new SafeCounter(tok, size)

  for (const section of sections) {
    const chunk = chunks.at(-1) as Chunk
    // Include parent headings in the content to preserve context
    const content = [...section.context, ...section.content]
    const text = content.join("\n")
    const toks = counter.toks(text).count

    if (chunk.tokens + toks <= size) {
      // Section fits in the current chunk.
      // only add parent headings that aren't already in the chunk for context
      const context = section.context.filter((h, c) => chunk.context[c] !== h)
      chunk.content.push(...context)
      chunk.content.push(...section.content)
      chunk.context = [...section.context, section.headingText]
      chunk.tokens += toks
    } else if (toks <= size) {
      // Section fits on its own: start a fresh chunk for it.
      chunks.push({ content, context: [...section.context, section.headingText], tokens: toks })
    } else {
      // Section alone exceeds the budget: split its body as plain text,
      // prefixing every piece with the parent headings.
      const context = section.context.join("\n")
      const toksCtx = counter.toks(context)
      chunks.push(
        ...chunkText(section.content.join("\n"), tok, size - toksCtx.count).map((c) => ({
          content: (context.length ? `${context}\n${c}` : c).split("\n"),
          context: [],
          tokens: 0, // we don't track tokens for these sub-chunks since they're already guaranteed to fit
        }))
      )
      chunks.push({ content: [], context: [], tokens: 0 }) // start a new chunk for the next section
    }
  }

  return chunks.map((c) => c.content.join("\n").trim()).filter(Boolean)
}
|
package/src/progress.ts
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
import type { TypedEmitter } from "./util.ts"
|
|
2
|
+
|
|
3
|
+
import { EventEmitter } from "node:events"
|
|
4
|
+
import { inspect } from "node:util"
|
|
5
|
+
|
|
6
|
+
/** Partial state accepted by {@link Progress.set} and the constructor. */
export type ProgressOpts = { max?: number; status?: string; value?: number }

type ProgressEvents = {
  update: [progress: Progress] // any value/max/status change, bubbled from children too
  done: [progress: Progress] // emitted once, when the tracker completes
}

/**
 * A named, event-emitting progress tracker. A Progress either tracks its own
 * value/max, or — once child() has been used — becomes a group whose value
 * and max are the sums over its children.
 */
export class Progress extends (EventEmitter as new () => TypedEmitter<ProgressEvents>) {
  #max = 100
  #value = 0
  #children = new Map<string, Progress>()
  #status?: string
  #done = false

  constructor(
    public name: string,
    opts: ProgressOpts = {}
  ) {
    super()
    this.set(opts)
  }

  /** True when this node aggregates children rather than its own counters. */
  get group() {
    return this.#children.size > 0
  }

  /** Update value (number form) or any of max/status/value; no-op after done. */
  set(opts: ProgressOpts | number): this {
    if (this.#done) return this
    if (typeof opts === "number") this.#value = opts
    else {
      this.#max = opts.max ?? this.#max
      this.#status = opts.status ?? this.#status
      this.#value = opts.value ?? this.#value
    }
    this.emit("update", this)
    // Reaching max auto-completes the tracker.
    if (this.#value >= this.#max) this.stop()
    return this
  }

  /** Current status text, falling back to the tracker name. */
  get status() {
    return this.#status ?? this.name
  }

  set status(status: string) {
    this.set({ status })
  }

  set value(value: number) {
    this.set(value)
  }

  /** Own value, or the sum over all children for a group. */
  get value(): number {
    return !this.group ? this.#value : this.#children.values().reduce((sum, c) => sum + c.value, 0)
  }

  set max(max: number) {
    this.set({ max })
  }

  /** Own max, or the sum over all children for a group. */
  get max(): number {
    return !this.group ? this.#max : this.#children.values().reduce((sum, c) => sum + c.max, 0)
  }

  get done() {
    return this.#done
  }

  /** Completion ratio in [0, 1]; 0 when max is 0. */
  get ratio(): number {
    return this.max === 0 ? 0 : Math.min(1, this.value / this.max)
  }

  /** Completion percentage in [0, 100]. */
  get pct(): number {
    return this.ratio * 100
  }

  /** Mark complete (idempotent): snap value to max, stop children, emit "done". */
  stop() {
    if (this.#done) return
    this.#done = true
    if (!this.group) this.#value = this.#max
    this.#children.forEach((c) => c.stop())
    this.emit("done", this)
  }

  /** All child trackers, in insertion order. */
  children() {
    return [...this.#children.values()]
  }

  /**
   * Get or create a child tracker by name; creating one turns this node into
   * a group. Throws if this node has already made direct progress.
   */
  child(name: string, opts: ProgressOpts = {}): Progress {
    if (this.#value > 0)
      throw new Error("Cannot add child to Progress that has already made progress")
    let child = this.#children.get(name)
    if (!child) {
      child = new Progress(name, opts)
      // Bubble updates; complete this group when its last child completes.
      child.on("update", () => this.emit("update", this))
      child.on("done", () => {
        if (!this.done && this.children().every((c) => c.done)) this.stop()
      })
      this.#children.set(name, child)
    }
    return child
  }

  [inspect.custom](_depth: number, _options: object): string {
    return this.toString()
  }

  /** Render "{pct} {name} {status}" lines, children indented one level deeper. */
  override toString(indent = 0): string {
    const pad = " ".repeat(indent)
    const pct = `${this.pct.toFixed(0)}%`.padStart(4)
    const status = this.#status ? ` ${this.#status}` : ""
    const line = `${pad}${pct} ${this.name}${status}`
    if (!this.group) return line
    const children = [...this.#children.values()].map((c) => c.toString(indent + 1))
    return [line, ...children].join("\n")
  }
}
|
package/src/query.ts
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
type Token =
|
|
2
|
+
| { type: "term"; value: string; neg?: boolean; req?: boolean; field?: string }
|
|
3
|
+
| { type: "op"; value: "AND" | "OR" }
|
|
4
|
+
| { type: "paren"; value: "(" | ")" }
|
|
5
|
+
|
|
6
|
+
const FTS_FIELDS = new Set(["entities", "tags", "description", "title", "body"])
|
|
7
|
+
|
|
8
|
+
export function tokenize(input: string): Token[] {
|
|
9
|
+
const tokens: Token[] = []
|
|
10
|
+
let i = 0
|
|
11
|
+
|
|
12
|
+
while (i < input.length) {
|
|
13
|
+
while (i < input.length && input[i] === " ") i++
|
|
14
|
+
if (i >= input.length) break
|
|
15
|
+
|
|
16
|
+
const ch = input[i]
|
|
17
|
+
|
|
18
|
+
if (ch === "(" || ch === ")") {
|
|
19
|
+
tokens.push({ type: "paren", value: ch })
|
|
20
|
+
i++
|
|
21
|
+
} else if (ch === "|") {
|
|
22
|
+
tokens.push({ type: "op", value: "OR" })
|
|
23
|
+
i++
|
|
24
|
+
} else if ((ch === '"' || ch === "'") && (i === 0 || input[i - 1] === " ")) {
|
|
25
|
+
const quote = ch
|
|
26
|
+
i++
|
|
27
|
+
const start = i
|
|
28
|
+
while (i < input.length && input[i] !== quote) i++
|
|
29
|
+
if (start < i) tokens.push({ type: "term", value: input.slice(start, i) })
|
|
30
|
+
if (i < input.length) i++
|
|
31
|
+
} else {
|
|
32
|
+
const neg = ch === "-"
|
|
33
|
+
const req = ch === "+"
|
|
34
|
+
if (neg || req) i++
|
|
35
|
+
const start = i
|
|
36
|
+
while (i < input.length && !' "()|'.includes(input[i])) i++
|
|
37
|
+
if (start < i) {
|
|
38
|
+
const raw = input.slice(start, i)
|
|
39
|
+
const colon = raw.indexOf(":")
|
|
40
|
+
if (colon > 0 && FTS_FIELDS.has(raw.slice(0, colon))) {
|
|
41
|
+
tokens.push({
|
|
42
|
+
field: raw.slice(0, colon),
|
|
43
|
+
neg: neg || undefined,
|
|
44
|
+
req: req || undefined,
|
|
45
|
+
type: "term",
|
|
46
|
+
value: raw.slice(colon + 1),
|
|
47
|
+
})
|
|
48
|
+
} else {
|
|
49
|
+
tokens.push({ neg: neg || undefined, req: req || undefined, type: "term", value: raw })
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
return tokens
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
/** Sanitize a term for FTS5 — strip non-word/non-apostrophe chars, preserve colons */
|
|
58
|
+
function sanitize(term: string): string {
|
|
59
|
+
return term.replace(/[^\p{L}\p{N}\s':]/gu, "").trim()
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
function buildTerm(token: Extract<Token, { type: "term" }>): string | undefined {
|
|
63
|
+
const clean = sanitize(token.value)
|
|
64
|
+
if (!clean) return
|
|
65
|
+
const isPrefix = token.value.endsWith("*")
|
|
66
|
+
const phrase = `"${clean}"${isPrefix ? "*" : ""}`
|
|
67
|
+
const scoped = token.field ? `${token.field} : ${phrase}` : phrase
|
|
68
|
+
return token.neg ? `NOT ${scoped}` : scoped
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
function joinParts(parts: string[], op: string): string {
|
|
72
|
+
return parts.join(` ${op} `)
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
/** Build an FTS5 query string from user input */
|
|
76
|
+
export function toFts(input: string, defaultOp: "AND" | "OR" = "OR"): string {
|
|
77
|
+
const tokens = tokenize(input)
|
|
78
|
+
const hasRequired = tokens.some((t) => t.type === "term" && t.req)
|
|
79
|
+
|
|
80
|
+
// If no required terms, build normally
|
|
81
|
+
if (!hasRequired) {
|
|
82
|
+
const parts: string[] = []
|
|
83
|
+
let needsOp = false
|
|
84
|
+
|
|
85
|
+
for (const token of tokens) {
|
|
86
|
+
if (token.type === "paren") {
|
|
87
|
+
if (token.value === "(") {
|
|
88
|
+
if (needsOp) parts.push(defaultOp)
|
|
89
|
+
parts.push(token.value)
|
|
90
|
+
needsOp = false
|
|
91
|
+
} else {
|
|
92
|
+
parts.push(token.value)
|
|
93
|
+
needsOp = true
|
|
94
|
+
}
|
|
95
|
+
continue
|
|
96
|
+
}
|
|
97
|
+
if (token.type === "op") {
|
|
98
|
+
parts.push(token.value)
|
|
99
|
+
needsOp = false
|
|
100
|
+
continue
|
|
101
|
+
}
|
|
102
|
+
const term = buildTerm(token)
|
|
103
|
+
if (!term) continue
|
|
104
|
+
if (needsOp) parts.push(defaultOp)
|
|
105
|
+
parts.push(term)
|
|
106
|
+
needsOp = true
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
return parts.join(" ").replace(/\( /g, "(").replace(/ \)/g, ")")
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
// With required terms: required1 AND required2 AND (all terms joined with OR)
|
|
113
|
+
const required: string[] = []
|
|
114
|
+
const all: string[] = []
|
|
115
|
+
|
|
116
|
+
for (const token of tokens) {
|
|
117
|
+
if (token.type !== "term") continue
|
|
118
|
+
const term = buildTerm(token)
|
|
119
|
+
if (!term) continue
|
|
120
|
+
all.push(term)
|
|
121
|
+
if (token.req) required.push(term)
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
const requiredPart = joinParts(required, "AND")
|
|
125
|
+
const allPart = joinParts(all, "OR")
|
|
126
|
+
|
|
127
|
+
// If everything is required, no need for the OR group
|
|
128
|
+
if (required.length === all.length) return requiredPart
|
|
129
|
+
|
|
130
|
+
return `${requiredPart} AND (${allPart})`
|
|
131
|
+
}
|