@rekal/mem 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. package/dist/db-BMh1OP4b.mjs +294 -0
  2. package/dist/doc-DnYN4jAU.mjs +116 -0
  3. package/dist/embed-rUMZxqed.mjs +100 -0
  4. package/dist/fs-DMp26Byo.mjs +32 -0
  5. package/dist/glob.d.mts +27 -0
  6. package/dist/glob.mjs +132 -0
  7. package/dist/index.d.mts +1465 -0
  8. package/dist/index.mjs +351 -0
  9. package/dist/llama-CT3dc9Cn.mjs +75 -0
  10. package/dist/models-DFQSgBNr.mjs +77 -0
  11. package/dist/openai-j2_2GM4J.mjs +76 -0
  12. package/dist/progress-B1JdNapX.mjs +263 -0
  13. package/dist/query-VFSpErTB.mjs +125 -0
  14. package/dist/runtime.node-DlQPaGrV.mjs +35 -0
  15. package/dist/search-BllHWtZF.mjs +166 -0
  16. package/dist/store-DE7S35SS.mjs +137 -0
  17. package/dist/transformers-CJ3QA2PK.mjs +55 -0
  18. package/dist/uri-CehXVDGB.mjs +28 -0
  19. package/dist/util-DNyrmcA3.mjs +11 -0
  20. package/dist/vfs-CNQbkhsf.mjs +222 -0
  21. package/foo.ts +3 -0
  22. package/foo2.ts +20 -0
  23. package/package.json +61 -0
  24. package/src/context.ts +77 -0
  25. package/src/db.ts +464 -0
  26. package/src/doc.ts +163 -0
  27. package/src/embed/base.ts +122 -0
  28. package/src/embed/index.ts +67 -0
  29. package/src/embed/llama.ts +111 -0
  30. package/src/embed/models.ts +104 -0
  31. package/src/embed/openai.ts +95 -0
  32. package/src/embed/transformers.ts +81 -0
  33. package/src/frecency.ts +58 -0
  34. package/src/fs.ts +36 -0
  35. package/src/glob.ts +163 -0
  36. package/src/index.ts +15 -0
  37. package/src/log.ts +60 -0
  38. package/src/md.ts +204 -0
  39. package/src/progress.ts +121 -0
  40. package/src/query.ts +131 -0
  41. package/src/runtime.bun.ts +33 -0
  42. package/src/runtime.node.ts +47 -0
  43. package/src/search.ts +230 -0
  44. package/src/snippet.ts +248 -0
  45. package/src/sqlite.ts +1 -0
  46. package/src/store.ts +180 -0
  47. package/src/uri.ts +28 -0
  48. package/src/util.ts +21 -0
  49. package/src/vfs.ts +257 -0
  50. package/test/doc.test.ts +61 -0
  51. package/test/fixtures/ignore-test/keep.md +0 -0
  52. package/test/fixtures/ignore-test/skip.log +0 -0
  53. package/test/fixtures/ignore-test/sub/keep.md +0 -0
  54. package/test/fixtures/store/agent/index.md +9 -0
  55. package/test/fixtures/store/agent/lessons.md +21 -0
  56. package/test/fixtures/store/agent/soul.md +28 -0
  57. package/test/fixtures/store/agent/tools.md +25 -0
  58. package/test/fixtures/store/concepts/frecency.md +30 -0
  59. package/test/fixtures/store/concepts/index.md +9 -0
  60. package/test/fixtures/store/concepts/memory-coherence.md +33 -0
  61. package/test/fixtures/store/concepts/rag.md +27 -0
  62. package/test/fixtures/store/index.md +9 -0
  63. package/test/fixtures/store/projects/index.md +9 -0
  64. package/test/fixtures/store/projects/rekall-inc/architecture.md +41 -0
  65. package/test/fixtures/store/projects/rekall-inc/decisions/index.md +9 -0
  66. package/test/fixtures/store/projects/rekall-inc/decisions/no-military.md +20 -0
  67. package/test/fixtures/store/projects/rekall-inc/index.md +28 -0
  68. package/test/fixtures/store/user/family.md +13 -0
  69. package/test/fixtures/store/user/index.md +9 -0
  70. package/test/fixtures/store/user/preferences.md +29 -0
  71. package/test/fixtures/store/user/profile.md +29 -0
  72. package/test/fs.test.ts +15 -0
  73. package/test/glob.test.ts +190 -0
  74. package/test/md.test.ts +177 -0
  75. package/test/query.test.ts +105 -0
  76. package/test/uri.test.ts +46 -0
  77. package/test/util.test.ts +62 -0
  78. package/test/vfs.test.ts +164 -0
  79. package/tsconfig.json +3 -0
  80. package/tsdown.config.ts +8 -0
@@ -0,0 +1,33 @@
1
+ ---
2
+ description: "Memory coherence — ensuring implanted memories integrate without contradicting existing ones"
3
+ tags: [neuroscience, memory, coherence]
4
+ entities: [Rekall Inc]
5
+ ---
6
+
7
+ # Memory Coherence
8
+
9
+ Memory coherence is the measure of how well an implanted memory integrates with a subject's existing memory topology. High coherence means the implant feels natural and indistinguishable from genuine memories. Low coherence leads to rejection cascades.
10
+
11
+ ## Coherence Score
12
+
13
+ Measured on a 0-100 scale:
14
+ - **90-100:** Seamless integration, indistinguishable from genuine memory
15
+ - **70-89:** Minor artifacts, occasional déjà vu sensations
16
+ - **40-69:** Noticeable inconsistencies, risk of lucid bleed-through
17
+ - **Below 40:** Emergency extraction recommended, ego fragmentation risk
18
+
19
+ ## Rejection Cascades
20
+
21
+ When an implanted memory contradicts a genuine one, the brain attempts to resolve the conflict. If unmanaged, this creates a cascade:
22
+
23
+ 1. **Micro-contradiction:** Small detail mismatch (e.g., wrong hand dominance in implant)
24
+ 2. **Temporal paradox:** Implanted timeline conflicts with genuine timeline
25
+ 3. **Identity conflict:** Implanted persona contradicts core identity beliefs
26
+ 4. **Cascade:** Multiple contradictions reinforce each other, destabilizing the memory topology
27
+
28
+ ## Prevention
29
+
30
+ - Pre-implant neural mapping identifies high-risk memory regions
31
+ - Contextual anchoring ties implants to genuine emotional experiences
32
+ - Gradual integration over multiple sessions for complex implants
33
+ - Real-time monitoring with automatic dampening during the procedure
@@ -0,0 +1,27 @@
1
+ ---
2
+ description: "Retrieval-Augmented Generation — fetch relevant context before generating a response"
3
+ tags: [ai, retrieval, rag, architecture]
4
+ entities: [RAG, Rekall Inc]
5
+ ---
6
+
7
+ # Retrieval-Augmented Generation (RAG)
8
+
9
+ RAG augments LLM generation by retrieving relevant documents from an external store and injecting them into the prompt. Reduces hallucination and enables knowledge that wasn't in training data.
10
+
11
+ ## Pipeline
12
+
13
+ 1. **Query:** user message or derived search query
14
+ 2. **Retrieve:** search a document store (vector similarity, BM25, or hybrid)
15
+ 3. **Augment:** inject retrieved documents into the LLM prompt as context
16
+ 4. **Generate:** LLM produces a response grounded in the retrieved context
17
+
18
+ ## Limitations
19
+
20
+ - Retrieval quality bottlenecks generation quality
21
+ - Chunk boundaries can split relevant context
22
+ - Token budget limits how much context can be injected
23
+ - Flat retrieval misses hierarchical relationships between documents
24
+
25
+ ## Evolution
26
+
27
+ Rekall's early retrieval system used basic vector-only search. The current platform improves on this with hybrid BM25+vector retrieval, hierarchical document structure, and frecency-based injection that prioritizes contextually relevant memories over raw similarity scores.
@@ -0,0 +1,9 @@
1
+ ---
2
+ description: "Root of Douglas Quaid's memory store — projects, concepts, and agent configuration"
3
+ tags: [root]
4
+ entities: [Douglas Quaid]
5
+ ---
6
+
7
+ # Memory Store
8
+
9
+ Personal knowledge base for Douglas Quaid, covering Rekall Inc projects, technical concepts, and agent configuration.
@@ -0,0 +1,9 @@
1
+ ---
2
+ description: "Douglas's projects — Rekall Inc (memory implant platform), Mars Colony OS"
3
+ tags: [projects]
4
+ entities: [Douglas Quaid, Rekall Inc, Mars Colony OS]
5
+ ---
6
+
7
+ # Projects
8
+
9
+ Active and past projects spanning memory technology and colonial infrastructure. Current focus is Rekall Inc's memory implant platform.
@@ -0,0 +1,41 @@
1
+ ---
2
+ description: "Rekall memory implant architecture — engram pipeline, coherence engine, and neural interface"
3
+ tags: [architecture, memory, neuroscience]
4
+ entities: [Rekall Inc, Engram Pipeline]
5
+ ---
6
+
7
+ # Rekall Architecture
8
+
9
+ The memory implant system consists of three layers: the neural interface (hardware), the engram pipeline (synthesis), and the coherence engine (validation).
10
+
11
+ ## Neural Interface
12
+
13
+ The chair-mounted neural interface uses quantum resonance imaging to map the client's existing memory topology. This map serves as the scaffold for implant integration.
14
+
15
+ - Non-invasive transcranial stimulation
16
+ - 12-minute mapping session
17
+ - Compatible with 99.7% of neural architectures
18
+
19
+ ## Engram Pipeline
20
+
21
+ Raw experience data is synthesized into engrams — self-consistent memory units that integrate with the client's existing neural patterns.
22
+
23
+ 1. **Capture:** Record or generate base experience data
24
+ 2. **Personalize:** Adapt sensory details to client's neural profile
25
+ 3. **Contextualize:** Weave in emotional responses and temporal anchors
26
+ 4. **Compress:** Optimize for cortical buffer storage limits
27
+
28
+ ## Coherence Engine
29
+
30
+ The coherence engine monitors implanted memories for integration stability. Rejection cascades occur when implanted memories conflict with genuine ones.
31
+
32
+ - Real-time conflict detection during implantation
33
+ - Post-implant monitoring for 72 hours
34
+ - Automatic dampening of contradiction signals
35
+ - Emergency extraction protocol if coherence drops below 40%
36
+
37
+ ## Known Issues
38
+
39
+ - Ego death risk at 0.003% for identity overlay services
40
+ - Lucid bleed-through in 12% of vacation implants (client becomes aware the memory is synthetic)
41
+ - Mars packages have elevated rejection rates due to atmospheric sensory mismatch
@@ -0,0 +1,9 @@
1
+ ---
2
+ description: "Architecture decisions for Rekall Inc — ethics, technology choices, safety protocols"
3
+ tags: [decisions, architecture]
4
+ entities: [Rekall Inc]
5
+ ---
6
+
7
+ # Rekall Architecture Decisions
8
+
9
+ Records of significant design and policy decisions, documenting the reasoning behind key choices in memory implant technology and business operations.
@@ -0,0 +1,20 @@
1
+ ---
2
+ description: "Decision: Rekall does not provide military memory implants — ethical and liability concerns"
3
+ tags: [decision, ethics, policy]
4
+ entities: [Rekall Inc, Cohaagen]
5
+ ---
6
+
7
+ # Decision: No Military Memory Implants
8
+
9
+ Rekall does not offer combat training or tactical memory implants to military clients, despite significant revenue potential.
10
+
11
+ ## Rationale
12
+
13
+ - Combat memories carry high rejection cascade risk due to extreme emotional content
14
+ - Liability exposure for implanted skills used in real combat situations
15
+ - Ethical concerns about creating soldiers with artificial experience
16
+ - Regulatory scrutiny from the Colonial Authority
17
+
18
+ ## Consequence
19
+
20
+ Cohaagen has pushed back on this repeatedly, citing government contract opportunities. The board maintains the policy but reviews it annually. Skill implants for self-defense (civilian grade) remain available.
@@ -0,0 +1,28 @@
1
+ ---
2
+ description: "Rekall Inc — memory implant platform for vacation experiences, skill acquisition, and identity services"
3
+ tags: [project, ai, memory, neuroscience]
4
+ entities: [Rekall Inc, Douglas Quaid, Dr. Edgemar, Cohaagen]
5
+ ---
6
+
7
+ # Rekall Inc
8
+
9
+ Memory implant company specializing in artificial experience creation. Clients choose from vacation packages, skill implants, and custom identity overlays. Founded in 2078, headquartered in Chicago.
10
+
11
+ ## Services
12
+
13
+ - **Vacation memories:** Experience Mars, Saturn's rings, or deep ocean exploration without leaving your chair
14
+ - **Skill acquisition:** Instant expertise in languages, martial arts, or technical fields
15
+ - **Identity services:** Temporary persona overlays for entertainment or therapeutic purposes
16
+
17
+ ## Technology Stack
18
+
19
+ - Neural mapping via quantum resonance imaging
20
+ - Engram synthesis using transformer-based memory models
21
+ - Real-time coherence validation to prevent rejection cascades
22
+ - Distributed storage across redundant cortical buffers
23
+
24
+ ## Team
25
+
26
+ - **Dr. Edgemar** — Chief Science Officer, pioneer in synthetic engram research
27
+ - **Douglas Quaid** — Lead Engineer, memory coherence and retrieval systems
28
+ - **Cohaagen** — CEO, business strategy and government contracts
@@ -0,0 +1,13 @@
1
+ ---
2
+ description: "Douglas's family — wife Lori, recurring dreams about a woman named Melina on Mars"
3
+ tags: [user, family, personal]
4
+ entities: [Douglas Quaid, Lori Quaid, Melina]
5
+ ---
6
+
7
+ # Douglas's Family
8
+
9
+ Douglas lives with his wife Lori in their apartment in Chicago. Married for several years, though Douglas has been having recurring dreams about Mars and a mysterious woman named Melina.
10
+
11
+ - **Wife:** Lori Quaid
12
+ - **Home:** Chicago, apartment near the transit hub
13
+ - **Recurring dream:** A woman named Melina, red Martian sky, a feeling of being someone else entirely
@@ -0,0 +1,9 @@
1
+ ---
2
+ description: "Personal information about Douglas Quaid — family, profile, preferences"
3
+ tags: [user, personal]
4
+ entities: [Douglas Quaid]
5
+ ---
6
+
7
+ # Douglas Quaid — Personal Information
8
+
9
+ Information about Douglas Quaid: family, career at Rekall Inc, coding preferences, and background.
@@ -0,0 +1,29 @@
1
+ ---
2
+ description: "Douglas's working style — hands-on debugging, visual thinker, prefers simple solutions"
3
+ tags: [user, preferences, workflow]
4
+ entities: [Douglas Quaid]
5
+ ---
6
+
7
+ # Working Style and Preferences
8
+
9
+ Douglas is a hands-on engineer who prefers debugging by inspection over abstract analysis. Visual thinker, whiteboard-first approach.
10
+
11
+ ## Development Philosophy
12
+
13
+ - Build it, run it, fix it — iterate fast
14
+ - Prefer simple solutions over clever ones
15
+ - If you can't explain it on a whiteboard, it's too complex
16
+ - Test with real neural data, not synthetic benchmarks
17
+
18
+ ## Coding Style
19
+
20
+ - 2-space indentation
21
+ - TypeScript for platform code, Rust for neural interface drivers
22
+ - Comments for "why," not "what"
23
+ - Line width 100
24
+
25
+ ## Tools
26
+
27
+ - Neovim as primary editor
28
+ - Fish shell
29
+ - Debugs memory coherence issues by visualizing engram topology graphs
@@ -0,0 +1,29 @@
1
+ ---
2
+ description: "Douglas's identity — construction worker turned engineer, obsessed with Mars, memory anomalies"
3
+ tags: [user, identity, career]
4
+ entities: [Douglas Quaid, Rekall Inc, Mars Colony]
5
+ ---
6
+
7
+ # Douglas Quaid
8
+
9
+ Douglas Quaid is a construction worker in Chicago who recently transitioned to engineering at Rekall Inc. Plagued by vivid dreams about Mars that feel more like memories than imagination.
10
+
11
+ ## Background
12
+
13
+ - Construction worker for 8 years, specializing in heavy machinery
14
+ - Self-taught programmer, picked up neural interface engineering
15
+ - Recruited by Rekall Inc for his intuitive understanding of memory coherence
16
+ - Based in Chicago
17
+
18
+ ## Career at Rekall
19
+
20
+ - Lead Engineer on the memory coherence and retrieval systems
21
+ - Designed the conflict detection algorithm for the coherence engine
22
+ - Skeptical of the identity overlay service — thinks it's "playing with fire"
23
+
24
+ ## The Mars Question
25
+
26
+ - Recurring dreams about Mars since childhood
27
+ - Dreams feature specific locations: Venusville, the reactor, the mines
28
+ - Visited Rekall as a client before becoming an employee — session was interrupted
29
+ - Can't shake the feeling that the dreams are real memories, not imagination
@@ -0,0 +1,15 @@
1
+ import { describe, expect, test } from "vitest"
2
+ import { normPath } from "../src/fs.ts"
3
+
4
+ describe("norm", () => {
5
+ test("expands tilde to homedir", () => {
6
+ const result = normPath("~/test")
7
+ expect(result).not.toContain("~")
8
+ expect(result).toMatch(/^\//)
9
+ })
10
+
11
+ test("resolves relative paths", () => {
12
+ const result = normPath("./relative/path")
13
+ expect(result).toMatch(/^\//)
14
+ })
15
+ })
@@ -0,0 +1,190 @@
1
+ import { fileURLToPath } from "node:url"
2
+ import { join } from "pathe"
3
+ import { describe, expect, test } from "vitest"
4
+ import { glob } from "../src/glob.ts"
5
+
6
+ const FIXTURES = join(fileURLToPath(import.meta.url), "..", "fixtures/store")
7
+
8
+ async function collect(opts: Partial<Parameters<typeof glob>[0]> = {}) {
9
+ const results: string[] = []
10
+ for await (const path of glob({ cwd: FIXTURES, ...opts })) {
11
+ results.push(path)
12
+ }
13
+ return results
14
+ }
15
+
16
// Core traversal behavior against the fixtures/store tree.
// Convention (established by the assertions below): directories are yielded
// with a trailing "/", files as plain relative paths.
describe("glob", () => {
  test("yields files and directories", async () => {
    const results = await collect()
    expect(results.length).toBeGreaterThan(0)
    expect(results.some((r) => r.endsWith(".md"))).toBe(true)
    expect(results.some((r) => r.endsWith("/"))).toBe(true)
  })

  test("finds known fixture files", async () => {
    const results = await collect()
    expect(results).toContain("index.md")
    expect(results).toContain("user/")
    expect(results).toContain("user/family.md")
    expect(results).toContain("user/index.md")
    // Deepest fixture path — exercises recursive descent.
    expect(results).toContain("projects/rekall-inc/decisions/no-military.md")
  })

  test("results are sorted by name", async () => {
    const results = await collect()
    const topLevelFiles = results.filter((r) => !r.includes("/") && r.endsWith(".md"))
    const sorted = [...topLevelFiles].toSorted()
    expect(topLevelFiles).toEqual(sorted)
  })

  test("respects depth", async () => {
    const results = await collect({ depth: 2 })
    expect(results).toContain("index.md")
    expect(results).toContain("user/")
    expect(results.some((r) => r.startsWith("user/") && r.endsWith(".md"))).toBe(true)
    // decisions/ is nested deeper than the requested limit, so nothing under
    // it should appear.
    expect(results.some((r) => r.includes("decisions/"))).toBe(false)
  })

  test("depth 1 only shows root contents", async () => {
    const results = await collect({ depth: 1 })
    expect(results).toContain("index.md")
    expect(results).toContain("user/")
    // At depth 1 any entry containing "/" must be a directory marker itself.
    expect(results.every((r) => !r.includes("/") || r.endsWith("/"))).toBe(true)
  })

  test("skips hidden files and dirs by default", async () => {
    const results = await collect()
    expect(results.every((r) => !r.startsWith("."))).toBe(true)
    expect(results.every((r) => !r.includes("/."))).toBe(true)
    expect(results.some((r) => r.includes(".rekal"))).toBe(false)
  })

  test("includes hidden files when hidden is true", async () => {
    // NOTE(review): only asserts non-emptiness, so it would pass even if
    // hidden entries were still skipped. Consider asserting a known dotfile
    // actually appears.
    const results = await collect({ depth: 1, hidden: true })
    expect(results.length).toBeGreaterThan(0)
  })

  test("type=files excludes directories from output", async () => {
    const results = await collect({ type: "file" })
    expect(results.every((r) => !r.endsWith("/"))).toBe(true)
    // Nested files are still reached even though directories are not emitted.
    expect(results.some((r) => r.includes("/"))).toBe(true)
  })
})
73
+
74
// Pattern filtering: glob patterns restrict which FILES are yielded, while
// directories keep flowing through so traversal can continue beneath them.
describe("glob patterns", () => {
  test("filters to matching files only", async () => {
    const results = await collect({ glob: "**/*.md" })
    const files = results.filter((r) => !r.endsWith("/"))
    expect(files.length).toBeGreaterThan(0)
    expect(files.every((r) => r.endsWith(".md"))).toBe(true)
  })

  test("excludes non-matching files", async () => {
    const results = await collect({ glob: "**/*.md" })
    expect(results.some((r) => r.endsWith(".yaml"))).toBe(false)
  })

  test("still includes directories for traversal", async () => {
    // Directories do not match "**/*.md" yet are still yielded.
    const results = await collect({ glob: "**/*.md" })
    expect(results.some((r) => r.endsWith("/"))).toBe(true)
  })

  test("supports multiple glob patterns", async () => {
    // An array of patterns is a union: a file matching any pattern is kept.
    const results = await collect({ glob: ["**/family.md", "**/profile.md"] })
    expect(results.some((r) => r.endsWith("family.md"))).toBe(true)
    expect(results.some((r) => r.endsWith("profile.md"))).toBe(true)
  })

  test("narrow glob reduces results", async () => {
    const allMd = await collect({ glob: "**/*.md" })
    const userOnly = await collect({ glob: "user/*.md" })
    expect(userOnly.length).toBeLessThan(allMd.length)
    const files = userOnly.filter((r) => !r.endsWith("/"))
    expect(files.every((r) => r.startsWith("user/"))).toBe(true)
  })
})
106
+
107
describe("glob empty directories", () => {
  // NOTE(review): no "empty-dir/" appears in the published fixture listing —
  // npm/git cannot ship empty directories — so it presumably must be created
  // by out-of-band test setup for the second test to pass. Confirm.
  test("excludes empty directories by default", async () => {
    const results = await collect()
    expect(results.some((r) => r === "empty-dir/")).toBe(false)
  })

  test("includes empty directories when empty is true", async () => {
    const results = await collect({ empty: true })
    expect(results.some((r) => r === "empty-dir/")).toBe(true)
  })
})
118
+
119
+ describe("glob onVisit", () => {
120
+ test("calls onVisit for entries", async () => {
121
+ const visited: string[] = []
122
+ await collect({ onVisit: (rel) => visited.push(rel) })
123
+ expect(visited.length).toBeGreaterThan(0)
124
+ })
125
+ })
126
+
127
+ describe("glob onError", () => {
128
+ test("calls onError for inaccessible directories", async () => {
129
+ const errors: { path: string; error: Error }[] = []
130
+ await collect({
131
+ cwd: "/nonexistent-path-that-does-not-exist",
132
+ onError: (path, error) => errors.push({ error, path }),
133
+ })
134
+ expect(errors.length).toBeGreaterThan(0)
135
+ })
136
+ })
137
+
138
+ describe("glob sort", () => {
139
+ test("type sort puts directories before files", async () => {
140
+ const results = await collect({ depth: 1, sort: "type" })
141
+ const firstFile = results.findIndex((r) => !r.endsWith("/"))
142
+ const lastDir = results.findLastIndex((r) => r.endsWith("/"))
143
+ if (firstFile !== -1 && lastDir !== -1) {
144
+ expect(lastDir).toBeLessThan(firstFile)
145
+ }
146
+ })
147
+
148
+ test("name sort is alphabetical", async () => {
149
+ const results = await collect({ depth: 1, sort: "name" })
150
+ const sorted = [...results].toSorted()
151
+ expect(results).toEqual(sorted)
152
+ })
153
+ })
154
+
155
describe("glob ignore", () => {
  test("respects exclude rules", async () => {
    const results = await collect({ exclude: ["user/"] })
    // Excluding a directory removes both the directory entry itself and
    // everything beneath it, while siblings are untouched.
    expect(results.some((r) => r.startsWith("user/"))).toBe(false)
    expect(results.some((r) => r === "user/")).toBe(false)
    expect(results.some((r) => r.startsWith("projects/"))).toBe(true)
  })

  test("ignore=false skips all ignore file processing", async () => {
    const withIgnore = await collect()
    const withoutIgnore = await collect({ hidden: true, ignore: false })
    // With ignore files disabled (and hidden entries shown) the walk can only
    // see at least as many entries as the filtered walk.
    expect(withoutIgnore.length).toBeGreaterThanOrEqual(withIgnore.length)
  })
})
169
+
170
describe("glob nested ignore files", () => {
  // Separate fixture tree dedicated to nested .gitignore semantics.
  const IGNORE_FIXTURES = join(fileURLToPath(import.meta.url), "..", "fixtures/ignore-test")

  // Same drain-the-iterator helper as collect(), anchored at IGNORE_FIXTURES.
  async function collectIgnore(opts: Partial<Parameters<typeof glob>[0]> = {}) {
    const results: string[] = []
    for await (const path of glob({ cwd: IGNORE_FIXTURES, ...opts })) {
      results.push(path)
    }
    return results
  }

  test("nested gitignore with / prefix only applies relative to its directory", async () => {
    // sub/.gitignore contains "/skip.log"
    // This should ignore sub/skip.log but NOT skip.log at the root
    // NOTE(review): the published package listing shows only keep.md,
    // skip.log, and sub/keep.md under fixtures/ignore-test — no sub/.gitignore
    // and no sub/skip.log — so the negative assertion below may pass
    // vacuously. Confirm the fixture files exist.
    const results = await collectIgnore({ hidden: false })
    expect(results).toContain("skip.log") // root skip.log should NOT be ignored
    expect(results).not.toContain("sub/skip.log") // sub/skip.log SHOULD be ignored
    expect(results).toContain("keep.md")
    expect(results).toContain("sub/keep.md")
  })
})
@@ -0,0 +1,177 @@
1
+ import { describe, expect, test } from "vitest"
2
+ import { chunkMarkdown, parseSections } from "../src/md.ts"
3
+
4
// Section parsing: headings split the document into sections, fenced code is
// opaque to heading detection, and content before the first heading lands in
// a synthetic level-0 section.
describe("parse", () => {
  test("parses sections by heading", () => {
    const sections = parseSections(`# Title

Intro paragraph.

## Section One

Content one.

## Section Two

Content two.
`)
    expect(sections).toHaveLength(3)
    expect(sections[0].headingText).toBe("# Title")
    expect(sections[0].level).toBe(1)
    expect(sections[1].headingText).toBe("## Section One")
    expect(sections[1].level).toBe(2)
    expect(sections[2].headingText).toBe("## Section Two")
    expect(sections[2].level).toBe(2)
  })

  test("handles content before first heading", () => {
    const sections = parseSections(`Some preamble text.

# First Heading

Content.
`)
    expect(sections).toHaveLength(2)
    // Preamble gets an empty headingText and level 0.
    expect(sections[0].headingText).toBe("")
    expect(sections[0].level).toBe(0)
    expect(sections[0].content.join("\n")).toContain("preamble")
  })

  test("ignores headings inside code blocks", () => {
    const sections = parseSections(`# Real Heading

\`\`\`markdown
# Not a heading
## Also not a heading
\`\`\`

More content.
`)
    expect(sections).toHaveLength(1)
    expect(sections[0].headingText).toBe("# Real Heading")
    // Fenced lines stay verbatim inside the enclosing section's content.
    expect(sections[0].content.join("\n")).toContain("# Not a heading")
  })

  test("handles tilde code fences", () => {
    // ~~~ fences must be treated the same as ``` fences.
    const sections = parseSections(`# Title

~~~python
# comment not a heading
~~~

## Next Section

Content.
`)
    expect(sections).toHaveLength(2)
    expect(sections[0].headingText).toBe("# Title")
    expect(sections[1].headingText).toBe("## Next Section")
  })

  test("discards empty sections", () => {
    // "# First" is immediately followed by "## Second", so it has no body of
    // its own — yet both sections survive, because parse() counts the heading
    // line itself as content when deciding whether a section is empty.
    const sections = parseSections(`# First
## Second

Content here.
`)
    expect(sections).toHaveLength(2)
    expect(sections[0].headingText).toBe("# First")
    expect(sections[1].headingText).toBe("## Second")
  })

  test("handles nested heading levels", () => {
    const sections = parseSections(`# H1

## H2

### H3

Content.

## Another H2

More content.
`)
    expect(sections.map((s) => s.headingText)).toEqual(["# H1", "## H2", "### H3", "## Another H2"])
    expect(sections.map((s) => s.level)).toEqual([1, 2, 3, 2])
  })
})
102
+
103
// Chunking: documents are packed into token-budgeted chunks, splitting at
// section boundaries and re-emitting ancestor headings for context.
describe("chunk", () => {
  // Simple mock tokenizer that counts whitespace-separated words as tokens,
  // so budgets below are effectively word counts.
  const mockTokenizer = {
    toks: (text: string) => text.split(/\s+/).filter(Boolean).length,
  } as Parameters<typeof chunkMarkdown>[1]

  test("small document produces single chunk", () => {
    // Budget (100) far exceeds the document, so nothing is split.
    const result = chunkMarkdown("# Title\n\nShort content.", mockTokenizer, 100)
    expect(result).toHaveLength(1)
    expect(result[0]).toContain("Title")
    expect(result[0]).toContain("Short content.")
  })

  test("splits at section boundaries", () => {
    const md = `# Title

First section with enough words to take some space in the chunk budget.

## Second Section

Second section content that should go into a new chunk because of size limits.
`
    // A 15-token budget cannot hold both sections.
    const result = chunkMarkdown(md, mockTokenizer, 15)
    expect(result.length).toBeGreaterThan(1)
  })

  test("includes parent headings in child chunks", () => {
    const md = `# Parent

## Child

Child content that is long enough to be its own chunk separate from the parent section.
`
    const result = chunkMarkdown(md, mockTokenizer, 15)
    // The child chunk should include "# Parent" for context
    const childChunk = result.find((c) => c.includes("Child content"))
    expect(childChunk).toContain("# Parent")
  })

  test("handles deeply nested headings", () => {
    const md = `# Level 1

## Level 2

### Level 3

Deep content that needs all its parent headings for context.
`
    const result = chunkMarkdown(md, mockTokenizer, 20)
    const deepChunk = result.find((c) => c.includes("Deep content"))
    // toBeDefined() fails the test on a miss; the if below only narrows the
    // type for TypeScript.
    expect(deepChunk).toBeDefined()
    if (deepChunk) {
      expect(deepChunk).toContain("# Level 1")
      expect(deepChunk).toContain("## Level 2")
    }
  })

  test("does not duplicate headings when packing siblings", () => {
    const md = `# Parent

## Child A

Short A.

## Child B

Short B.
`
    const result = chunkMarkdown(md, mockTokenizer, 100)
    // Both children fit in one chunk, parent heading should appear only once
    expect(result).toHaveLength(1)
    // Count occurrences of "# Parent" via split.
    const count = result[0].split("# Parent").length - 1
    expect(count).toBe(1)
  })
})