@rekal/mem 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/db-BMh1OP4b.mjs +294 -0
- package/dist/doc-DnYN4jAU.mjs +116 -0
- package/dist/embed-rUMZxqed.mjs +100 -0
- package/dist/fs-DMp26Byo.mjs +32 -0
- package/dist/glob.d.mts +27 -0
- package/dist/glob.mjs +132 -0
- package/dist/index.d.mts +1465 -0
- package/dist/index.mjs +351 -0
- package/dist/llama-CT3dc9Cn.mjs +75 -0
- package/dist/models-DFQSgBNr.mjs +77 -0
- package/dist/openai-j2_2GM4J.mjs +76 -0
- package/dist/progress-B1JdNapX.mjs +263 -0
- package/dist/query-VFSpErTB.mjs +125 -0
- package/dist/runtime.node-DlQPaGrV.mjs +35 -0
- package/dist/search-BllHWtZF.mjs +166 -0
- package/dist/store-DE7S35SS.mjs +137 -0
- package/dist/transformers-CJ3QA2PK.mjs +55 -0
- package/dist/uri-CehXVDGB.mjs +28 -0
- package/dist/util-DNyrmcA3.mjs +11 -0
- package/dist/vfs-CNQbkhsf.mjs +222 -0
- package/foo.ts +3 -0
- package/foo2.ts +20 -0
- package/package.json +61 -0
- package/src/context.ts +77 -0
- package/src/db.ts +464 -0
- package/src/doc.ts +163 -0
- package/src/embed/base.ts +122 -0
- package/src/embed/index.ts +67 -0
- package/src/embed/llama.ts +111 -0
- package/src/embed/models.ts +104 -0
- package/src/embed/openai.ts +95 -0
- package/src/embed/transformers.ts +81 -0
- package/src/frecency.ts +58 -0
- package/src/fs.ts +36 -0
- package/src/glob.ts +163 -0
- package/src/index.ts +15 -0
- package/src/log.ts +60 -0
- package/src/md.ts +204 -0
- package/src/progress.ts +121 -0
- package/src/query.ts +131 -0
- package/src/runtime.bun.ts +33 -0
- package/src/runtime.node.ts +47 -0
- package/src/search.ts +230 -0
- package/src/snippet.ts +248 -0
- package/src/sqlite.ts +1 -0
- package/src/store.ts +180 -0
- package/src/uri.ts +28 -0
- package/src/util.ts +21 -0
- package/src/vfs.ts +257 -0
- package/test/doc.test.ts +61 -0
- package/test/fixtures/ignore-test/keep.md +0 -0
- package/test/fixtures/ignore-test/skip.log +0 -0
- package/test/fixtures/ignore-test/sub/keep.md +0 -0
- package/test/fixtures/store/agent/index.md +9 -0
- package/test/fixtures/store/agent/lessons.md +21 -0
- package/test/fixtures/store/agent/soul.md +28 -0
- package/test/fixtures/store/agent/tools.md +25 -0
- package/test/fixtures/store/concepts/frecency.md +30 -0
- package/test/fixtures/store/concepts/index.md +9 -0
- package/test/fixtures/store/concepts/memory-coherence.md +33 -0
- package/test/fixtures/store/concepts/rag.md +27 -0
- package/test/fixtures/store/index.md +9 -0
- package/test/fixtures/store/projects/index.md +9 -0
- package/test/fixtures/store/projects/rekall-inc/architecture.md +41 -0
- package/test/fixtures/store/projects/rekall-inc/decisions/index.md +9 -0
- package/test/fixtures/store/projects/rekall-inc/decisions/no-military.md +20 -0
- package/test/fixtures/store/projects/rekall-inc/index.md +28 -0
- package/test/fixtures/store/user/family.md +13 -0
- package/test/fixtures/store/user/index.md +9 -0
- package/test/fixtures/store/user/preferences.md +29 -0
- package/test/fixtures/store/user/profile.md +29 -0
- package/test/fs.test.ts +15 -0
- package/test/glob.test.ts +190 -0
- package/test/md.test.ts +177 -0
- package/test/query.test.ts +105 -0
- package/test/uri.test.ts +46 -0
- package/test/util.test.ts +62 -0
- package/test/vfs.test.ts +164 -0
- package/tsconfig.json +3 -0
- package/tsdown.config.ts +8 -0
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: "Memory coherence — ensuring implanted memories integrate without contradicting existing ones"
|
|
3
|
+
tags: [neuroscience, memory, coherence]
|
|
4
|
+
entities: [Rekall Inc]
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Memory Coherence
|
|
8
|
+
|
|
9
|
+
Memory coherence is the measure of how well an implanted memory integrates with a subject's existing memory topology. High coherence means the implant feels natural and indistinguishable from genuine memories. Low coherence leads to rejection cascades.
|
|
10
|
+
|
|
11
|
+
## Coherence Score
|
|
12
|
+
|
|
13
|
+
Measured on a 0-100 scale:
|
|
14
|
+
- **90-100:** Seamless integration, indistinguishable from genuine memory
|
|
15
|
+
- **70-89:** Minor artifacts, occasional deja vu sensations
|
|
16
|
+
- **40-69:** Noticeable inconsistencies, risk of lucid bleed-through
|
|
17
|
+
- **Below 40:** Emergency extraction recommended, ego fragmentation risk
|
|
18
|
+
|
|
19
|
+
## Rejection Cascades
|
|
20
|
+
|
|
21
|
+
When an implanted memory contradicts a genuine one, the brain attempts to resolve the conflict. If unmanaged, this creates a cascade:
|
|
22
|
+
|
|
23
|
+
1. **Micro-contradiction:** Small detail mismatch (e.g., wrong hand dominance in implant)
|
|
24
|
+
2. **Temporal paradox:** Implanted timeline conflicts with genuine timeline
|
|
25
|
+
3. **Identity conflict:** Implanted persona contradicts core identity beliefs
|
|
26
|
+
4. **Cascade:** Multiple contradictions reinforce each other, destabilizing the memory topology
|
|
27
|
+
|
|
28
|
+
## Prevention
|
|
29
|
+
|
|
30
|
+
- Pre-implant neural mapping identifies high-risk memory regions
|
|
31
|
+
- Contextual anchoring ties implants to genuine emotional experiences
|
|
32
|
+
- Gradual integration over multiple sessions for complex implants
|
|
33
|
+
- Real-time monitoring with automatic dampening during the procedure
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: "Retrieval-Augmented Generation — fetch relevant context before generating a response"
|
|
3
|
+
tags: [ai, retrieval, rag, architecture]
|
|
4
|
+
entities: [RAG, Rekall Inc]
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Retrieval-Augmented Generation (RAG)
|
|
8
|
+
|
|
9
|
+
RAG augments LLM generation by retrieving relevant documents from an external store and injecting them into the prompt. Reduces hallucination and enables knowledge that wasn't in training data.
|
|
10
|
+
|
|
11
|
+
## Pipeline
|
|
12
|
+
|
|
13
|
+
1. **Query:** user message or derived search query
|
|
14
|
+
2. **Retrieve:** search a document store (vector similarity, BM25, or hybrid)
|
|
15
|
+
3. **Augment:** inject retrieved documents into the LLM prompt as context
|
|
16
|
+
4. **Generate:** LLM produces a response grounded in the retrieved context
|
|
17
|
+
|
|
18
|
+
## Limitations
|
|
19
|
+
|
|
20
|
+
- Retrieval quality bottlenecks generation quality
|
|
21
|
+
- Chunk boundaries can split relevant context
|
|
22
|
+
- Token budget limits how much context can be injected
|
|
23
|
+
- Flat retrieval misses hierarchical relationships between documents
|
|
24
|
+
|
|
25
|
+
## Evolution
|
|
26
|
+
|
|
27
|
+
Rekall's early retrieval system used basic vector-only search. The current platform improves on this with hybrid BM25+vector retrieval, hierarchical document structure, and frecency-based injection that prioritizes contextually relevant memories over raw similarity scores.
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: "Root of Douglas Quaid's memory store — projects, concepts, and agent configuration"
|
|
3
|
+
tags: [root]
|
|
4
|
+
entities: [Douglas Quaid]
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Memory Store
|
|
8
|
+
|
|
9
|
+
Personal knowledge base for Douglas Quaid, covering Rekall Inc projects, technical concepts, and agent configuration.
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: "Douglas's projects — Rekall Inc (memory implant platform), Mars Colony OS"
|
|
3
|
+
tags: [projects]
|
|
4
|
+
entities: [Douglas Quaid, Rekall Inc, Mars Colony OS]
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Projects
|
|
8
|
+
|
|
9
|
+
Active and past projects spanning memory technology and colonial infrastructure. Current focus is Rekall Inc's memory implant platform.
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: "Rekall memory implant architecture — engram pipeline, coherence engine, and neural interface"
|
|
3
|
+
tags: [architecture, memory, neuroscience]
|
|
4
|
+
entities: [Rekall Inc, Engram Pipeline]
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Rekall Architecture
|
|
8
|
+
|
|
9
|
+
The memory implant system consists of three layers: the neural interface (hardware), the engram pipeline (synthesis), and the coherence engine (validation).
|
|
10
|
+
|
|
11
|
+
## Neural Interface
|
|
12
|
+
|
|
13
|
+
The chair-mounted neural interface uses quantum resonance imaging to map the client's existing memory topology. This map serves as the scaffold for implant integration.
|
|
14
|
+
|
|
15
|
+
- Non-invasive transcranial stimulation
|
|
16
|
+
- 12-minute mapping session
|
|
17
|
+
- Compatible with 99.7% of neural architectures
|
|
18
|
+
|
|
19
|
+
## Engram Pipeline
|
|
20
|
+
|
|
21
|
+
Raw experience data is synthesized into engrams — self-consistent memory units that integrate with the client's existing neural patterns.
|
|
22
|
+
|
|
23
|
+
1. **Capture:** Record or generate base experience data
|
|
24
|
+
2. **Personalize:** Adapt sensory details to client's neural profile
|
|
25
|
+
3. **Contextualize:** Weave in emotional responses and temporal anchors
|
|
26
|
+
4. **Compress:** Optimize for cortical buffer storage limits
|
|
27
|
+
|
|
28
|
+
## Coherence Engine
|
|
29
|
+
|
|
30
|
+
The coherence engine monitors implanted memories for integration stability. Rejection cascades occur when implanted memories conflict with genuine ones.
|
|
31
|
+
|
|
32
|
+
- Real-time conflict detection during implantation
|
|
33
|
+
- Post-implant monitoring for 72 hours
|
|
34
|
+
- Automatic dampening of contradiction signals
|
|
35
|
+
- Emergency extraction protocol if coherence drops below 40%
|
|
36
|
+
|
|
37
|
+
## Known Issues
|
|
38
|
+
|
|
39
|
+
- Ego death risk at 0.003% for identity overlay services
|
|
40
|
+
- Lucid bleed-through in 12% of vacation implants (client becomes aware the memory is synthetic)
|
|
41
|
+
- Mars packages have elevated rejection rates due to atmospheric sensory mismatch
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: "Architecture decisions for Rekall Inc — ethics, technology choices, safety protocols"
|
|
3
|
+
tags: [decisions, architecture]
|
|
4
|
+
entities: [Rekall Inc]
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Rekall Architecture Decisions
|
|
8
|
+
|
|
9
|
+
Records of significant design and policy decisions, documenting the reasoning behind key choices in memory implant technology and business operations.
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: "Decision: Rekall does not provide military memory implants — ethical and liability concerns"
|
|
3
|
+
tags: [decision, ethics, policy]
|
|
4
|
+
entities: [Rekall Inc, Cohaagen]
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Decision: No Military Memory Implants
|
|
8
|
+
|
|
9
|
+
Rekall does not offer combat training or tactical memory implants to military clients, despite significant revenue potential.
|
|
10
|
+
|
|
11
|
+
## Rationale
|
|
12
|
+
|
|
13
|
+
- Combat memories carry high rejection cascade risk due to extreme emotional content
|
|
14
|
+
- Liability exposure for implanted skills used in real combat situations
|
|
15
|
+
- Ethical concerns about creating soldiers with artificial experience
|
|
16
|
+
- Regulatory scrutiny from the Colonial Authority
|
|
17
|
+
|
|
18
|
+
## Consequence
|
|
19
|
+
|
|
20
|
+
Cohaagen has pushed back on this repeatedly, citing government contract opportunities. The board maintains the policy but reviews it annually. Skill implants for self-defense (civilian grade) remain available.
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: "Rekall Inc — memory implant platform for vacation experiences, skill acquisition, and identity services"
|
|
3
|
+
tags: [project, ai, memory, neuroscience]
|
|
4
|
+
entities: [Rekall Inc, Douglas Quaid, Dr. Edgemar, Cohaagen]
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Rekall Inc
|
|
8
|
+
|
|
9
|
+
Memory implant company specializing in artificial experience creation. Clients choose from vacation packages, skill implants, and custom identity overlays. Founded in 2078, headquartered in Chicago.
|
|
10
|
+
|
|
11
|
+
## Services
|
|
12
|
+
|
|
13
|
+
- **Vacation memories:** Experience Mars, Saturn's rings, or deep ocean exploration without leaving your chair
|
|
14
|
+
- **Skill acquisition:** Instant expertise in languages, martial arts, or technical fields
|
|
15
|
+
- **Identity services:** Temporary persona overlays for entertainment or therapeutic purposes
|
|
16
|
+
|
|
17
|
+
## Technology Stack
|
|
18
|
+
|
|
19
|
+
- Neural mapping via quantum resonance imaging
|
|
20
|
+
- Engram synthesis using transformer-based memory models
|
|
21
|
+
- Real-time coherence validation to prevent rejection cascades
|
|
22
|
+
- Distributed storage across redundant cortical buffers
|
|
23
|
+
|
|
24
|
+
## Team
|
|
25
|
+
|
|
26
|
+
- **Dr. Edgemar** — Chief Science Officer, pioneer in synthetic engram research
|
|
27
|
+
- **Douglas Quaid** — Lead Engineer, memory coherence and retrieval systems
|
|
28
|
+
- **Cohaagen** — CEO, business strategy and government contracts
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: "Douglas's family — wife Lori, recurring dreams about a woman named Melina on Mars"
|
|
3
|
+
tags: [user, family, personal]
|
|
4
|
+
entities: [Douglas Quaid, Lori Quaid, Melina]
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Douglas's Family
|
|
8
|
+
|
|
9
|
+
Douglas lives with his wife Lori in their apartment in Chicago. Married for several years, though Douglas has been having recurring dreams about Mars and a mysterious woman named Melina.
|
|
10
|
+
|
|
11
|
+
- **Wife:** Lori Quaid
|
|
12
|
+
- **Home:** Chicago, apartment near the transit hub
|
|
13
|
+
- **Recurring dream:** A woman named Melina, red Martian sky, a feeling of being someone else entirely
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: "Personal information about Douglas Quaid — family, profile, preferences"
|
|
3
|
+
tags: [user, personal]
|
|
4
|
+
entities: [Douglas Quaid]
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Douglas Quaid — Personal Information
|
|
8
|
+
|
|
9
|
+
Information about Douglas Quaid: family, career at Rekall Inc, coding preferences, and background.
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: "Douglas's working style — hands-on debugging, visual thinker, prefers simple solutions"
|
|
3
|
+
tags: [user, preferences, workflow]
|
|
4
|
+
entities: [Douglas Quaid]
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Working Style and Preferences
|
|
8
|
+
|
|
9
|
+
Douglas is a hands-on engineer who prefers debugging by inspection over abstract analysis. Visual thinker, whiteboard-first approach.
|
|
10
|
+
|
|
11
|
+
## Development Philosophy
|
|
12
|
+
|
|
13
|
+
- Build it, run it, fix it — iterate fast
|
|
14
|
+
- Prefer simple solutions over clever ones
|
|
15
|
+
- If you can't explain it on a whiteboard, it's too complex
|
|
16
|
+
- Test with real neural data, not synthetic benchmarks
|
|
17
|
+
|
|
18
|
+
## Coding Style
|
|
19
|
+
|
|
20
|
+
- 2-space indentation
|
|
21
|
+
- TypeScript for platform code, Rust for neural interface drivers
|
|
22
|
+
- Comments for "why," not "what"
|
|
23
|
+
- Line width 100
|
|
24
|
+
|
|
25
|
+
## Tools
|
|
26
|
+
|
|
27
|
+
- Neovim as primary editor
|
|
28
|
+
- Fish shell
|
|
29
|
+
- Debugs memory coherence issues by visualizing engram topology graphs
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: "Douglas's identity — construction worker turned engineer, obsessed with Mars, memory anomalies"
|
|
3
|
+
tags: [user, identity, career]
|
|
4
|
+
entities: [Douglas Quaid, Rekall Inc, Mars Colony]
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Douglas Quaid
|
|
8
|
+
|
|
9
|
+
Douglas Quaid is a construction worker in Chicago who recently transitioned to engineering at Rekall Inc. Plagued by vivid dreams about Mars that feel more like memories than imagination.
|
|
10
|
+
|
|
11
|
+
## Background
|
|
12
|
+
|
|
13
|
+
- Construction worker for 8 years, specializing in heavy machinery
|
|
14
|
+
- Self-taught programmer, picked up neural interface engineering
|
|
15
|
+
- Recruited by Rekall Inc for his intuitive understanding of memory coherence
|
|
16
|
+
- Based in Chicago
|
|
17
|
+
|
|
18
|
+
## Career at Rekall
|
|
19
|
+
|
|
20
|
+
- Lead Engineer on the memory coherence and retrieval systems
|
|
21
|
+
- Designed the conflict detection algorithm for the coherence engine
|
|
22
|
+
- Skeptical of the identity overlay service — thinks it's "playing with fire"
|
|
23
|
+
|
|
24
|
+
## The Mars Question
|
|
25
|
+
|
|
26
|
+
- Recurring dreams about Mars since childhood
|
|
27
|
+
- Dreams feature specific locations: Venusville, the reactor, the mines
|
|
28
|
+
- Visited Rekall as a client before becoming an employee — session was interrupted
|
|
29
|
+
- Can't shake the feeling that the dreams are real memories, not imagination
|
package/test/fs.test.ts
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { describe, expect, test } from "vitest"
|
|
2
|
+
import { normPath } from "../src/fs.ts"
|
|
3
|
+
|
|
4
|
+
// normPath(): the returned path must start with "/" and carry no "~" shorthand.
describe("norm", () => {
  test("expands tilde to homedir", () => {
    const expanded = normPath("~/test")
    // The tilde has to be gone and the result has to be rooted at "/".
    expect(expanded).not.toContain("~")
    expect(expanded).toMatch(/^\//)
  })

  test("resolves relative paths", () => {
    // A "./"-relative input comes back rooted at "/".
    expect(normPath("./relative/path")).toMatch(/^\//)
  })
})
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
import { fileURLToPath } from "node:url"
|
|
2
|
+
import { join } from "pathe"
|
|
3
|
+
import { describe, expect, test } from "vitest"
|
|
4
|
+
import { glob } from "../src/glob.ts"
|
|
5
|
+
|
|
6
|
+
// Default fixture root for collect(): this file's own path, then "..", then "fixtures/store".
// NOTE(review): presumably join() normalises the ".." so this resolves next to the test file — confirm.
const FIXTURES = join(fileURLToPath(import.meta.url), "..", "fixtures/store")
|
|
7
|
+
|
|
8
|
+
/**
 * Drains glob() into an array so tests can assert on the full result set.
 * `cwd` defaults to FIXTURES; any key present in `opts` (including `cwd`) overrides it.
 */
async function collect(opts: Partial<Parameters<typeof glob>[0]> = {}) {
  const options = { cwd: FIXTURES, ...opts }
  const found: string[] = []
  for await (const entry of glob(options)) found.push(entry)
  return found
}
|
|
15
|
+
|
|
16
|
+
describe("glob", () => {
  // Directories are reported with a trailing "/", files without one.
  const isDir = (entry: string) => entry.endsWith("/")
  const isMarkdown = (entry: string) => entry.endsWith(".md")

  test("yields files and directories", async () => {
    const entries = await collect()
    expect(entries.length).toBeGreaterThan(0)
    expect(entries.some(isMarkdown)).toBe(true)
    expect(entries.some(isDir)).toBe(true)
  })

  test("finds known fixture files", async () => {
    const entries = await collect()
    const known = [
      "index.md",
      "user/",
      "user/family.md",
      "user/index.md",
      "projects/rekall-inc/decisions/no-military.md",
    ]
    for (const expected of known) {
      expect(entries).toContain(expected)
    }
  })

  test("results are sorted by name", async () => {
    // Only root-level markdown files are compared, in the order glob yielded them.
    const topLevelFiles = (await collect()).filter(
      (entry) => !entry.includes("/") && isMarkdown(entry),
    )
    expect(topLevelFiles).toEqual(topLevelFiles.toSorted())
  })

  test("respects depth", async () => {
    const entries = await collect({ depth: 2 })
    expect(entries).toContain("index.md")
    expect(entries).toContain("user/")
    expect(entries.some((entry) => entry.startsWith("user/") && isMarkdown(entry))).toBe(true)
    // "decisions/" sits below projects/rekall-inc/, so it must not show up at depth 2.
    expect(entries.some((entry) => entry.includes("decisions/"))).toBe(false)
  })

  test("depth 1 only shows root contents", async () => {
    const entries = await collect({ depth: 1 })
    expect(entries).toContain("index.md")
    expect(entries).toContain("user/")
    // A "/" may only appear as the trailing marker of a directory entry.
    const nested = entries.some((entry) => entry.includes("/") && !isDir(entry))
    expect(nested).toBe(false)
  })

  test("skips hidden files and dirs by default", async () => {
    const entries = await collect()
    expect(entries.some((entry) => entry.startsWith("."))).toBe(false)
    expect(entries.some((entry) => entry.includes("/."))).toBe(false)
    expect(entries.some((entry) => entry.includes(".rekal"))).toBe(false)
  })

  test("includes hidden files when hidden is true", async () => {
    const entries = await collect({ depth: 1, hidden: true })
    expect(entries.length).toBeGreaterThan(0)
  })

  test("type=files excludes directories from output", async () => {
    const entries = await collect({ type: "file" })
    expect(entries.some(isDir)).toBe(false)
    // Nested files are still reported, so some path contains a separator.
    expect(entries.some((entry) => entry.includes("/"))).toBe(true)
  })
})
|
|
73
|
+
|
|
74
|
+
describe("glob patterns", () => {
  const isDir = (entry: string) => entry.endsWith("/")
  const withoutDirs = (entries: string[]) => entries.filter((entry) => !isDir(entry))

  test("filters to matching files only", async () => {
    const files = withoutDirs(await collect({ glob: "**/*.md" }))
    expect(files.length).toBeGreaterThan(0)
    expect(files.every((file) => file.endsWith(".md"))).toBe(true)
  })

  test("excludes non-matching files", async () => {
    const entries = await collect({ glob: "**/*.md" })
    expect(entries.some((entry) => entry.endsWith(".yaml"))).toBe(false)
  })

  test("still includes directories for traversal", async () => {
    const entries = await collect({ glob: "**/*.md" })
    expect(entries.some(isDir)).toBe(true)
  })

  test("supports multiple glob patterns", async () => {
    const entries = await collect({ glob: ["**/family.md", "**/profile.md"] })
    for (const name of ["family.md", "profile.md"]) {
      expect(entries.some((entry) => entry.endsWith(name))).toBe(true)
    }
  })

  test("narrow glob reduces results", async () => {
    const allMd = await collect({ glob: "**/*.md" })
    const userOnly = await collect({ glob: "user/*.md" })
    expect(userOnly.length).toBeLessThan(allMd.length)
    // Directory entries are skipped here; every remaining file must live under user/.
    const userFiles = withoutDirs(userOnly)
    expect(userFiles.every((file) => file.startsWith("user/"))).toBe(true)
  })
})
|
|
106
|
+
|
|
107
|
+
describe("glob empty directories", () => {
  // Entry name these tests look for in the results.
  const EMPTY_DIR = "empty-dir/"

  test("excludes empty directories by default", async () => {
    const entries = await collect()
    expect(entries.includes(EMPTY_DIR)).toBe(false)
  })

  test("includes empty directories when empty is true", async () => {
    const entries = await collect({ empty: true })
    expect(entries.includes(EMPTY_DIR)).toBe(true)
  })
})
|
|
118
|
+
|
|
119
|
+
describe("glob onVisit", () => {
  test("calls onVisit for entries", async () => {
    // Record every path handed to the callback; the yielded results are not inspected.
    const seen: string[] = []
    await collect({ onVisit: (rel) => seen.push(rel) })
    expect(seen.length).toBeGreaterThan(0)
  })
})
|
|
126
|
+
|
|
127
|
+
describe("glob onError", () => {
  test("calls onError for inaccessible directories", async () => {
    const failures: { path: string; error: Error }[] = []
    // Point glob at a root that does not exist and capture whatever it reports.
    const missingRoot = "/nonexistent-path-that-does-not-exist"
    await collect({
      cwd: missingRoot,
      onError: (path, error) => failures.push({ path, error }),
    })
    expect(failures.length).toBeGreaterThan(0)
  })
})
|
|
137
|
+
|
|
138
|
+
describe("glob sort", () => {
  test("type sort puts directories before files", async () => {
    const results = await collect({ depth: 1, sort: "type" })
    const firstFile = results.findIndex((r) => !r.endsWith("/"))
    const lastDir = results.findLastIndex((r) => r.endsWith("/"))
    // Require both kinds of entry to be present. Guarding the ordering check behind
    // an `if` would let the test pass without verifying anything when one is missing.
    expect(firstFile).not.toBe(-1)
    expect(lastDir).not.toBe(-1)
    // Every directory must come before the first file.
    expect(lastDir).toBeLessThan(firstFile)
  })

  test("name sort is alphabetical", async () => {
    const results = await collect({ depth: 1, sort: "name" })
    // toSorted() already returns a new array, so no defensive copy is needed.
    expect(results).toEqual(results.toSorted())
  })
})
|
|
154
|
+
|
|
155
|
+
describe("glob ignore", () => {
  test("respects exclude rules", async () => {
    const entries = await collect({ exclude: ["user/"] })
    // Neither the directory entry nor anything beneath it may appear.
    expect(entries.some((entry) => entry.startsWith("user/"))).toBe(false)
    expect(entries.includes("user/")).toBe(false)
    // Unrelated trees are unaffected.
    expect(entries.some((entry) => entry.startsWith("projects/"))).toBe(true)
  })

  test("ignore=false skips all ignore file processing", async () => {
    const baseline = await collect()
    const unfiltered = await collect({ hidden: true, ignore: false })
    expect(unfiltered.length).toBeGreaterThanOrEqual(baseline.length)
  })
})
|
|
169
|
+
|
|
170
|
+
describe("glob nested ignore files", () => {
  // Separate fixture tree used only by this suite.
  const IGNORE_FIXTURES = join(fileURLToPath(import.meta.url), "..", "fixtures/ignore-test")

  test("nested gitignore with / prefix only applies relative to its directory", async () => {
    // sub/.gitignore contains "/skip.log"
    // This should ignore sub/skip.log but NOT skip.log at the root
    // collect() spreads its options after the default cwd, so passing `cwd` here
    // retargets the shared helper and no duplicate collector is needed.
    const results = await collect({ cwd: IGNORE_FIXTURES, hidden: false })
    expect(results).toContain("skip.log") // root skip.log should NOT be ignored
    expect(results).not.toContain("sub/skip.log") // sub/skip.log SHOULD be ignored
    expect(results).toContain("keep.md")
    expect(results).toContain("sub/keep.md")
  })
})
|
package/test/md.test.ts
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
import { describe, expect, test } from "vitest"
|
|
2
|
+
import { chunkMarkdown, parseSections } from "../src/md.ts"
|
|
3
|
+
|
|
4
|
+
describe("parse", () => {
  test("parses sections by heading", () => {
    const source = `# Title

Intro paragraph.

## Section One

Content one.

## Section Two

Content two.
`
    const expected = [
      { headingText: "# Title", level: 1 },
      { headingText: "## Section One", level: 2 },
      { headingText: "## Section Two", level: 2 },
    ]
    const sections = parseSections(source)
    expect(sections).toHaveLength(3)
    expected.forEach((want, i) => {
      expect(sections[i].headingText).toBe(want.headingText)
      expect(sections[i].level).toBe(want.level)
    })
  })

  test("handles content before first heading", () => {
    const source = `Some preamble text.

# First Heading

Content.
`
    const sections = parseSections(source)
    expect(sections).toHaveLength(2)
    // Text ahead of the first heading becomes its own section: empty heading, level 0.
    const preamble = sections[0]
    expect(preamble.headingText).toBe("")
    expect(preamble.level).toBe(0)
    expect(preamble.content.join("\n")).toContain("preamble")
  })

  test("ignores headings inside code blocks", () => {
    const source = `# Real Heading

\`\`\`markdown
# Not a heading
## Also not a heading
\`\`\`

More content.
`
    const sections = parseSections(source)
    // The fenced "#" lines stay inside the single real section as plain content.
    expect(sections).toHaveLength(1)
    const only = sections[0]
    expect(only.headingText).toBe("# Real Heading")
    expect(only.content.join("\n")).toContain("# Not a heading")
  })

  test("handles tilde code fences", () => {
    const source = `# Title

~~~python
# comment not a heading
~~~

## Next Section

Content.
`
    const sections = parseSections(source)
    expect(sections).toHaveLength(2)
    expect(sections[0].headingText).toBe("# Title")
    expect(sections[1].headingText).toBe("## Next Section")
  })

  test("discards empty sections", () => {
    // "# First" is immediately followed by another heading and has no body of its own.
    const source = `# First
## Second

Content here.
`
    const sections = parseSections(source)
    // Despite the test name, the assertions below expect the body-less "# First"
    // to still be reported as a section alongside "## Second".
    expect(sections).toHaveLength(2)
    expect(sections[0].headingText).toBe("# First")
    expect(sections[1].headingText).toBe("## Second")
  })

  test("handles nested heading levels", () => {
    const source = `# H1

## H2

### H3

Content.

## Another H2

More content.
`
    const sections = parseSections(source)
    const headings = sections.map((section) => section.headingText)
    const levels = sections.map((section) => section.level)
    expect(headings).toEqual(["# H1", "## H2", "### H3", "## Another H2"])
    expect(levels).toEqual([1, 2, 3, 2])
  })
})
|
|
102
|
+
|
|
103
|
+
describe("chunk", () => {
  // Stand-in tokenizer: one token per whitespace-separated word.
  // Only `toks` is provided; the cast lets it pass as chunkMarkdown's second argument.
  const wordTokenizer = {
    toks: (text: string) => text.split(/\s+/).filter((word) => word !== "").length,
  } as Parameters<typeof chunkMarkdown>[1]

  test("small document produces single chunk", () => {
    const chunks = chunkMarkdown("# Title\n\nShort content.", wordTokenizer, 100)
    expect(chunks).toHaveLength(1)
    const only = chunks[0]
    expect(only).toContain("Title")
    expect(only).toContain("Short content.")
  })

  test("splits at section boundaries", () => {
    const source = `# Title

First section with enough words to take some space in the chunk budget.

## Second Section

Second section content that should go into a new chunk because of size limits.
`
    // A limit of 15 is smaller than the document's word count, so it cannot stay whole.
    const chunks = chunkMarkdown(source, wordTokenizer, 15)
    expect(chunks.length).toBeGreaterThan(1)
  })

  test("includes parent headings in child chunks", () => {
    const source = `# Parent

## Child

Child content that is long enough to be its own chunk separate from the parent section.
`
    const chunks = chunkMarkdown(source, wordTokenizer, 15)
    // Whichever chunk holds the child's body must also carry "# Parent" for context.
    const childChunk = chunks.find((chunk) => chunk.includes("Child content"))
    expect(childChunk).toContain("# Parent")
  })

  test("handles deeply nested headings", () => {
    const source = `# Level 1

## Level 2

### Level 3

Deep content that needs all its parent headings for context.
`
    const chunks = chunkMarkdown(source, wordTokenizer, 20)
    const deepChunk = chunks.find((chunk) => chunk.includes("Deep content"))
    expect(deepChunk).toBeDefined()
    // The guard only narrows the type; a missing chunk already failed on the line above.
    if (deepChunk) {
      for (const ancestor of ["# Level 1", "## Level 2"]) {
        expect(deepChunk).toContain(ancestor)
      }
    }
  })

  test("does not duplicate headings when packing siblings", () => {
    const source = `# Parent

## Child A

Short A.

## Child B

Short B.
`
    const chunks = chunkMarkdown(source, wordTokenizer, 100)
    // Both children fit in one chunk, so the parent heading should appear exactly once.
    expect(chunks).toHaveLength(1)
    const occurrences = chunks[0].split("# Parent").length - 1
    expect(occurrences).toBe(1)
  })
})
|