mdcontext 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140) hide show
  1. package/.changeset/README.md +28 -0
  2. package/.changeset/config.json +11 -0
  3. package/.github/workflows/ci.yml +83 -0
  4. package/.github/workflows/release.yml +113 -0
  5. package/.tldrignore +112 -0
  6. package/AGENTS.md +46 -0
  7. package/BACKLOG.md +338 -0
  8. package/README.md +231 -11
  9. package/biome.json +36 -0
  10. package/cspell.config.yaml +14 -0
  11. package/dist/chunk-KRYIFLQR.js +92 -0
  12. package/dist/chunk-S7E6TFX6.js +742 -0
  13. package/dist/chunk-VVTGZNBT.js +1519 -0
  14. package/dist/cli/main.d.ts +1 -0
  15. package/dist/cli/main.js +2015 -0
  16. package/dist/index.d.ts +266 -0
  17. package/dist/index.js +86 -0
  18. package/dist/mcp/server.d.ts +1 -0
  19. package/dist/mcp/server.js +376 -0
  20. package/docs/019-USAGE.md +586 -0
  21. package/docs/020-current-implementation.md +364 -0
  22. package/docs/021-DOGFOODING-FINDINGS.md +175 -0
  23. package/docs/BACKLOG.md +80 -0
  24. package/docs/DESIGN.md +439 -0
  25. package/docs/PROJECT.md +88 -0
  26. package/docs/ROADMAP.md +407 -0
  27. package/docs/test-links.md +9 -0
  28. package/package.json +69 -10
  29. package/pnpm-workspace.yaml +5 -0
  30. package/research/config-analysis/01-current-implementation.md +470 -0
  31. package/research/config-analysis/02-strategy-recommendation.md +428 -0
  32. package/research/config-analysis/03-task-candidates.md +715 -0
  33. package/research/config-analysis/033-research-configuration-management.md +828 -0
  34. package/research/config-analysis/034-research-effect-cli-config.md +1504 -0
  35. package/research/config-analysis/04-consolidated-task-candidates.md +277 -0
  36. package/research/dogfood/consolidated-tool-evaluation.md +373 -0
  37. package/research/dogfood/strategy-a/a-synthesis.md +184 -0
  38. package/research/dogfood/strategy-a/a1-docs.md +226 -0
  39. package/research/dogfood/strategy-a/a2-amorphic.md +156 -0
  40. package/research/dogfood/strategy-a/a3-llm.md +164 -0
  41. package/research/dogfood/strategy-b/b-synthesis.md +228 -0
  42. package/research/dogfood/strategy-b/b1-architecture.md +207 -0
  43. package/research/dogfood/strategy-b/b2-gaps.md +258 -0
  44. package/research/dogfood/strategy-b/b3-workflows.md +250 -0
  45. package/research/dogfood/strategy-c/c-synthesis.md +451 -0
  46. package/research/dogfood/strategy-c/c1-explorer.md +192 -0
  47. package/research/dogfood/strategy-c/c2-diver-memory.md +145 -0
  48. package/research/dogfood/strategy-c/c3-diver-control.md +148 -0
  49. package/research/dogfood/strategy-c/c4-diver-failure.md +151 -0
  50. package/research/dogfood/strategy-c/c5-diver-execution.md +221 -0
  51. package/research/dogfood/strategy-c/c6-diver-org.md +221 -0
  52. package/research/effect-cli-error-handling.md +845 -0
  53. package/research/effect-errors-as-values.md +943 -0
  54. package/research/errors-task-analysis/00-consolidated-tasks.md +207 -0
  55. package/research/errors-task-analysis/cli-commands-analysis.md +909 -0
  56. package/research/errors-task-analysis/embeddings-analysis.md +709 -0
  57. package/research/errors-task-analysis/index-search-analysis.md +812 -0
  58. package/research/mdcontext-error-analysis.md +521 -0
  59. package/research/npm_publish/011-npm-workflow-research-agent2.md +792 -0
  60. package/research/npm_publish/012-npm-workflow-research-agent1.md +530 -0
  61. package/research/npm_publish/013-npm-workflow-research-agent3.md +722 -0
  62. package/research/npm_publish/014-npm-workflow-synthesis.md +556 -0
  63. package/research/npm_publish/031-npm-workflow-task-analysis.md +134 -0
  64. package/research/semantic-search/002-research-embedding-models.md +490 -0
  65. package/research/semantic-search/003-research-rag-alternatives.md +523 -0
  66. package/research/semantic-search/004-research-vector-search.md +841 -0
  67. package/research/semantic-search/032-research-semantic-search.md +427 -0
  68. package/research/task-management-2026/00-synthesis-recommendations.md +295 -0
  69. package/research/task-management-2026/01-ai-workflow-tools.md +416 -0
  70. package/research/task-management-2026/02-agent-framework-patterns.md +476 -0
  71. package/research/task-management-2026/03-lightweight-file-based.md +567 -0
  72. package/research/task-management-2026/04-established-tools-ai-features.md +541 -0
  73. package/research/task-management-2026/linear/01-core-features-workflow.md +771 -0
  74. package/research/task-management-2026/linear/02-api-integrations.md +930 -0
  75. package/research/task-management-2026/linear/03-ai-features.md +368 -0
  76. package/research/task-management-2026/linear/04-pricing-setup.md +205 -0
  77. package/research/task-management-2026/linear/05-usage-patterns-best-practices.md +605 -0
  78. package/scripts/rebuild-hnswlib.js +63 -0
  79. package/src/cli/argv-preprocessor.test.ts +210 -0
  80. package/src/cli/argv-preprocessor.ts +202 -0
  81. package/src/cli/cli.test.ts +430 -0
  82. package/src/cli/commands/backlinks.ts +54 -0
  83. package/src/cli/commands/context.ts +197 -0
  84. package/src/cli/commands/index-cmd.ts +300 -0
  85. package/src/cli/commands/index.ts +13 -0
  86. package/src/cli/commands/links.ts +52 -0
  87. package/src/cli/commands/search.ts +451 -0
  88. package/src/cli/commands/stats.ts +146 -0
  89. package/src/cli/commands/tree.ts +107 -0
  90. package/src/cli/flag-schemas.ts +275 -0
  91. package/src/cli/help.ts +386 -0
  92. package/src/cli/index.ts +9 -0
  93. package/src/cli/main.ts +145 -0
  94. package/src/cli/options.ts +31 -0
  95. package/src/cli/typo-suggester.test.ts +105 -0
  96. package/src/cli/typo-suggester.ts +130 -0
  97. package/src/cli/utils.ts +126 -0
  98. package/src/core/index.ts +1 -0
  99. package/src/core/types.ts +140 -0
  100. package/src/embeddings/index.ts +8 -0
  101. package/src/embeddings/openai-provider.ts +165 -0
  102. package/src/embeddings/semantic-search.ts +583 -0
  103. package/src/embeddings/types.ts +82 -0
  104. package/src/embeddings/vector-store.ts +299 -0
  105. package/src/index/index.ts +4 -0
  106. package/src/index/indexer.ts +446 -0
  107. package/src/index/storage.ts +196 -0
  108. package/src/index/types.ts +109 -0
  109. package/src/index/watcher.ts +131 -0
  110. package/src/index.ts +8 -0
  111. package/src/mcp/server.ts +483 -0
  112. package/src/parser/index.ts +1 -0
  113. package/src/parser/parser.test.ts +291 -0
  114. package/src/parser/parser.ts +395 -0
  115. package/src/parser/section-filter.ts +270 -0
  116. package/src/search/query-parser.test.ts +260 -0
  117. package/src/search/query-parser.ts +319 -0
  118. package/src/search/searcher.test.ts +182 -0
  119. package/src/search/searcher.ts +602 -0
  120. package/src/summarize/budget-bugs.test.ts +620 -0
  121. package/src/summarize/formatters.ts +419 -0
  122. package/src/summarize/index.ts +20 -0
  123. package/src/summarize/summarizer.test.ts +275 -0
  124. package/src/summarize/summarizer.ts +528 -0
  125. package/src/summarize/verify-bugs.test.ts +238 -0
  126. package/src/utils/index.ts +1 -0
  127. package/src/utils/tokens.test.ts +142 -0
  128. package/src/utils/tokens.ts +186 -0
  129. package/tests/fixtures/cli/.mdcontext/config.json +8 -0
  130. package/tests/fixtures/cli/.mdcontext/indexes/documents.json +33 -0
  131. package/tests/fixtures/cli/.mdcontext/indexes/links.json +12 -0
  132. package/tests/fixtures/cli/.mdcontext/indexes/sections.json +233 -0
  133. package/tests/fixtures/cli/.mdcontext/vectors.bin +0 -0
  134. package/tests/fixtures/cli/.mdcontext/vectors.meta.json +1264 -0
  135. package/tests/fixtures/cli/README.md +9 -0
  136. package/tests/fixtures/cli/api-reference.md +11 -0
  137. package/tests/fixtures/cli/getting-started.md +11 -0
  138. package/tsconfig.json +26 -0
  139. package/vitest.config.ts +21 -0
  140. package/vitest.setup.ts +12 -0
@@ -0,0 +1,742 @@
1
+ // src/utils/tokens.ts
2
+ import { Effect } from "effect";
3
// Module-level cache for the tiktoken encoder; `null` until first use and
// reset to `null` again by freeEncoder.
var encoder = null;

/**
 * Effect yielding the cached `cl100k_base` encoder.
 * The `tiktoken` module is imported dynamically the first time the cache is empty.
 */
var getEncoder = Effect.gen(function* () {
  if (encoder !== null) {
    return encoder;
  }
  const { get_encoding: loadEncoding } = yield* Effect.promise(() => import("tiktoken"));
  encoder = loadEncoding("cl100k_base");
  return encoder;
});
11
/**
 * Count tokens in `text` using the shared encoder.
 * @param {string} text - Text to encode.
 * @returns An Effect resolving to the number of tokens produced by `encode`.
 */
var countTokens = (text) => Effect.gen(function* () {
  const tokenizer = yield* getEncoder;
  return tokenizer.encode(text).length;
});
16
/**
 * Estimate the token count of `text` without loading a tokenizer.
 *
 * Fenced code blocks, inline code and slash-separated paths are measured
 * separately and removed from the working text; what remains is treated as
 * prose. CJK, emoji and variation-selector characters are counted on the
 * full input. The weighted sum is scaled by 1.1 and rounded up.
 *
 * @param {string} text - Text to measure.
 * @returns {number} Estimated token count (0 for an empty string).
 */
var countTokensApprox = (text) => {
  if (text.length === 0) return 0;

  const tally = (input, pattern) => (input.match(pattern) || []).length;

  // Special character classes, counted on the untouched input.
  const cjkCount = tally(text, /[\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff\uac00-\ud7af\u3400-\u4dbf]/g);
  const emojiCount = tally(text, /[\u{1F300}-\u{1F9FF}\u{2600}-\u{26FF}\u{2700}-\u{27BF}\u{1F600}-\u{1F64F}\u{1F680}-\u{1F6FF}\u{2300}-\u{23FF}\u{2190}-\u{21FF}\u{25A0}-\u{25FF}\u{2B00}-\u{2BFF}]/gu);
  const variationCount = tally(text, /[\uFE0E\uFE0F]/g);

  // Text left over after each specially-handled construct is stripped out.
  let remaining = text;

  // Fenced code blocks: fixed fence overhead + one per newline + length / 2.5.
  let codeBlockTokens = 0;
  for (const fence of text.match(/```[\s\S]*?```/g) || []) {
    const overhead = /^```\w+/.test(fence) ? 6 : 4;
    const body = fence.replace(/^```\w*\n?/, "").replace(/\n?```$/, "");
    const bodyNewlines = tally(body, /\n/g);
    const bodyTokens = body.length > 0 ? body.length / 2.5 : 0;
    codeBlockTokens += Math.max(overhead, overhead + bodyNewlines + bodyTokens);
    remaining = remaining.replace(fence, "");
  }

  // Inline code spans: two for the backticks + length / 2.5.
  let inlineCodeTokens = 0;
  for (const span of remaining.match(/`[^`]+`/g) || []) {
    const inner = span.slice(1, -1);
    inlineCodeTokens += 2 + inner.length / 2.5;
    remaining = remaining.replace(span, "");
  }

  // Paths: one per slash + remaining characters / 3.5.
  let pathTokens = 0;
  for (const pathLike of remaining.match(/(?:\/[\w.-]+)+/g) || []) {
    const slashes = tally(pathLike, /\//g);
    const nonSlashLength = pathLike.length - slashes;
    pathTokens += slashes + nonSlashLength / 3.5;
    remaining = remaining.replace(pathLike, "");
  }

  const punctuationCount = tally(remaining, /[!?,.:;'"()[\]{}@#$%^&*+=|\\<>~\-/]/g);
  const proseNewlines = tally(remaining, /\n/g);
  // Clamped at zero because the special-character counts come from the full
  // input and may exceed what is left in `remaining`.
  const proseLength = Math.max(
    0,
    remaining.length - proseNewlines - cjkCount - emojiCount - variationCount - punctuationCount
  );

  // Summation order is kept fixed so floating-point rounding is stable.
  const estimate =
    proseLength / 3.5 +
    proseNewlines * 1 +
    codeBlockTokens +
    inlineCodeTokens +
    pathTokens +
    punctuationCount * 0.8 +
    cjkCount * 1.2 +
    emojiCount * 2.5 +
    variationCount * 1;

  return Math.ceil(estimate * 1.1);
};
73
/**
 * Count whitespace-separated words.
 * @param {string} text - Text to measure.
 * @returns {number} Number of words; 0 when the text is empty or only whitespace.
 */
var countWords = (text) => {
  const stripped = text.trim();
  return stripped.length === 0 ? 0 : stripped.split(/\s+/).length;
};
78
/**
 * Release the cached encoder, if one exists, and clear the cache so the next
 * use creates a fresh one. Does nothing when no encoder is cached.
 */
var freeEncoder = () => {
  if (encoder === null) {
    return;
  }
  encoder.free();
  encoder = null;
};
84
+
85
+ // src/parser/parser.ts
86
+ import * as crypto from "crypto";
87
+ import { Effect as Effect2 } from "effect";
88
+ import matter from "gray-matter";
89
+ import remarkGfm from "remark-gfm";
90
+ import remarkParse from "remark-parse";
91
+ import { unified } from "unified";
92
+ import { visit } from "unist-util-visit";
93
// Shared unified pipeline with the remark-parse and remark-gfm plugins;
// `parse` below only calls its `.parse()` method.
var processor = unified()
  .use(remarkParse)
  .use(remarkGfm);
94
/**
 * Derive a short identifier from `input`.
 * @param {string} input - Value to hash.
 * @returns {string} First 12 hex characters of the MD5 digest of `input`.
 */
var generateId = (input) => {
  const hexDigest = crypto.createHash("md5").update(input).digest("hex");
  return hexDigest.slice(0, 12);
};
97
/**
 * Turn heading text into a URL-style slug.
 *
 * Steps: lowercase, drop every character that is not a word character,
 * whitespace or hyphen, trim, replace whitespace runs with a single hyphen,
 * and collapse repeated hyphens.
 *
 * Trimming happens BEFORE whitespace is converted to hyphens. Trimming
 * afterwards has no effect, because leading/trailing whitespace has already
 * become `-`, leaving slugs such as `-hello-`.
 *
 * @param {string} text - Heading text.
 * @returns {string} The slug (may be empty if nothing survives filtering).
 */
var slugify = (text) => {
  return text
    .toLowerCase()
    .replace(/[^\w\s-]/g, "")
    .trim()
    .replace(/\s+/g, "-")
    .replace(/-+/g, "-");
};
100
/**
 * Decide whether a link target points inside the documentation set.
 *
 * - `#anchor` links are internal.
 * - Protocol-relative URLs (`//host/...`) are external.
 * - Anything starting with a URI scheme (`http:`, `https:`, `mailto:`,
 *   `tel:`, `ftp:`, `data:` ...) is external. The scheme test runs before any
 *   file-extension reasoning, so `ftp://host/readme.md` is not mistaken for
 *   a local markdown file.
 * - Everything else (relative or root-relative paths) is internal.
 *
 * The scheme pattern requires at least two characters before the colon so a
 * Windows drive letter such as `C:` is not read as a scheme.
 *
 * @param {string} href - Raw link target from the markdown source.
 * @returns {boolean} `true` when the link is internal.
 */
var isInternalLink = (href) => {
  if (href.startsWith("#")) return true;
  if (href.startsWith("//")) return false;
  if (/^[a-z][a-z0-9+.-]+:/i.test(href)) return false;
  return true;
};
107
/**
 * Collect the values of all `text` nodes under `node`.
 * @param node - Syntax-tree node to walk.
 * @returns {string} The text values joined with single spaces.
 */
var extractPlainText = (node) => {
  const pieces = [];
  visit(node, "text", ({ value }) => {
    pieces.push(value);
  });
  return pieces.join(" ");
};
114
/**
 * Read the end line of a node's position.
 * @param node - Node that may carry `position.end.line`.
 * @returns {number} The end line, or 0 when any part of the path is missing.
 */
var getNodeEndLine = (node) => {
  const line = node?.position?.end?.line;
  return line ?? 0;
};
117
/**
 * Read the start line of a node's position.
 * @param node - Node that may carry `position.start.line`.
 * @returns {number} The start line, or 0 when any part of the path is missing.
 */
var getNodeStartLine = (node) => {
  const line = node?.position?.start?.line;
  return line ?? 0;
};
120
/**
 * Split the top-level children of `tree` into flat sections, one per heading.
 *
 * Only headings that are direct children of the root are considered. Each
 * section owns the nodes between its heading and the next heading (or the end
 * of the document). Content before the first heading belongs to no section.
 *
 * @param tree - Root node whose `children` are scanned.
 * @returns Array of `{ heading, level, startLine, endLine, contentStartLine, contentNodes }`.
 */
var extractRawSections = (tree) => {
  const nodes = tree.children;

  // First pass: remember where every top-level heading sits.
  const markers = [];
  for (const [index, node] of nodes.entries()) {
    if (node.type !== "heading") continue;
    markers.push({
      heading: extractPlainText(node),
      level: node.depth,
      line: getNodeStartLine(node),
      index
    });
  }

  // Second pass: slice out the nodes that follow each heading.
  return markers.map((marker, position) => {
    const following = markers[position + 1];
    const stopIndex = following ? following.index : nodes.length;
    const contentNodes = nodes.slice(marker.index + 1, stopIndex);
    const tail = contentNodes[contentNodes.length - 1];
    return {
      heading: marker.heading,
      level: marker.level,
      startLine: marker.line,
      // A heading without content ends on its own line.
      endLine: tail ? getNodeEndLine(tail) : marker.line,
      contentStartLine: marker.line + 1,
      contentNodes
    };
  });
};
151
/**
 * Nest flat sections into a tree according to heading level.
 *
 * A section becomes a child of the closest preceding section with a strictly
 * smaller level; sections with no such ancestor are returned as roots.
 *
 * @param rawSections - Flat sections in document order.
 * @param {string} docId - Document id used as the prefix of each section id.
 * @param {string[]} lines - Source lines; `startLine`/`endLine` index into it (1-based).
 * @returns Root sections, each with nested `children`.
 */
var buildSectionHierarchy = (rawSections, docId, lines) => {
  const roots = [];
  // Chain of currently open ancestors, outermost first.
  const ancestors = [];

  for (const raw of rawSections) {
    const { heading, level, startLine, endLine, contentNodes } = raw;
    const containsType = (type) => contentNodes.some((n) => n.type === type);
    const content = lines.slice(startLine - 1, endLine).join("\n");
    const plainText = extractSectionPlainText(contentNodes);

    const section = {
      id: `${docId}-${slugify(heading)}`,
      heading,
      level,
      content,
      plainText,
      startLine,
      endLine,
      children: [],
      metadata: {
        wordCount: countWords(plainText),
        tokenCount: countTokensApprox(content),
        hasCode: containsType("code"),
        hasList: containsType("list"),
        hasTable: containsType("table")
      }
    };

    // Close every open section that is not shallower than this one.
    while (ancestors.length > 0 && ancestors[ancestors.length - 1].level >= level) {
      ancestors.pop();
    }

    const parent = ancestors[ancestors.length - 1];
    const siblings = parent ? parent.section.children : roots;
    siblings.push(section);
    ancestors.push({ section, level });
  }

  return roots;
};
197
/**
 * Produce plain text for a list of content nodes.
 *
 * A node with a string `value` contributes that value; otherwise a node with
 * `children` contributes the text gathered by `extractPlainText`. Nodes with
 * neither are skipped.
 *
 * @param nodes - Content nodes of one section.
 * @returns {string} Contributions joined with single spaces.
 */
var extractSectionPlainText = (nodes) => {
  const fragments = nodes.flatMap((node) => {
    if ("value" in node && typeof node.value === "string") {
      return [node.value];
    }
    return "children" in node ? [extractPlainText(node)] : [];
  });
  return fragments.join(" ");
};
208
/**
 * Count sections in a section tree, including all nested children.
 * @param sections - Sections, each with a `children` array.
 * @returns {number} Total number of sections at every depth.
 */
var countAllSections = (sections) =>
  sections.reduce(
    (total, section) => total + 1 + countAllSections(section.children),
    0
  );
216
/**
 * Collect every link and image in the tree.
 *
 * While walking, the id of the most recently visited heading is tracked so
 * each entry can be attributed to a section; entries seen before any heading
 * use `docId` itself.
 *
 * @param tree - Root node to walk.
 * @param {string} docId - Document id used to build section ids.
 * @returns Array of `{ type, href, text, sectionId, line }`, where `type` is
 *   `"internal"`, `"external"` or `"image"`.
 */
var extractLinks = (tree, docId) => {
  const found = [];
  let sectionId = docId;

  visit(tree, (node) => {
    switch (node.type) {
      case "heading":
        sectionId = `${docId}-${slugify(extractPlainText(node))}`;
        break;
      case "link":
        found.push({
          type: isInternalLink(node.url) ? "internal" : "external",
          href: node.url,
          text: extractPlainText(node),
          sectionId,
          line: getNodeStartLine(node)
        });
        break;
      case "image":
        found.push({
          type: "image",
          href: node.url,
          text: node.alt ?? "",
          sectionId,
          line: getNodeStartLine(node)
        });
        break;
      default:
        break;
    }
  });

  return found;
};
247
/**
 * Collect every `code` node in the tree.
 *
 * Each block is attributed to the most recently visited heading; blocks seen
 * before any heading use `docId` itself as the section id.
 *
 * @param tree - Root node to walk.
 * @param {string} docId - Document id used to build section ids.
 * @returns Array of `{ language, content, sectionId, startLine, endLine }`;
 *   `language` is `null` when the node has no `lang`.
 */
var extractCodeBlocks = (tree, docId) => {
  const blocks = [];
  let sectionId = docId;

  visit(tree, (node) => {
    if (node.type === "heading") {
      sectionId = `${docId}-${slugify(extractPlainText(node))}`;
      return;
    }
    if (node.type !== "code") {
      return;
    }
    blocks.push({
      language: node.lang ?? null,
      content: node.value,
      sectionId,
      startLine: getNodeStartLine(node),
      endLine: getNodeEndLine(node)
    });
  });

  return blocks;
};
267
/**
 * Parse markdown source into a document record.
 *
 * Frontmatter is read with gray-matter; if that throws, a warning is logged
 * and the whole input is treated as markdown with empty frontmatter.
 *
 * Title precedence: first top-level level-1 heading, then a string
 * `frontmatter.title`, then the file name without `.md`, then `"Untitled"`.
 *
 * NOTE: section/link/code-block line numbers come from parsing
 * `markdownContent` (the body gray-matter returns), not the raw `content`.
 *
 * @param {string} content - Raw file content.
 * @param {{ path?: string, lastModified?: Date }} [options] - `path` defaults
 *   to `"unknown"`; `lastModified` defaults to the time of the call.
 * @returns An Effect resolving to
 *   `{ id, path, title, frontmatter, sections, links, codeBlocks, metadata }`.
 */
var parse = (content, options = {}) => Effect2.gen(function* () {
  const path3 = options.path ?? "unknown";
  const docId = generateId(path3);
  const now = new Date();

  // Separate frontmatter from the body, tolerating malformed frontmatter.
  const readFrontmatter = () => {
    try {
      const { data, content: body } = matter(content);
      return { frontmatter: data, markdownContent: body };
    } catch (error) {
      const msg = error instanceof Error ? error.message : String(error);
      console.warn(
        `Warning: Malformed frontmatter in ${path3}, skipping: ${msg.split("\n")[0]}`
      );
      return { frontmatter: {}, markdownContent: content };
    }
  };
  const { frontmatter, markdownContent } = readFrontmatter();

  const tree = processor.parse(markdownContent);
  const sections = buildSectionHierarchy(
    extractRawSections(tree),
    docId,
    markdownContent.split("\n")
  );
  const links = extractLinks(tree, docId);
  const codeBlocks = extractCodeBlocks(tree, docId);

  const firstH1 = sections.find((s) => s.level === 1);
  const frontmatterTitle = typeof frontmatter.title === "string" ? frontmatter.title : null;
  const fileStem = path3.split("/").pop()?.replace(/\.md$/, "");
  const title = firstH1?.heading ?? frontmatterTitle ?? fileStem ?? "Untitled";

  // Word count covers root sections' content; token count covers the raw input.
  const totalContent = sections.map((s) => s.content).join("\n");

  return {
    id: docId,
    path: path3,
    title,
    frontmatter,
    sections,
    links,
    codeBlocks,
    metadata: {
      wordCount: countWords(totalContent),
      tokenCount: countTokensApprox(content),
      headingCount: countAllSections(sections),
      linkCount: links.length,
      codeBlockCount: codeBlocks.length,
      lastModified: options.lastModified ?? now,
      indexedAt: now
    }
  };
});
313
/**
 * Read a markdown file from disk and parse it.
 *
 * Read/stat failures are surfaced as a typed failure
 * `{ _tag: "IoError", message, path }`. This uses `Effect.tryPromise`,
 * because a `try/catch` around `yield*` inside `Effect.gen` never sees a
 * failing effect, and `Effect.promise` treats rejections as defects rather
 * than recoverable errors.
 *
 * @param {string} filePath - Path of the file to read.
 * @returns An Effect resolving to the parsed document (see `parse`), with the
 *   file's mtime as `lastModified`, or failing with an `IoError` value.
 */
var parseFile = (filePath) => Effect2.gen(function* () {
  const fs3 = yield* Effect2.promise(() => import("fs/promises"));

  const [content, stats] = yield* Effect2.tryPromise({
    try: () => Promise.all([
      fs3.readFile(filePath, "utf-8"),
      fs3.stat(filePath)
    ]),
    catch: (error) => ({
      _tag: "IoError",
      message: error instanceof Error ? error.message : "Unknown error",
      path: filePath
    })
  });

  return yield* parse(content, {
    path: filePath,
    lastModified: stats.mtime
  });
});
335
+
336
+ // src/index/types.ts
337
// Name of the directory (under the indexed root) that holds all index data.
var INDEX_DIR = ".mdcontext";
// Version number stamped into config and index files.
var INDEX_VERSION = 1;

/**
 * Compute the locations of all index artifacts for a root directory.
 * Paths are built with `/` separators directly from `rootPath`.
 *
 * @param {string} rootPath - Root directory being indexed.
 * @returns {{ root: string, config: string, documents: string, sections: string,
 *   links: string, cache: string, parsed: string }}
 */
var getIndexPaths = (rootPath) => {
  const root = `${rootPath}/${INDEX_DIR}`;
  const indexes = `${root}/indexes`;
  const cache = `${root}/cache`;
  return {
    root,
    config: `${root}/config.json`,
    documents: `${indexes}/documents.json`,
    sections: `${indexes}/sections.json`,
    links: `${indexes}/links.json`,
    cache,
    parsed: `${cache}/parsed`
  };
};
348
+
349
+ // src/index/storage.ts
350
+ import * as crypto2 from "crypto";
351
+ import * as fs from "fs/promises";
352
+ import * as path from "path";
353
+ import { Effect as Effect3 } from "effect";
354
/**
 * Create `dirPath` (and any missing parents).
 * @param {string} dirPath - Directory to create.
 * @returns An Effect resolving to `undefined`, or failing with an `Error`
 *   whose message names the directory.
 */
var ensureDir = (dirPath) => {
  const makeDir = Effect3.tryPromise({
    try: () => fs.mkdir(dirPath, { recursive: true }),
    catch: (e) => new Error(`Failed to create directory ${dirPath}: ${e}`)
  });
  // Discard mkdir's result so callers always get undefined.
  return makeDir.pipe(Effect3.map(() => undefined));
};
358
/**
 * Read and parse a JSON file.
 *
 * Any failure while reading or parsing is swallowed by the inner handler and
 * reported as `null`, so a missing or unparsable file looks the same to callers.
 *
 * @param {string} filePath - File to read.
 * @returns An Effect resolving to the parsed value, or `null`.
 */
var readJsonFile = (filePath) => {
  const readOrNull = async () => {
    try {
      return JSON.parse(await fs.readFile(filePath, "utf-8"));
    } catch {
      return null;
    }
  };
  return Effect3.tryPromise({
    try: () => readOrNull(),
    catch: (e) => new Error(`Failed to read ${filePath}: ${e}`)
  });
};
369
/**
 * Serialize `data` as two-space-indented JSON and write it to `filePath`,
 * creating the parent directory first.
 *
 * @param {string} filePath - Destination file.
 * @param data - Value to serialize.
 * @returns An Effect that fails with an `Error` if the directory cannot be
 *   created or the file cannot be written.
 */
var writeJsonFile = (filePath, data) => Effect3.gen(function* () {
  yield* ensureDir(path.dirname(filePath));
  const persist = Effect3.tryPromise({
    try: () => fs.writeFile(filePath, JSON.stringify(data, null, 2)),
    catch: (e) => new Error(`Failed to write ${filePath}: ${e}`)
  });
  yield* persist;
});
377
/**
 * Fingerprint file content for change detection.
 * @param {string} content - Content to hash.
 * @returns {string} First 16 hex characters of the SHA-256 digest.
 */
var computeHash = (content) => {
  const hexDigest = crypto2.createHash("sha256").update(content).digest("hex");
  return hexDigest.slice(0, 16);
};
380
/**
 * Build the storage descriptor for an indexed root.
 * @param {string} rootPath - Root directory; resolved to an absolute path.
 * @returns {{ rootPath: string, paths: object }} Absolute root plus the index
 *   paths derived from it.
 */
var createStorage = (rootPath) => {
  const absoluteRoot = path.resolve(rootPath);
  return {
    rootPath: absoluteRoot,
    paths: getIndexPaths(absoluteRoot)
  };
};
384
/**
 * Prepare the on-disk index layout for `storage`.
 *
 * Creates the index root, the parsed-cache directory and the directory that
 * holds the index files, then writes a default config if none can be loaded.
 *
 * @param storage - Storage descriptor from `createStorage`.
 * @returns An Effect resolving to `undefined`.
 */
var initializeIndex = (storage) => Effect3.gen(function* () {
  const { paths } = storage;
  const requiredDirs = [paths.root, paths.parsed, path.dirname(paths.documents)];
  for (const dir of requiredDirs) {
    yield* ensureDir(dir);
  }

  const existingConfig = yield* loadConfig(storage);
  if (existingConfig) {
    return;
  }

  // saveConfig stamps its own updatedAt, so both fields can share one value here.
  const timestamp = new Date().toISOString();
  yield* saveConfig(storage, {
    version: INDEX_VERSION,
    rootPath: storage.rootPath,
    include: ["**/*.md", "**/*.mdx"],
    exclude: ["**/node_modules/**", "**/.*/**"],
    createdAt: timestamp,
    updatedAt: timestamp
  });
});
401
/**
 * Load the index config.
 * @param storage - Storage descriptor.
 * @returns An Effect resolving to the parsed config, or `null` (see `readJsonFile`).
 */
var loadConfig = (storage) => {
  return readJsonFile(storage.paths.config);
};

/**
 * Persist the index config, overwriting `updatedAt` with the current time.
 * @param storage - Storage descriptor.
 * @param config - Config to write; not mutated.
 */
var saveConfig = (storage, config) => {
  const stamped = { ...config, updatedAt: new Date().toISOString() };
  return writeJsonFile(storage.paths.config, stamped);
};
406
/**
 * Load the document index.
 * @returns An Effect resolving to the parsed index, or `null` (see `readJsonFile`).
 */
var loadDocumentIndex = (storage) => {
  return readJsonFile(storage.paths.documents);
};

/** Write `index` to the document index file. */
var saveDocumentIndex = (storage, index) => {
  return writeJsonFile(storage.paths.documents, index);
};

/**
 * Create a document index with no documents.
 * @param {string} rootPath - Root path recorded in the index.
 */
var createEmptyDocumentIndex = (rootPath) => {
  return {
    version: INDEX_VERSION,
    rootPath,
    documents: {}
  };
};
413
/**
 * Load the section index.
 * @returns An Effect resolving to the parsed index, or `null` (see `readJsonFile`).
 */
var loadSectionIndex = (storage) => {
  return readJsonFile(storage.paths.sections);
};

/** Write `index` to the section index file. */
var saveSectionIndex = (storage, index) => {
  return writeJsonFile(storage.paths.sections, index);
};

/** Create a section index with empty `sections`, `byHeading` and `byDocument` maps. */
var createEmptySectionIndex = () => {
  return {
    version: INDEX_VERSION,
    sections: {},
    byHeading: {},
    byDocument: {}
  };
};
421
/**
 * Load the link index.
 * @returns An Effect resolving to the parsed index, or `null` (see `readJsonFile`).
 */
var loadLinkIndex = (storage) => {
  return readJsonFile(storage.paths.links);
};

/** Write `index` to the link index file. */
var saveLinkIndex = (storage, index) => {
  return writeJsonFile(storage.paths.links, index);
};

/** Create a link index with empty `forward`/`backward` maps and no broken links. */
var createEmptyLinkIndex = () => {
  return {
    version: INDEX_VERSION,
    forward: {},
    backward: {},
    broken: []
  };
};
429
/**
 * Check whether an index has been initialized, judged by whether the config
 * file is accessible.
 *
 * @param storage - Storage descriptor.
 * @returns An Effect resolving to `true` when `fs.access` succeeds on the
 *   config path, `false` when it rejects.
 */
var indexExists = (storage) => {
  const canAccessConfig = () =>
    fs.access(storage.paths.config).then(
      () => true,
      () => false
    );
  return Effect3.tryPromise({
    try: () => canAccessConfig(),
    catch: (e) => new Error(`Failed to check index existence: ${e}`)
  });
};
440
+
441
+ // src/index/indexer.ts
442
+ import * as fs2 from "fs/promises";
443
+ import * as path2 from "path";
444
+ import { Effect as Effect4 } from "effect";
445
/**
 * Test whether a file name has a markdown extension (`.md` or `.mdx`).
 * The comparison is case-sensitive.
 */
var isMarkdownFile = (filename) => [".md", ".mdx"].some((ext) => filename.endsWith(ext));
446
/**
 * Decide whether a path is excluded by any pattern in `exclude`.
 *
 * Only two kinds of pattern are recognized; all others are ignored:
 * - a pattern containing `node_modules` excludes any path containing `node_modules`;
 * - a pattern starting with `**` + `/.*` excludes any path containing `/.`.
 * The path is lowercased before matching.
 *
 * @param {string} filePath - Path to test.
 * @param {string[]} exclude - Exclusion patterns.
 * @returns {boolean} `true` when the path should be skipped.
 */
var shouldExclude = (filePath, exclude) => {
  const lowered = filePath.toLowerCase();
  const inNodeModules = lowered.includes("node_modules");
  const hasDotSegment = lowered.includes("/.");
  return exclude.some(
    (pattern) =>
      (inNodeModules && pattern.includes("node_modules")) ||
      (hasDotSegment && pattern.startsWith("**/.*"))
  );
};
458
/**
 * Recursively collect markdown files under `dir`.
 *
 * Entries whose name starts with `.` and entries named `node_modules` are
 * always skipped. Remaining entries are checked with `shouldExclude`.
 *
 * The exclusion check uses the path RELATIVE to the walk root (with `/`
 * separators and a leading `/`), not the absolute path. Otherwise a project
 * that itself lives under a dot-directory or a `node_modules` folder (for
 * example `/home/me/.notes/docs`) would match the default patterns on every
 * entry and produce an empty result.
 *
 * @param {string} dir - Directory to scan.
 * @param {string[]} exclude - Exclusion patterns passed to `shouldExclude`.
 * @param {string} [root=dir] - Root of the walk; callers normally omit it.
 * @returns {Promise<string[]>} Paths of matching files, each built by joining
 *   `dir` with the entry names.
 */
var walkDirectory = async (dir, exclude, root = dir) => {
  const files = [];
  const entries = await fs2.readdir(dir, { withFileTypes: true });
  for (const entry of entries) {
    if (entry.name.startsWith(".") || entry.name === "node_modules") {
      continue;
    }
    const fullPath = path2.join(dir, entry.name);
    const relativeToRoot = `/${path2.relative(root, fullPath).split(path2.sep).join("/")}`;
    if (shouldExclude(relativeToRoot, exclude)) {
      continue;
    }
    if (entry.isDirectory()) {
      const subFiles = await walkDirectory(fullPath, exclude, root);
      files.push(...subFiles);
    } else if (entry.isFile() && isMarkdownFile(entry.name)) {
      files.push(fullPath);
    }
  }
  return files;
};
478
/**
 * Flatten a section tree into index entries, parent before children
 * (depth-first, document order).
 *
 * @param sections - Root sections with nested `children`.
 * @param {string} docId - Id of the owning document.
 * @param {string} docPath - Path of the owning document.
 * @returns Array of flat entries carrying position, token count and content flags.
 */
var flattenSections = (sections, docId, docPath) => {
  const toEntry = ({ id, heading, level, startLine, endLine, metadata }) => ({
    id,
    documentId: docId,
    documentPath: docPath,
    heading,
    level,
    startLine,
    endLine,
    tokenCount: metadata.tokenCount,
    hasCode: metadata.hasCode,
    hasList: metadata.hasList,
    hasTable: metadata.hasTable
  });
  const collect = (section) => [
    toEntry(section),
    ...section.children.flatMap((child) => collect(child))
  ];
  return sections.flatMap((section) => collect(section));
};
503
/**
 * Resolve an internal link to a path relative to `rootPath`.
 *
 * - `http://` / `https://` links return `null`.
 * - Pure anchors (`#section`) resolve to the linking file itself, expressed
 *   relative to the root like every other result, so it can be used as a key
 *   into root-relative index maps.
 * - Otherwise the part before `#` is resolved against the linking file's
 *   directory. Targets outside the root return `null`.
 *
 * Containment is decided from `path.relative`, not a string-prefix test, so
 * a sibling directory that merely shares a prefix with the root (root
 * `/a/b`, target `/a/bc/x.md`) is correctly treated as outside.
 *
 * @param {string} href - Link target as written in the document.
 * @param {string} fromPath - Path of the linking file (absolute, or relative to `rootPath`).
 * @param {string} rootPath - Root directory of the index.
 * @returns {string | null} Root-relative target path, or `null` if the link
 *   is external, empty, or escapes the root.
 */
var resolveInternalLink = (href, fromPath, rootPath) => {
  if (href.startsWith("http://") || href.startsWith("https://")) {
    return null;
  }
  const absoluteFrom = path2.resolve(rootPath, fromPath);
  if (href.startsWith("#")) {
    return path2.relative(rootPath, absoluteFrom);
  }
  const linkPath = href.split("#")[0] ?? "";
  if (!linkPath) return null;

  const resolved = path2.resolve(path2.dirname(absoluteFrom), linkPath);
  const relative = path2.relative(rootPath, resolved);
  const escapesRoot =
    relative === ".." ||
    relative.startsWith(`..${path2.sep}`) ||
    path2.isAbsolute(relative);
  return escapesRoot ? null : relative;
};
519
/**
 * Build or incrementally update the document, section and link indexes for
 * the markdown files under `rootPath`, then write them to disk.
 *
 * A file is skipped when its content hash and mtime match the stored entry
 * (unless `options.force`). For a changed file, its old sections, forward
 * links and backward-link contributions are removed before the new ones are
 * added.
 *
 * Behavior worth knowing:
 * - A forced rebuild (or a missing document index) starts from EMPTY section
 *   and link indexes too. Otherwise stale sections survive and `byHeading`
 *   collects duplicate ids, because there is no old document entry to clean up.
 * - Per-file read errors are recorded in `errors` and the build continues.
 * - The broken-link list is recomputed from the forward map on every run, so
 *   links that have since been fixed disappear from it.
 * - The lookup tables use null-prototype objects. Their keys come from
 *   document text (heading names, link targets), and a key such as
 *   `constructor` or `toString` would otherwise hit `Object.prototype`.
 *
 * @param {string} rootPath - Directory to index.
 * @param {{ force?: boolean, exclude?: string[] }} [options] - `force`
 *   re-indexes everything; `exclude` overrides the default exclusion patterns.
 * @returns An Effect resolving to `{ documentsIndexed, sectionsIndexed,
 *   linksIndexed, totalDocuments, totalSections, totalLinks, duration, errors }`.
 */
var buildIndex = (rootPath, options = {}) => Effect4.gen(function* () {
  const startTime = Date.now();
  const storage = createStorage(rootPath);
  const errors = [];
  yield* initializeIndex(storage);

  const existingDocIndex = yield* loadDocumentIndex(storage);
  const existingSectionIndex = yield* loadSectionIndex(storage);
  const existingLinkIndex = yield* loadLinkIndex(storage);

  // The three indexes are only meaningful together: without the old document
  // entries there is no way to clean up old sections/links for a file.
  const reuseExisting = !options.force && Boolean(existingDocIndex);
  const docIndex = reuseExisting ? existingDocIndex : createEmptyDocumentIndex(storage.rootPath);
  const sectionIndex = (reuseExisting ? existingSectionIndex : null) ?? createEmptySectionIndex();
  const linkIndex = (reuseExisting ? existingLinkIndex : null) ?? createEmptyLinkIndex();

  const exclude = options.exclude ?? ["**/node_modules/**", "**/.*/**"];
  const files = yield* Effect4.tryPromise({
    try: () => walkDirectory(storage.rootPath, exclude),
    catch: (e) => new Error(`Failed to walk directory: ${e}`)
  });

  let documentsIndexed = 0;
  let sectionsIndexed = 0;
  let linksIndexed = 0;

  // Null-prototype copies, so keys derived from document text never collide
  // with inherited Object.prototype members.
  const cloneMap = (source) => Object.assign(Object.create(null), source);
  const cloneListMap = (source) => {
    const copy = Object.create(null);
    for (const [key, list] of Object.entries(source)) {
      copy[key] = [...list];
    }
    return copy;
  };
  const removeFromList = (list, value) => {
    const idx = list.indexOf(value);
    if (idx !== -1) list.splice(idx, 1);
  };

  const mutableDocuments = cloneMap(docIndex.documents);
  const mutableSections = cloneMap(sectionIndex.sections);
  const mutableByHeading = cloneListMap(sectionIndex.byHeading);
  const mutableByDocument = cloneListMap(sectionIndex.byDocument);
  const mutableForward = cloneListMap(linkIndex.forward);
  const mutableBackward = cloneListMap(linkIndex.backward);

  for (const filePath of files) {
    const relativePath = path2.relative(storage.rootPath, filePath);
    const processFile = Effect4.gen(function* () {
      // tryPromise (not promise): a rejected read must be a recoverable
      // failure so catchAll below can record it instead of aborting the build.
      const [content, stats] = yield* Effect4.tryPromise({
        try: () => Promise.all([fs2.readFile(filePath, "utf-8"), fs2.stat(filePath)]),
        catch: (e) => new Error(
          `Failed to read ${relativePath}: ${e instanceof Error ? e.message : String(e)}`
        )
      });
      const hash = computeHash(content);
      const existingEntry = mutableDocuments[relativePath];
      if (!options.force && existingEntry && existingEntry.hash === hash && existingEntry.mtime === stats.mtime.getTime()) {
        return;
      }
      const doc = yield* parse(content, {
        path: relativePath,
        lastModified: stats.mtime
      }).pipe(
        Effect4.mapError(
          (e) => new Error(`Parse error in ${relativePath}: ${e.message}`)
        )
      );

      if (existingEntry) {
        // Drop the file's previous sections from every section lookup.
        for (const sectionId of mutableByDocument[existingEntry.id] ?? []) {
          const oldSection = mutableSections[sectionId];
          if (oldSection) {
            const headingList = mutableByHeading[oldSection.heading.toLowerCase()];
            if (headingList) removeFromList(headingList, sectionId);
          }
          delete mutableSections[sectionId];
        }
        delete mutableByDocument[existingEntry.id];

        // Withdraw this file's contribution to backward links; links that
        // still exist are re-added below.
        for (const oldTarget of mutableForward[relativePath] ?? []) {
          const sources = mutableBackward[oldTarget];
          if (!sources) continue;
          removeFromList(sources, relativePath);
          if (sources.length === 0) delete mutableBackward[oldTarget];
        }
        delete mutableForward[relativePath];
      }

      mutableDocuments[relativePath] = {
        id: doc.id,
        path: relativePath,
        title: doc.title,
        mtime: stats.mtime.getTime(),
        hash,
        tokenCount: doc.metadata.tokenCount,
        sectionCount: doc.metadata.headingCount
      };
      documentsIndexed++;

      const sections = flattenSections(doc.sections, doc.id, relativePath);
      const documentSectionIds = [];
      mutableByDocument[doc.id] = documentSectionIds;
      for (const section of sections) {
        mutableSections[section.id] = section;
        documentSectionIds.push(section.id);
        const headingKey = section.heading.toLowerCase();
        if (!mutableByHeading[headingKey]) {
          mutableByHeading[headingKey] = [];
        }
        mutableByHeading[headingKey].push(section.id);
        sectionsIndexed++;
      }

      const internalLinks = doc.links.filter((l) => l.type === "internal");
      const outgoingLinks = [];
      for (const link of internalLinks) {
        const target = resolveInternalLink(
          link.href,
          filePath,
          storage.rootPath
        );
        if (target) {
          outgoingLinks.push(target);
          if (!mutableBackward[target]) {
            mutableBackward[target] = [];
          }
          if (!mutableBackward[target].includes(relativePath)) {
            mutableBackward[target].push(relativePath);
          }
          linksIndexed++;
        }
      }
      mutableForward[relativePath] = outgoingLinks;
    }).pipe(
      Effect4.catchAll((error) => {
        errors.push({
          path: relativePath,
          message: error instanceof Error ? error.message : String(error)
        });
        return Effect4.void;
      })
    );
    yield* processFile;
  }

  // Recompute broken links from scratch: the forward map covers every indexed
  // document, so nothing is lost and repaired links drop out.
  const brokenLinks = [];
  const seenBroken = new Set();
  for (const targets of Object.values(mutableForward)) {
    for (const target of targets) {
      if (!mutableDocuments[target] && !seenBroken.has(target)) {
        seenBroken.add(target);
        brokenLinks.push(target);
      }
    }
  }

  yield* saveDocumentIndex(storage, {
    version: docIndex.version,
    rootPath: storage.rootPath,
    documents: mutableDocuments
  });
  yield* saveSectionIndex(storage, {
    version: sectionIndex.version,
    sections: mutableSections,
    byHeading: mutableByHeading,
    byDocument: mutableByDocument
  });
  yield* saveLinkIndex(storage, {
    version: linkIndex.version,
    forward: mutableForward,
    backward: mutableBackward,
    broken: brokenLinks
  });

  const duration = Date.now() - startTime;
  const totalLinks = Object.values(mutableForward).reduce(
    (sum, links) => sum + links.length,
    0
  );
  return {
    documentsIndexed,
    sectionsIndexed,
    linksIndexed,
    totalDocuments: Object.keys(mutableDocuments).length,
    totalSections: Object.keys(mutableSections).length,
    totalLinks,
    duration,
    errors
  };
});
686
/**
 * Look up the link targets recorded for a file in the forward link map.
 *
 * @param {string} rootPath - Root directory of the index.
 * @param {string} filePath - File to look up; resolved with `path.resolve`
 *   and converted to a root-relative key.
 * @returns An Effect resolving to the stored targets, or `[]` when the link
 *   index cannot be loaded or has no entry for the file.
 */
var getOutgoingLinks = (rootPath, filePath) => Effect4.gen(function* () {
  const storage = createStorage(rootPath);
  const links = yield* loadLinkIndex(storage);
  if (links) {
    const key = path2.relative(storage.rootPath, path2.resolve(filePath));
    return links.forward[key] ?? [];
  }
  return [];
});
695
/**
 * Look up the files recorded as linking to a file in the backward link map.
 *
 * @param {string} rootPath - Root directory of the index.
 * @param {string} filePath - File to look up; resolved with `path.resolve`
 *   and converted to a root-relative key.
 * @returns An Effect resolving to the stored source paths, or `[]` when the
 *   link index cannot be loaded or has no entry for the file.
 */
var getIncomingLinks = (rootPath, filePath) => Effect4.gen(function* () {
  const storage = createStorage(rootPath);
  const links = yield* loadLinkIndex(storage);
  if (links) {
    const key = path2.relative(storage.rootPath, path2.resolve(filePath));
    return links.backward[key] ?? [];
  }
  return [];
});
704
/**
 * Return the broken-link list stored in the link index.
 *
 * @param {string} rootPath - Root directory of the index.
 * @returns An Effect resolving to the stored `broken` array, or `[]` when the
 *   link index cannot be loaded.
 */
var getBrokenLinks = (rootPath) => Effect4.gen(function* () {
  const links = yield* loadLinkIndex(createStorage(rootPath));
  return links ? links.broken : [];
});
712
+
713
+ export {
714
+ countTokens,
715
+ countTokensApprox,
716
+ countWords,
717
+ freeEncoder,
718
+ parse,
719
+ parseFile,
720
+ INDEX_DIR,
721
+ INDEX_VERSION,
722
+ getIndexPaths,
723
+ computeHash,
724
+ createStorage,
725
+ initializeIndex,
726
+ loadConfig,
727
+ saveConfig,
728
+ loadDocumentIndex,
729
+ saveDocumentIndex,
730
+ createEmptyDocumentIndex,
731
+ loadSectionIndex,
732
+ saveSectionIndex,
733
+ createEmptySectionIndex,
734
+ loadLinkIndex,
735
+ saveLinkIndex,
736
+ createEmptyLinkIndex,
737
+ indexExists,
738
+ buildIndex,
739
+ getOutgoingLinks,
740
+ getIncomingLinks,
741
+ getBrokenLinks
742
+ };