mdcontext 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (251) hide show
  1. package/.changeset/config.json +9 -9
  2. package/.claude/settings.local.json +25 -0
  3. package/.github/workflows/claude-code-review.yml +44 -0
  4. package/.github/workflows/claude.yml +85 -0
  5. package/CONTRIBUTING.md +186 -0
  6. package/NOTES/NOTES +44 -0
  7. package/README.md +206 -3
  8. package/biome.json +1 -1
  9. package/dist/chunk-23UPXDNL.js +3044 -0
  10. package/dist/chunk-2W7MO2DL.js +1366 -0
  11. package/dist/chunk-3NUAZGMA.js +1689 -0
  12. package/dist/chunk-7TOWB2XB.js +366 -0
  13. package/dist/chunk-7XOTOADQ.js +3065 -0
  14. package/dist/chunk-AH2PDM2K.js +3042 -0
  15. package/dist/chunk-BNXWSZ63.js +3742 -0
  16. package/dist/chunk-BTL5DJVU.js +3222 -0
  17. package/dist/chunk-HDHYG7E4.js +104 -0
  18. package/dist/chunk-HLR4KZBP.js +3234 -0
  19. package/dist/chunk-IP3FRFEB.js +1045 -0
  20. package/dist/chunk-KHU56VDO.js +3042 -0
  21. package/dist/chunk-KRYIFLQR.js +85 -89
  22. package/dist/chunk-LBSDNLEM.js +287 -0
  23. package/dist/chunk-MNTQ7HCP.js +2643 -0
  24. package/dist/chunk-MUJELQQ6.js +1387 -0
  25. package/dist/chunk-MXJGMSLV.js +2199 -0
  26. package/dist/chunk-N6QJGC3Z.js +2636 -0
  27. package/dist/chunk-OBELGBPM.js +1713 -0
  28. package/dist/chunk-OT7R5XTA.js +3192 -0
  29. package/dist/chunk-P7X4RA2T.js +106 -0
  30. package/dist/chunk-PIDUQNC2.js +3185 -0
  31. package/dist/chunk-POGCDIH4.js +3187 -0
  32. package/dist/chunk-PSIEOQGZ.js +3043 -0
  33. package/dist/chunk-PVRT3IHA.js +3238 -0
  34. package/dist/chunk-QNN4TT23.js +1430 -0
  35. package/dist/chunk-RE3R45RJ.js +3042 -0
  36. package/dist/chunk-S7E6TFX6.js +718 -657
  37. package/dist/chunk-SG6GLU4U.js +1378 -0
  38. package/dist/chunk-SJCDV2ST.js +274 -0
  39. package/dist/chunk-SYE5XLF3.js +104 -0
  40. package/dist/chunk-T5VLYBZD.js +103 -0
  41. package/dist/chunk-TOQB7VWU.js +3238 -0
  42. package/dist/chunk-VFNMZ4ZQ.js +3228 -0
  43. package/dist/chunk-VVTGZNBT.js +1533 -1423
  44. package/dist/chunk-W7Q4RFEV.js +104 -0
  45. package/dist/chunk-XTYYVRLO.js +3190 -0
  46. package/dist/chunk-Y6MDYVJD.js +3063 -0
  47. package/dist/cli/main.js +4072 -629
  48. package/dist/index.d.ts +420 -33
  49. package/dist/index.js +8 -15
  50. package/dist/mcp/server.js +103 -7
  51. package/dist/schema-BAWSG7KY.js +22 -0
  52. package/dist/schema-E3QUPL26.js +20 -0
  53. package/dist/schema-EHL7WUT6.js +20 -0
  54. package/docs/019-USAGE.md +44 -5
  55. package/docs/020-current-implementation.md +8 -8
  56. package/docs/021-DOGFOODING-FINDINGS.md +1 -1
  57. package/docs/CONFIG.md +1123 -0
  58. package/docs/ERRORS.md +383 -0
  59. package/docs/summarization.md +320 -0
  60. package/justfile +40 -0
  61. package/package.json +39 -33
  62. package/research/INDEX.md +315 -0
  63. package/research/code-review/README.md +90 -0
  64. package/research/code-review/cli-error-handling-review.md +979 -0
  65. package/research/code-review/code-review-validation-report.md +464 -0
  66. package/research/code-review/main-ts-review.md +1128 -0
  67. package/research/config-docs/SUMMARY.md +357 -0
  68. package/research/config-docs/TEST-RESULTS.md +776 -0
  69. package/research/config-docs/TODO.md +542 -0
  70. package/research/config-docs/analysis.md +744 -0
  71. package/research/config-docs/fix-validation.md +502 -0
  72. package/research/config-docs/help-audit.md +264 -0
  73. package/research/config-docs/help-system-analysis.md +890 -0
  74. package/research/frontmatter/COMMENTS-ARE-SKIPPED.md +149 -0
  75. package/research/frontmatter/LLM-CODE-NAVIGATION.md +276 -0
  76. package/research/issue-review.md +603 -0
  77. package/research/llm-summarization/agent-cli-tools-2026.md +1082 -0
  78. package/research/llm-summarization/alternative-providers-2026.md +1428 -0
  79. package/research/llm-summarization/anthropic-2026.md +367 -0
  80. package/research/llm-summarization/claude-cli-integration.md +1706 -0
  81. package/research/llm-summarization/cli-integration-patterns.md +3155 -0
  82. package/research/llm-summarization/openai-2026.md +473 -0
  83. package/research/llm-summarization/openai-compatible-providers-2026.md +1022 -0
  84. package/research/llm-summarization/opencode-cli-integration.md +1552 -0
  85. package/research/llm-summarization/prompt-engineering-2026.md +1426 -0
  86. package/research/llm-summarization/prototype-results.md +56 -0
  87. package/research/llm-summarization/provider-switching-patterns-2026.md +2153 -0
  88. package/research/llm-summarization/typescript-llm-libraries-2026.md +2436 -0
  89. package/research/mdcontext-pudding/00-EXECUTIVE-SUMMARY.md +282 -0
  90. package/research/mdcontext-pudding/01-index-embed.md +956 -0
  91. package/research/mdcontext-pudding/02-search-COMMANDS.md +142 -0
  92. package/research/mdcontext-pudding/02-search-SUMMARY.md +146 -0
  93. package/research/mdcontext-pudding/02-search.md +970 -0
  94. package/research/mdcontext-pudding/03-context.md +779 -0
  95. package/research/mdcontext-pudding/04-navigation-and-analytics.md +803 -0
  96. package/research/mdcontext-pudding/04-tree.md +704 -0
  97. package/research/mdcontext-pudding/05-config.md +1038 -0
  98. package/research/mdcontext-pudding/06-links-summary.txt +87 -0
  99. package/research/mdcontext-pudding/06-links.md +679 -0
  100. package/research/mdcontext-pudding/07-stats.md +693 -0
  101. package/research/mdcontext-pudding/BUG-FIX-PLAN.md +388 -0
  102. package/research/mdcontext-pudding/P0-BUG-VALIDATION.md +167 -0
  103. package/research/mdcontext-pudding/README.md +168 -0
  104. package/research/mdcontext-pudding/TESTING-SUMMARY.md +128 -0
  105. package/research/research-quality-review.md +834 -0
  106. package/research/semantic-search/embedding-text-analysis.md +156 -0
  107. package/research/semantic-search/multi-word-failure-reproduction.md +171 -0
  108. package/research/semantic-search/query-processing-analysis.md +207 -0
  109. package/research/semantic-search/root-cause-and-solution.md +114 -0
  110. package/research/semantic-search/threshold-validation-report.md +69 -0
  111. package/research/semantic-search/vector-search-analysis.md +63 -0
  112. package/research/test-path-issues.md +276 -0
  113. package/review/ALP-76/1-error-type-design.md +962 -0
  114. package/review/ALP-76/2-error-handling-patterns.md +906 -0
  115. package/review/ALP-76/3-error-presentation.md +624 -0
  116. package/review/ALP-76/4-test-coverage.md +625 -0
  117. package/review/ALP-76/5-migration-completeness.md +440 -0
  118. package/review/ALP-76/6-effect-best-practices.md +755 -0
  119. package/scripts/apply-branch-protection.sh +47 -0
  120. package/scripts/branch-protection-templates.json +79 -0
  121. package/scripts/prototype-summarization.ts +346 -0
  122. package/scripts/rebuild-hnswlib.js +32 -37
  123. package/scripts/setup-branch-protection.sh +64 -0
  124. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/active-provider.json +7 -0
  125. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.json +541 -0
  126. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.meta.json +5 -0
  127. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/config.json +8 -0
  128. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
  129. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
  130. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/documents.json +60 -0
  131. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/links.json +13 -0
  132. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/sections.json +1197 -0
  133. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/configuration-management.md +99 -0
  134. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/distributed-systems.md +92 -0
  135. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/error-handling.md +78 -0
  136. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/failure-automation.md +55 -0
  137. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/job-context.md +69 -0
  138. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/process-orchestration.md +99 -0
  139. package/src/cli/argv-preprocessor.test.ts +2 -2
  140. package/src/cli/cli.test.ts +230 -33
  141. package/src/cli/commands/config-cmd.ts +642 -0
  142. package/src/cli/commands/context.ts +97 -9
  143. package/src/cli/commands/duplicates.ts +122 -0
  144. package/src/cli/commands/embeddings.ts +529 -0
  145. package/src/cli/commands/index-cmd.ts +210 -30
  146. package/src/cli/commands/index.ts +3 -0
  147. package/src/cli/commands/search.ts +894 -64
  148. package/src/cli/commands/stats.ts +3 -0
  149. package/src/cli/commands/tree.ts +26 -5
  150. package/src/cli/config-layer.ts +176 -0
  151. package/src/cli/error-handler.test.ts +235 -0
  152. package/src/cli/error-handler.ts +655 -0
  153. package/src/cli/flag-schemas.ts +66 -0
  154. package/src/cli/help.ts +209 -7
  155. package/src/cli/main.ts +348 -58
  156. package/src/cli/options.ts +10 -0
  157. package/src/cli/shared-error-handling.ts +199 -0
  158. package/src/cli/utils.ts +150 -17
  159. package/src/config/file-provider.test.ts +320 -0
  160. package/src/config/file-provider.ts +273 -0
  161. package/src/config/index.ts +72 -0
  162. package/src/config/integration.test.ts +667 -0
  163. package/src/config/precedence.test.ts +277 -0
  164. package/src/config/precedence.ts +451 -0
  165. package/src/config/schema.test.ts +414 -0
  166. package/src/config/schema.ts +603 -0
  167. package/src/config/service.test.ts +320 -0
  168. package/src/config/service.ts +243 -0
  169. package/src/config/testing.test.ts +264 -0
  170. package/src/config/testing.ts +110 -0
  171. package/src/core/types.ts +6 -33
  172. package/src/duplicates/detector.test.ts +183 -0
  173. package/src/duplicates/detector.ts +414 -0
  174. package/src/duplicates/index.ts +18 -0
  175. package/src/embeddings/embedding-namespace.test.ts +300 -0
  176. package/src/embeddings/embedding-namespace.ts +947 -0
  177. package/src/embeddings/heading-boost.test.ts +222 -0
  178. package/src/embeddings/hnsw-build-options.test.ts +198 -0
  179. package/src/embeddings/hyde.test.ts +272 -0
  180. package/src/embeddings/hyde.ts +264 -0
  181. package/src/embeddings/index.ts +2 -0
  182. package/src/embeddings/openai-provider.ts +332 -83
  183. package/src/embeddings/pricing.json +22 -0
  184. package/src/embeddings/provider-constants.ts +204 -0
  185. package/src/embeddings/provider-errors.test.ts +967 -0
  186. package/src/embeddings/provider-errors.ts +565 -0
  187. package/src/embeddings/provider-factory.test.ts +240 -0
  188. package/src/embeddings/provider-factory.ts +225 -0
  189. package/src/embeddings/provider-integration.test.ts +788 -0
  190. package/src/embeddings/query-preprocessing.test.ts +187 -0
  191. package/src/embeddings/semantic-search-threshold.test.ts +508 -0
  192. package/src/embeddings/semantic-search.ts +780 -93
  193. package/src/embeddings/types.ts +293 -16
  194. package/src/embeddings/vector-store.ts +486 -77
  195. package/src/embeddings/voyage-provider.ts +313 -0
  196. package/src/errors/errors.test.ts +845 -0
  197. package/src/errors/index.ts +533 -0
  198. package/src/index/ignore-patterns.test.ts +354 -0
  199. package/src/index/ignore-patterns.ts +305 -0
  200. package/src/index/indexer.ts +286 -48
  201. package/src/index/storage.ts +94 -30
  202. package/src/index/types.ts +40 -2
  203. package/src/index/watcher.ts +67 -9
  204. package/src/index.ts +22 -0
  205. package/src/integration/search-keyword.test.ts +678 -0
  206. package/src/mcp/server.ts +135 -6
  207. package/src/parser/parser.ts +18 -19
  208. package/src/parser/section-filter.test.ts +277 -0
  209. package/src/parser/section-filter.ts +125 -3
  210. package/src/search/__tests__/hybrid-search.test.ts +650 -0
  211. package/src/search/bm25-store.ts +366 -0
  212. package/src/search/cross-encoder.test.ts +253 -0
  213. package/src/search/cross-encoder.ts +406 -0
  214. package/src/search/fuzzy-search.test.ts +419 -0
  215. package/src/search/fuzzy-search.ts +273 -0
  216. package/src/search/hybrid-search.ts +448 -0
  217. package/src/search/path-matcher.test.ts +276 -0
  218. package/src/search/path-matcher.ts +33 -0
  219. package/src/search/searcher.test.ts +99 -1
  220. package/src/search/searcher.ts +189 -67
  221. package/src/search/wink-bm25.d.ts +30 -0
  222. package/src/summarization/cli-providers/claude.ts +202 -0
  223. package/src/summarization/cli-providers/detection.test.ts +273 -0
  224. package/src/summarization/cli-providers/detection.ts +118 -0
  225. package/src/summarization/cli-providers/index.ts +8 -0
  226. package/src/summarization/cost.test.ts +139 -0
  227. package/src/summarization/cost.ts +102 -0
  228. package/src/summarization/error-handler.test.ts +127 -0
  229. package/src/summarization/error-handler.ts +111 -0
  230. package/src/summarization/index.ts +102 -0
  231. package/src/summarization/pipeline.test.ts +498 -0
  232. package/src/summarization/pipeline.ts +231 -0
  233. package/src/summarization/prompts.test.ts +269 -0
  234. package/src/summarization/prompts.ts +133 -0
  235. package/src/summarization/provider-factory.test.ts +396 -0
  236. package/src/summarization/provider-factory.ts +178 -0
  237. package/src/summarization/types.ts +184 -0
  238. package/src/summarize/summarizer.ts +104 -35
  239. package/src/types/huggingface-transformers.d.ts +66 -0
  240. package/tests/fixtures/cli/.mdcontext/active-provider.json +7 -0
  241. package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
  242. package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
  243. package/tests/fixtures/cli/.mdcontext/indexes/documents.json +4 -4
  244. package/tests/fixtures/cli/.mdcontext/indexes/sections.json +14 -0
  245. package/tests/integration/embed-index.test.ts +712 -0
  246. package/tests/integration/search-context.test.ts +469 -0
  247. package/tests/integration/search-semantic.test.ts +522 -0
  248. package/vitest.config.ts +1 -6
  249. package/AGENTS.md +0 -46
  250. package/tests/fixtures/cli/.mdcontext/vectors.bin +0 -0
  251. package/tests/fixtures/cli/.mdcontext/vectors.meta.json +0 -1264
@@ -0,0 +1,1045 @@
1
+ // src/utils/tokens.ts
2
+ import { Effect } from "effect";
3
+
4
/**
 * Module-level cache for the tiktoken encoder. `null` means "not created
 * yet" (or released again by `freeEncoder`).
 */
var encoder = null;

/**
 * Effect that yields the shared `cl100k_base` encoder, importing the
 * `tiktoken` module and creating the encoder on first use.
 */
var getEncoder = Effect.gen(function* () {
  if (encoder === null) {
    const { get_encoding } = yield* Effect.promise(() => import("tiktoken"));
    // The dynamic import suspends this generator, so another run of this
    // effect may have created the encoder in the meantime. Re-check before
    // creating one: overwriting the cached instance would orphan an encoder
    // that `freeEncoder` could never release.
    if (encoder === null) {
      encoder = get_encoding("cl100k_base");
    }
  }
  return encoder;
});
12
/**
 * Exact token count for `text` using the shared encoder.
 *
 * @param {string} text - Text to encode.
 * @returns An Effect that resolves to the number of encoded tokens.
 */
var countTokens = (text) =>
  Effect.gen(function* () {
    const sharedEncoder = yield* getEncoder;
    return sharedEncoder.encode(text).length;
  });
18
/**
 * Heuristic token estimate that never loads the real tokenizer.
 *
 * The text is split into categories (fenced code, inline code, slash
 * separated paths, punctuation, newlines, CJK, emoji, variation selectors,
 * remaining prose). Each category is weighted separately, the weights are
 * summed, and the sum is padded by 10% and rounded up.
 *
 * @param {string} text - Text to estimate.
 * @returns {number} Estimated token count (0 for the empty string).
 */
var countTokensApprox = (text) => {
  if (text.length === 0) return 0;

  // Number of matches of a global pattern in a string.
  const tally = (source, pattern) => (source.match(pattern) || []).length;

  // These three counts are taken over the full input, code included.
  const cjkCount = tally(
    text,
    /[\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff\uac00-\ud7af\u3400-\u4dbf]/g,
  );
  const emojiCount = tally(
    text,
    /[\u{1F300}-\u{1F9FF}\u{2600}-\u{26FF}\u{2700}-\u{27BF}\u{1F600}-\u{1F64F}\u{1F680}-\u{1F6FF}\u{2300}-\u{23FF}\u{2190}-\u{21FF}\u{25A0}-\u{25FF}\u{2B00}-\u{2BFF}]/gu,
  );
  const variationCount = tally(text, /[\uFE0E\uFE0F]/g);

  // Each pass below removes what it has accounted for from `remaining`.
  let remaining = text;

  // Fenced code blocks: fixed fence overhead + one per newline + chars / 2.5.
  let codeBlockTokens = 0;
  for (const fence of text.match(/```[\s\S]*?```/g) || []) {
    const overhead = /^```\w+/.test(fence) ? 6 : 4;
    const inner = fence.replace(/^```\w*\n?/, "").replace(/\n?```$/, "");
    const innerNewlines = tally(inner, /\n/g);
    const innerTokens = inner.length > 0 ? inner.length / 2.5 : 0;
    codeBlockTokens += Math.max(
      overhead,
      overhead + innerNewlines + innerTokens,
    );
    remaining = remaining.replace(fence, "");
  }

  // Inline code spans: two for the backticks + chars / 2.5.
  let inlineCodeTokens = 0;
  for (const span of remaining.match(/`[^`]+`/g) || []) {
    const inner = span.slice(1, -1);
    inlineCodeTokens += 2 + inner.length / 2.5;
    remaining = remaining.replace(span, "");
  }

  // Slash-separated paths: one per slash + remaining chars / 3.5.
  let pathTokens = 0;
  for (const pathLike of remaining.match(/(?:\/[\w.-]+)+/g) || []) {
    const slashCount = tally(pathLike, /\//g);
    const contentLength = pathLike.length - slashCount;
    pathTokens += slashCount + contentLength / 3.5;
    remaining = remaining.replace(pathLike, "");
  }

  const punctuationCount = tally(
    remaining,
    /[!?,.:;'"()[\]{}@#$%^&*+=|\\<>~\-/]/g,
  );
  const proseNewlines = tally(remaining, /\n/g);

  // Whatever is left after subtracting the separately weighted characters
  // is treated as plain prose; clamped because the subtracted counts come
  // from the full text and can exceed what remains.
  const proseLength = Math.max(
    0,
    remaining.length -
      proseNewlines -
      cjkCount -
      emojiCount -
      variationCount -
      punctuationCount,
  );

  const proseTokens = proseLength / 3.5;
  const punctuationBonus = punctuationCount * 0.8;
  const cjkTokens = cjkCount * 1.2;
  const emojiTokens = emojiCount * 2.5;

  // Newlines and variation selectors each count as one token.
  const estimate =
    proseTokens +
    proseNewlines +
    codeBlockTokens +
    inlineCodeTokens +
    pathTokens +
    punctuationBonus +
    cjkTokens +
    emojiTokens +
    variationCount;

  return Math.ceil(estimate * 1.1);
};
92
/**
 * Counts whitespace-separated words.
 *
 * @param {string} text - Text to count.
 * @returns {number} Number of words; 0 when the text is empty or blank.
 */
var countWords = (text) => {
  const body = text.trim();
  return body.length === 0 ? 0 : body.split(/\s+/).length;
};
97
/**
 * Releases the cached encoder, if one exists, and clears the cache so the
 * next `getEncoder` run creates a fresh one. Does nothing when no encoder
 * is cached.
 */
var freeEncoder = () => {
  if (encoder === null) {
    return;
  }
  encoder.free();
  encoder = null;
};
103
+
104
+ // src/parser/parser.ts
105
+ import * as crypto from "crypto";
106
+ // src/errors/index.ts
107
+ import { Data, Effect as Effect2 } from "effect";
108
+ import matter from "gray-matter";
109
+ import remarkGfm from "remark-gfm";
110
+ import remarkParse from "remark-parse";
111
+ import { unified } from "unified";
112
+ import { visit } from "unist-util-visit";
113
+
114
/**
 * Stable error codes, grouped by hundreds per subsystem. Each tagged error
 * class below exposes one of these through its `code` getter.
 *
 * Frozen so the shared table cannot be mutated by accident at runtime.
 */
var ErrorCode = Object.freeze({
  // File system errors (E1xx)
  FILE_READ: "E100",
  FILE_WRITE: "E101",
  DIRECTORY_CREATE: "E102",
  DIRECTORY_WALK: "E103",
  // Parse errors (E2xx)
  PARSE: "E200",
  // API/authentication errors (E3xx)
  API_KEY_MISSING: "E300",
  API_KEY_INVALID: "E301",
  EMBEDDING_RATE_LIMIT: "E310",
  EMBEDDING_QUOTA: "E311",
  EMBEDDING_NETWORK: "E312",
  EMBEDDING_MODEL: "E313",
  EMBEDDING_UNKNOWN: "E319",
  // Index errors (E4xx)
  INDEX_NOT_FOUND: "E400",
  INDEX_CORRUPTED: "E401",
  INDEX_BUILD: "E402",
  // Search errors (E5xx)
  DOCUMENT_NOT_FOUND: "E500",
  EMBEDDINGS_NOT_FOUND: "E501",
  // Vector store errors (E6xx)
  VECTOR_STORE: "E600",
  // Config errors (E7xx)
  CONFIG: "E700",
  // Watch errors (E8xx)
  WATCH: "E800",
  // CLI errors (E9xx)
  CLI_VALIDATION: "E900",
});
146
/** Tagged error for a failed file read; reports code E100. */
var FileReadError = class extends Data.TaggedError("FileReadError") {
  /** Error code for this failure category. */
  get code() {
    return ErrorCode.FILE_READ;
  }
};

/** Tagged error for a failed file write; reports code E101. */
var FileWriteError = class extends Data.TaggedError("FileWriteError") {
  /** Error code for this failure category. */
  get code() {
    return ErrorCode.FILE_WRITE;
  }
};

/** Tagged error for a failed directory creation; reports code E102. */
var DirectoryCreateError = class extends Data.TaggedError("DirectoryCreateError") {
  /** Error code for this failure category. */
  get code() {
    return ErrorCode.DIRECTORY_CREATE;
  }
};

/** Tagged error for a failed directory walk; reports code E103. */
var DirectoryWalkError = class extends Data.TaggedError("DirectoryWalkError") {
  /** Error code for this failure category. */
  get code() {
    return ErrorCode.DIRECTORY_WALK;
  }
};

/** Tagged error for a parse failure; reports code E200. */
var ParseError = class extends Data.TaggedError("ParseError") {
  /** Error code for this failure category. */
  get code() {
    return ErrorCode.PARSE;
  }
};

/** Tagged error for a missing API key; reports code E300. */
var ApiKeyMissingError = class extends Data.TaggedError("ApiKeyMissingError") {
  /** Error code for this failure category. */
  get code() {
    return ErrorCode.API_KEY_MISSING;
  }

  /** Message derived from the `envVar` field. */
  get message() {
    const { envVar } = this;
    return `${envVar} not set`;
  }
};

/** Tagged error for a rejected API key; reports code E301. */
var ApiKeyInvalidError = class extends Data.TaggedError("ApiKeyInvalidError") {
  /** Error code for this failure category. */
  get code() {
    return ErrorCode.API_KEY_INVALID;
  }

  /** `details` when present, otherwise a generic message naming `provider`. */
  get message() {
    const { details, provider } = this;
    return details ?? `Invalid API key for ${provider}`;
  }
};
189
/**
 * Tagged error for embedding failures. The error code depends on the
 * `reason` field; unrecognized reasons fall back to E319.
 */
var EmbeddingError = class extends Data.TaggedError("EmbeddingError") {
  /** Error code selected from `reason`. */
  get code() {
    const { reason } = this;
    if (reason === "RateLimit") return ErrorCode.EMBEDDING_RATE_LIMIT;
    if (reason === "QuotaExceeded") return ErrorCode.EMBEDDING_QUOTA;
    if (reason === "Network") return ErrorCode.EMBEDDING_NETWORK;
    if (reason === "ModelError") return ErrorCode.EMBEDDING_MODEL;
    return ErrorCode.EMBEDDING_UNKNOWN;
  }
};
205
/** Tagged error for a missing index; reports code E400. */
var IndexNotFoundError = class extends Data.TaggedError("IndexNotFoundError") {
  /** Error code for this failure category. */
  get code() {
    return ErrorCode.INDEX_NOT_FOUND;
  }

  /** Message naming the `path` field. */
  get message() {
    const { path } = this;
    return `Index not found at ${path}`;
  }
};

/** Tagged error for an unreadable or invalid index; reports code E401. */
var IndexCorruptedError = class extends Data.TaggedError("IndexCorruptedError") {
  /** Error code for this failure category. */
  get code() {
    return ErrorCode.INDEX_CORRUPTED;
  }

  /** Message naming the `path` and `reason` fields. */
  get message() {
    const { path, reason } = this;
    return `Index corrupted at ${path}: ${reason}`;
  }
};

/** Tagged error for a failed index build; reports code E402. */
var IndexBuildError = class extends Data.TaggedError("IndexBuildError") {
  /** Error code for this failure category. */
  get code() {
    return ErrorCode.INDEX_BUILD;
  }
};

/** Tagged error for a document absent from the index; reports code E500. */
var DocumentNotFoundError = class extends Data.TaggedError("DocumentNotFoundError") {
  /** Error code for this failure category. */
  get code() {
    return ErrorCode.DOCUMENT_NOT_FOUND;
  }

  /** Message naming the `path` field. */
  get message() {
    const { path } = this;
    return `Document not found in index: ${path}`;
  }
};

/** Tagged error for configuration problems; reports code E700. */
var ConfigError = class extends Data.TaggedError("ConfigError") {
  /** Error code for this failure category. */
  get code() {
    return ErrorCode.CONFIG;
  }
};

/** Tagged error for vector store failures; reports code E600. */
var VectorStoreError = class extends Data.TaggedError("VectorStoreError") {
  /** Error code for this failure category. */
  get code() {
    return ErrorCode.VECTOR_STORE;
  }
};

/** Tagged error for missing embeddings; reports code E501. */
var EmbeddingsNotFoundError = class extends Data.TaggedError("EmbeddingsNotFoundError") {
  /** Error code for this failure category. */
  get code() {
    return ErrorCode.EMBEDDINGS_NOT_FOUND;
  }

  /** Message naming the `path` field, with a hint on how to create embeddings. */
  get message() {
    const { path } = this;
    return `Embeddings not found at ${path}. Run 'mdcontext index --embed' first.`;
  }
};

/** Tagged error for file watching failures; reports code E800. */
var WatchError = class extends Data.TaggedError("WatchError") {
  /** Error code for this failure category. */
  get code() {
    return ErrorCode.WATCH;
  }
};

/** Tagged error for invalid CLI input; reports code E900. */
var CliValidationError = class extends Data.TaggedError("CliValidationError") {
  /** Error code for this failure category. */
  get code() {
    return ErrorCode.CLI_VALIDATION;
  }
};
268
+
269
+ // src/parser/parser.ts
270
/** Shared unified pipeline: markdown parsing with the GFM plugin enabled. */
var processor = unified()
  .use(remarkParse)
  .use(remarkGfm);
271
/**
 * Derives a short deterministic identifier from a string.
 *
 * @param {string} input - Value to hash (`parse` passes the document path).
 * @returns {string} First 12 hex characters of the MD5 digest of `input`.
 */
var generateId = (input) => {
  const digest = crypto.createHash("md5").update(input).digest("hex");
  return digest.slice(0, 12);
};
274
/**
 * Turns heading text into a lowercase, dash-separated slug.
 *
 * Characters other than word characters, whitespace and `-` are dropped,
 * whitespace runs become a single `-`, and repeated dashes are collapsed.
 *
 * @param {string} text - Heading text.
 * @returns {string} Slug used as the suffix of section ids.
 */
var slugify = (text) => {
  return (
    text
      .toLowerCase()
      .replace(/[^\w\s-]/g, "")
      // Trim BEFORE whitespace becomes dashes. Trimming afterwards has no
      // effect (no whitespace is left), so padded input such as " Intro "
      // or "FAQ ?" used to produce slugs with stray leading/trailing dashes.
      .trim()
      .replace(/\s+/g, "-")
      .replace(/-+/g, "-")
  );
};
282
/**
 * Decides whether a link target points inside the documentation set.
 *
 * Internal: same-document anchors (`#...`) and relative or absolute paths.
 * External: anything that starts with a URI scheme (`http:`, `https:`,
 * `mailto:`, `tel:`, `ftp:`, ...) and protocol-relative URLs (`//host/...`).
 *
 * Any scheme marks the link external, even when the target ends in `.md`
 * (e.g. `ftp://host/readme.md`).
 *
 * @param {string} href - Raw link target from the markdown source.
 * @returns {boolean} `true` for internal links.
 */
var isInternalLink = (href) => {
  if (href.startsWith("#")) return true;
  // Protocol-relative URLs name another host.
  if (href.startsWith("//")) return false;
  // RFC 3986 scheme syntax. At least two characters are required so that
  // Windows drive letters ("C:/docs/a.md") are still treated as paths.
  const hasScheme = /^[a-z][a-z\d+.-]+:/i.test(href);
  return !hasScheme;
};
289
/**
 * Collects the values of all `text` nodes under `node`, joined by single
 * spaces.
 *
 * @param node - Syntax tree node to walk.
 * @returns {string} Space-joined text node values.
 */
var extractPlainText = (node) => {
  const fragments = [];
  // Block-bodied callback on purpose: the visitor must return undefined,
  // because `visit` gives meaning to a visitor's return value.
  visit(node, "text", (leaf) => {
    fragments.push(leaf.value);
  });
  return fragments.join(" ");
};
296
/**
 * Line on which a node ends.
 *
 * @param node - Syntax tree node (may be nullish or lack position data).
 * @returns {number} `position.end.line`, or 0 when unavailable.
 */
var getNodeEndLine = (node) => {
  const line = node?.position?.end?.line;
  return line ?? 0;
};
299
/**
 * Line on which a node starts.
 *
 * @param node - Syntax tree node (may be nullish or lack position data).
 * @returns {number} `position.start.line`, or 0 when unavailable.
 */
var getNodeStartLine = (node) => {
  const line = node?.position?.start?.line;
  return line ?? 0;
};
302
/**
 * Splits the top-level children of a parsed document into flat sections,
 * one per heading.
 *
 * A section owns the nodes between its heading and the next heading of any
 * level. Nodes before the first heading belong to no section.
 *
 * @param tree - Root node whose `children` are scanned.
 * @returns {Array} Flat list of `{ heading, level, startLine, endLine,
 *   contentStartLine, contentNodes }` in document order.
 */
var extractRawSections = (tree) => {
  const nodes = tree.children;

  // Pass 1: note where every top-level heading sits.
  const marks = [];
  for (let index = 0; index < nodes.length; index += 1) {
    const node = nodes[index];
    if (node.type !== "heading") continue;
    marks.push({
      heading: extractPlainText(node),
      level: node.depth,
      line: getNodeStartLine(node),
      index,
    });
  }

  // Pass 2: each section runs up to the next heading, or the end of the tree.
  return marks.map((mark, position) => {
    const following = marks[position + 1];
    const stop = following ? following.index : nodes.length;
    const contentNodes = nodes.slice(mark.index + 1, stop);
    const tail = contentNodes[contentNodes.length - 1];
    return {
      heading: mark.heading,
      level: mark.level,
      startLine: mark.line,
      // A heading with no content ends on its own line.
      endLine: tail ? getNodeEndLine(tail) : mark.line,
      contentStartLine: mark.line + 1,
      contentNodes,
    };
  });
};
333
/**
 * Nests the flat section list by heading level and attaches content and
 * metadata to each section.
 *
 * @param {Array} rawSections - Output of `extractRawSections`.
 * @param {string} docId - Document id, used as the prefix of section ids.
 * @param {string[]} lines - Source lines that the section line numbers index into.
 * @returns {Array} Top-level sections; deeper sections hang off `children`.
 */
var buildSectionHierarchy = (rawSections, docId, lines) => {
  const roots = [];
  // Chain of currently open ancestors, outermost first.
  const ancestors = [];
  const containsType = (nodes, type) => nodes.some((n) => n.type === type);

  for (const raw of rawSections) {
    const { heading, level, startLine, endLine, contentNodes } = raw;
    // Line numbers are 1-based and inclusive; the slice includes the heading line.
    const content = lines.slice(startLine - 1, endLine).join("\n");
    const plainText = extractSectionPlainText(contentNodes);

    const section = {
      id: `${docId}-${slugify(heading)}`,
      heading,
      level,
      content,
      plainText,
      startLine,
      endLine,
      children: [],
      metadata: {
        wordCount: countWords(plainText),
        tokenCount: countTokensApprox(content),
        hasCode: containsType(contentNodes, "code"),
        hasList: containsType(contentNodes, "list"),
        hasTable: containsType(contentNodes, "table"),
      },
    };

    // Close every open section at the same or a deeper level.
    while (
      ancestors.length > 0 &&
      ancestors[ancestors.length - 1].level >= level
    ) {
      ancestors.pop();
    }

    const parent = ancestors[ancestors.length - 1];
    if (parent === undefined) {
      roots.push(section);
    } else {
      parent.section.children.push(section);
    }
    ancestors.push({ section, level });
  }

  return roots;
};
373
/**
 * Plain text of a section's content nodes, joined by single spaces.
 *
 * Nodes with a string `value` contribute it directly, nodes with `children`
 * contribute their nested text, and any other node is skipped.
 *
 * @param {Array} nodes - Content nodes of one section.
 * @returns {string} Space-joined plain text.
 */
var extractSectionPlainText = (nodes) =>
  nodes
    .flatMap((node) => {
      if ("value" in node && typeof node.value === "string") {
        return [node.value];
      }
      if ("children" in node) {
        return [extractPlainText(node)];
      }
      return [];
    })
    .join(" ");
384
/**
 * Counts every section in a nested section tree.
 *
 * @param {Array} sections - Sections, each with a `children` array.
 * @returns {number} Total number of sections at all depths.
 */
var countAllSections = (sections) =>
  sections.reduce(
    (total, section) => total + 1 + countAllSections(section.children),
    0,
  );
392
/**
 * Collects every link and image in the tree, each tagged with the id of
 * the section it appears in.
 *
 * @param tree - Parsed document root.
 * @param {string} docId - Document id; also the section id used before the
 *   first heading is reached.
 * @returns {Array} `{ type, href, text, sectionId, line }` records in visit
 *   order; `type` is "internal", "external" or "image".
 */
var extractLinks = (tree, docId) => {
  const collected = [];
  let sectionId = docId;

  visit(tree, (node) => {
    switch (node.type) {
      case "heading":
        // Everything visited from here on belongs to this heading's section.
        sectionId = `${docId}-${slugify(extractPlainText(node))}`;
        break;
      case "link":
        collected.push({
          type: isInternalLink(node.url) ? "internal" : "external",
          href: node.url,
          text: extractPlainText(node),
          sectionId,
          line: getNodeStartLine(node),
        });
        break;
      case "image":
        collected.push({
          type: "image",
          href: node.url,
          text: node.alt ?? "",
          sectionId,
          line: getNodeStartLine(node),
        });
        break;
      default:
        break;
    }
  });

  return collected;
};
423
/**
 * Collects every fenced or indented code node in the tree, each tagged
 * with the id of the section it appears in.
 *
 * @param tree - Parsed document root.
 * @param {string} docId - Document id; also the section id used before the
 *   first heading is reached.
 * @returns {Array} `{ language, content, sectionId, startLine, endLine }`
 *   records in visit order; `language` is `null` when none is given.
 */
var extractCodeBlocks = (tree, docId) => {
  const collected = [];
  let sectionId = docId;

  visit(tree, (node) => {
    const { type } = node;
    if (type === "heading") {
      // Everything visited from here on belongs to this heading's section.
      sectionId = `${docId}-${slugify(extractPlainText(node))}`;
    } else if (type === "code") {
      collected.push({
        language: node.lang ?? null,
        content: node.value,
        sectionId,
        startLine: getNodeStartLine(node),
        endLine: getNodeEndLine(node),
      });
    }
  });

  return collected;
};
443
/**
 * Parses markdown text into a document record.
 *
 * Frontmatter is split off first; if it is malformed a warning is printed
 * and the whole input is parsed as markdown. Sections, links and code
 * blocks are then extracted from the syntax tree.
 *
 * NOTE(review): the tree is built from the content with frontmatter removed,
 * so the recorded line numbers appear to be relative to that stripped text
 * rather than to the original file. Confirm against callers.
 *
 * @param {string} content - Raw file content, frontmatter included.
 * @param {object} [options]
 * @param {string} [options.path] - Document path; defaults to "unknown".
 * @param {Date} [options.lastModified] - Defaults to the parse time.
 * @returns An Effect that resolves to `{ id, path, title, frontmatter,
 *   sections, links, codeBlocks, metadata }`.
 */
var parse = (content, options = {}) =>
  Effect2.gen(function* () {
    const path3 = options.path ?? "unknown";
    const docId = generateId(path3);
    const now = /* @__PURE__ */ new Date();

    let frontmatter = {};
    let markdownContent = content;
    try {
      const parsed = matter(content);
      frontmatter = parsed.data;
      markdownContent = parsed.content;
    } catch (error) {
      // Best effort: keep going without frontmatter instead of failing.
      const msg = error instanceof Error ? error.message : String(error);
      console.warn(
        `Warning: Malformed frontmatter in ${path3}, skipping: ${msg.split("\n")[0]}`,
      );
    }

    const tree = processor.parse(markdownContent);
    const lines = markdownContent.split("\n");
    const rawSections = extractRawSections(tree);
    const sections = buildSectionHierarchy(rawSections, docId, lines);
    const links = extractLinks(tree, docId);
    const codeBlocks = extractCodeBlocks(tree, docId);

    // Title precedence: first top-level H1, then a string frontmatter title,
    // then the file name without ".md".
    const firstH1 = sections.find((s) => s.level === 1);
    const title =
      firstH1?.heading ??
      (typeof frontmatter.title === "string" ? frontmatter.title : null) ??
      path3.split("/").pop()?.replace(/\.md$/, "") ??
      "Untitled";

    // `sections` holds only the top-level sections, and a section's `content`
    // stops at the next heading. Flatten the tree so that nested sections
    // are included in the document word count.
    const collectContent = (nodes) =>
      nodes.flatMap((s) => [s.content, ...collectContent(s.children)]);
    const totalContent = collectContent(sections).join("\n");

    const metadata = {
      wordCount: countWords(totalContent),
      tokenCount: countTokensApprox(content),
      headingCount: countAllSections(sections),
      linkCount: links.length,
      codeBlockCount: codeBlocks.length,
      lastModified: options.lastModified ?? now,
      indexedAt: now,
    };

    const document = {
      id: docId,
      path: path3,
      title,
      frontmatter,
      sections,
      links,
      codeBlocks,
      metadata,
    };
    return document;
  });
494
/**
 * Reads a markdown file and parses it.
 *
 * @param {string} filePath - File to read; also used as the document path.
 * @returns An Effect that resolves to the parsed document, with
 *   `lastModified` taken from the file's mtime. Fails with `FileReadError`
 *   when the file cannot be read or stat'ed.
 */
var parseFile = (filePath) =>
  Effect2.gen(function* () {
    const fs3 = yield* Effect2.promise(() => import("fs/promises"));

    // Content and stats are fetched in parallel; either failure maps to
    // the same FileReadError.
    const readContentAndStats = () =>
      Promise.all([fs3.readFile(filePath, "utf-8"), fs3.stat(filePath)]);
    const toReadError = (error) =>
      new FileReadError({
        path: filePath,
        message: error instanceof Error ? error.message : "Unknown error",
        cause: error,
      });

    const [content, stats] = yield* Effect2.tryPromise({
      try: () => readContentAndStats(),
      catch: toReadError,
    });

    return yield* parse(content, {
      path: filePath,
      lastModified: stats.mtime,
    });
  });
512
+
513
+ // src/index/types.ts
514
/** Name of the directory, under the project root, that holds all index data. */
var INDEX_DIR = ".mdcontext";

/** Version number of the index format. */
var INDEX_VERSION = 1;

/**
 * Builds the paths of every index artifact under a project root.
 *
 * @param {string} rootPath - Project root directory.
 * @returns {{root: string, config: string, documents: string,
 *   sections: string, links: string, cache: string, parsed: string}}
 *   Paths built with "/" separators.
 */
var getIndexPaths = (rootPath) => {
  const root = `${rootPath}/${INDEX_DIR}`;
  const indexes = `${root}/indexes`;
  const cache = `${root}/cache`;
  return {
    root,
    config: `${root}/config.json`,
    documents: `${indexes}/documents.json`,
    sections: `${indexes}/sections.json`,
    links: `${indexes}/links.json`,
    cache,
    parsed: `${cache}/parsed`,
  };
};
525
+
526
+ // src/index/storage.ts
527
+ import * as crypto2 from "crypto";
528
+ import { Effect as Effect3 } from "effect";
529
+ import * as fs from "fs/promises";
530
+ import * as path from "path";
531
+
532
/**
 * Create a directory (and any missing parents).
 *
 * @param dirPath - Directory to create.
 * @returns An Effect that yields `undefined` on success and fails with a
 *   DirectoryCreateError when `fs.mkdir` rejects.
 */
var ensureDir = (dirPath) => {
  const createDirectory = () => fs.mkdir(dirPath, { recursive: true });
  const toCreateError = (e) =>
    new DirectoryCreateError({
      path: dirPath,
      message: e instanceof Error ? e.message : String(e),
      cause: e,
    });
  // Discard mkdir's return value so callers always see `undefined`.
  const discardResult = () => undefined;
  return Effect3.tryPromise({
    try: createDirectory,
    catch: toCreateError,
  }).pipe(Effect3.map(discardResult));
};
542
/**
 * Read a file and parse it as JSON.
 *
 * A missing file (error code "ENOENT") is treated as an expected case and
 * yields `null`. Any other read failure fails with a FileReadError, and
 * content that is not valid JSON fails with an IndexCorruptedError.
 *
 * @param filePath - Path of the JSON file.
 * @returns An Effect yielding the parsed value, or `null` when the file does
 *   not exist.
 */
var readJsonFile = (filePath) =>
  Effect3.gen(function* () {
    const isMissingFile = (err) =>
      err && typeof err === "object" && "code" in err && err.code === "ENOENT";
    const readOutcome = yield* Effect3.tryPromise({
      try: () => fs.readFile(filePath, "utf-8"),
      // "Not found" travels through the error channel as a marker object so
      // it can be turned back into a success just below.
      catch: (err) =>
        isMissingFile(err)
          ? { notFound: true }
          : new FileReadError({
              path: filePath,
              message: err instanceof Error ? err.message : String(err),
              cause: err,
            }),
    }).pipe(
      Effect3.map((raw) => {
        if (typeof raw === "string") {
          return { content: raw };
        }
        return raw;
      }),
      // Recover only from the "not found" marker; real read errors propagate.
      Effect3.catchAll((failure) => {
        if (failure && "notFound" in failure) {
          return Effect3.succeed({ notFound: true });
        }
        return Effect3.fail(failure);
      }),
    );
    if ("notFound" in readOutcome) {
      return null;
    }
    const describeParseFailure = (err) =>
      new IndexCorruptedError({
        path: filePath,
        reason: "InvalidJson",
        details: err instanceof Error ? err.message : String(err),
      });
    return yield* Effect3.try({
      try: () => JSON.parse(readOutcome.content),
      catch: describeParseFailure,
    });
  });
581
/**
 * Serialize a value as 2-space-indented JSON and write it to a file,
 * creating the parent directory first.
 *
 * @param filePath - Destination file.
 * @param data - Value passed to JSON.stringify.
 * @returns An Effect that fails with a FileWriteError when the write rejects
 *   (directory creation failures come from `ensureDir`).
 */
var writeJsonFile = (filePath, data) =>
  Effect3.gen(function* () {
    const parentDir = path.dirname(filePath);
    yield* ensureDir(parentDir);
    const persist = () => fs.writeFile(filePath, JSON.stringify(data, null, 2));
    const toWriteError = (e) =>
      new FileWriteError({
        path: filePath,
        message: e instanceof Error ? e.message : String(e),
        cause: e,
      });
    yield* Effect3.tryPromise({ try: persist, catch: toWriteError });
  });
595
/**
 * Compute a short content fingerprint.
 *
 * @param content - Data to hash (anything `Hash.update` accepts).
 * @returns The first 16 hex characters of the SHA-256 digest.
 */
var computeHash = (content) => {
  const hasher = crypto2.createHash("sha256");
  hasher.update(content);
  const fullHex = hasher.digest("hex");
  return fullHex.substring(0, 16);
};
602
/**
 * Build the storage descriptor for an indexed root directory.
 *
 * @param rootPath - Root directory; resolved to an absolute path.
 * @returns `{ rootPath, paths }` where `paths` comes from `getIndexPaths`
 *   applied to the resolved root.
 */
var createStorage = (rootPath) => {
  const storage = {
    rootPath: path.resolve(rootPath),
    paths: getIndexPaths(path.resolve(rootPath)),
  };
  return storage;
};
606
/**
 * Make sure the index directory layout exists and that a config file is
 * present, writing a default config when none can be loaded.
 *
 * @param storage - Storage descriptor (`rootPath` and `paths`).
 * @returns An Effect yielding `undefined`.
 */
var initializeIndex = (storage) =>
  Effect3.gen(function* () {
    const requiredDirs = [
      storage.paths.root,
      storage.paths.parsed,
      path.dirname(storage.paths.documents),
    ];
    for (const requiredDir of requiredDirs) {
      yield* ensureDir(requiredDir);
    }
    const currentConfig = yield* loadConfig(storage);
    if (currentConfig) {
      // An existing config is left untouched.
      return;
    }
    const defaultConfig = {
      version: INDEX_VERSION,
      rootPath: storage.rootPath,
      include: ["**/*.md", "**/*.mdx"],
      exclude: ["**/node_modules/**", "**/.*/**"],
      createdAt: new Date().toISOString(),
      updatedAt: new Date().toISOString(),
    };
    yield* saveConfig(storage, defaultConfig);
  });
624
/**
 * Load the index config file.
 *
 * @param storage - Storage descriptor.
 * @returns The Effect produced by `readJsonFile` for the config path.
 */
var loadConfig = (storage) => {
  return readJsonFile(storage.paths.config);
};
/**
 * Persist the index config, stamping `updatedAt` with the current time.
 *
 * @param storage - Storage descriptor.
 * @param config - Config object; its own `updatedAt` (if any) is overridden.
 * @returns The Effect produced by `writeJsonFile`.
 */
var saveConfig = (storage, config) => {
  const stamped = {
    ...config,
    updatedAt: new Date().toISOString(),
  };
  return writeJsonFile(storage.paths.config, stamped);
};
630
/**
 * Load the document index file.
 *
 * @param storage - Storage descriptor.
 * @returns The Effect produced by `readJsonFile` for the documents path.
 */
var loadDocumentIndex = (storage) => {
  return readJsonFile(storage.paths.documents);
};
/**
 * Write the document index file.
 *
 * @param storage - Storage descriptor.
 * @param index - Document index to serialize.
 * @returns The Effect produced by `writeJsonFile`.
 */
var saveDocumentIndex = (storage, index) => {
  return writeJsonFile(storage.paths.documents, index);
};
/**
 * Create a document index with no documents.
 *
 * @param rootPath - Root path recorded in the index.
 * @returns `{ version, rootPath, documents: {} }`.
 */
var createEmptyDocumentIndex = (rootPath) => {
  const emptyIndex = {
    version: INDEX_VERSION,
    rootPath,
    documents: {},
  };
  return emptyIndex;
};
638
/**
 * Load the section index file.
 *
 * @param storage - Storage descriptor.
 * @returns The Effect produced by `readJsonFile` for the sections path.
 */
var loadSectionIndex = (storage) => {
  return readJsonFile(storage.paths.sections);
};
/**
 * Write the section index file.
 *
 * @param storage - Storage descriptor.
 * @param index - Section index to serialize.
 * @returns The Effect produced by `writeJsonFile`.
 */
var saveSectionIndex = (storage, index) => {
  return writeJsonFile(storage.paths.sections, index);
};
/**
 * Create a section index with no entries.
 *
 * @returns `{ version, sections: {}, byHeading: {}, byDocument: {} }`.
 */
var createEmptySectionIndex = () => {
  const emptyIndex = {
    version: INDEX_VERSION,
    sections: {},
    byHeading: {},
    byDocument: {},
  };
  return emptyIndex;
};
647
/**
 * Load the link index file.
 *
 * @param storage - Storage descriptor.
 * @returns The Effect produced by `readJsonFile` for the links path.
 */
var loadLinkIndex = (storage) => {
  return readJsonFile(storage.paths.links);
};
/**
 * Write the link index file.
 *
 * @param storage - Storage descriptor.
 * @param index - Link index to serialize.
 * @returns The Effect produced by `writeJsonFile`.
 */
var saveLinkIndex = (storage, index) => {
  return writeJsonFile(storage.paths.links, index);
};
/**
 * Create a link index with no entries.
 *
 * @returns `{ version, forward: {}, backward: {}, broken: [] }`.
 */
var createEmptyLinkIndex = () => {
  const emptyIndex = {
    version: INDEX_VERSION,
    forward: {},
    backward: {},
    broken: [],
  };
  return emptyIndex;
};
656
/**
 * Report whether an index has been initialized, judged by whether the config
 * file is accessible.
 *
 * @param storage - Storage descriptor.
 * @returns An Effect yielding `true` when `fs.access` on the config path
 *   succeeds and `false` when it fails.
 */
var indexExists = (storage) => {
  // Any failure of the access check, including a throw while building the
  // call, is folded into `false` rather than surfacing as an error.
  const probeConfig = () =>
    Promise.resolve()
      .then(() => fs.access(storage.paths.config))
      .then(
        () => true,
        () => false,
      );
  const toReadError = (e) =>
    new FileReadError({
      path: storage.paths.config,
      message: e instanceof Error ? e.message : String(e),
      cause: e,
    });
  return Effect3.tryPromise({ try: probeConfig, catch: toReadError });
};
673
+
674
+ import { Effect as Effect4 } from "effect";
675
+ // src/index/indexer.ts
676
+ import * as fs2 from "fs/promises";
677
+ import * as path2 from "path";
678
+
679
/**
 * Tell whether a file name has a markdown extension.
 *
 * The comparison is case-sensitive: only names ending in ".md" or ".mdx"
 * match.
 *
 * @param filename - File name or path.
 * @returns `true` for a ".md" / ".mdx" suffix, otherwise `false`.
 */
var isMarkdownFile = (filename) => {
  const markdownSuffixes = [".md", ".mdx"];
  return markdownSuffixes.some((suffix) => filename.endsWith(suffix));
};
681
/**
 * Decide whether a path is excluded by the given patterns.
 *
 * Only two kinds of pattern have any effect here; every other pattern is
 * ignored:
 *  - a pattern containing "node_modules" excludes any path whose lowercased
 *    form contains "node_modules";
 *  - a pattern starting with the hidden-directory glob prefix excludes any
 *    path containing "/.".
 *
 * @param filePath - Path to test.
 * @param exclude - Iterable of exclude pattern strings.
 * @returns `true` when some pattern excludes the path.
 */
var shouldExclude = (filePath, exclude) => {
  const lowered = filePath.toLowerCase();
  const pathHasNodeModules = lowered.includes("node_modules");
  const pathHasHiddenSegment = lowered.includes("/.");
  for (const glob of exclude) {
    const hitsNodeModules = glob.includes("node_modules") && pathHasNodeModules;
    if (hitsNodeModules) {
      return true;
    }
    const hitsHidden = glob.startsWith("**/.*") && pathHasHiddenSegment;
    if (hitsHidden) {
      return true;
    }
  }
  return false;
};
696
/**
 * Recursively collect markdown files beneath a directory.
 *
 * Entries whose name starts with "." or equals "node_modules" are skipped
 * outright (directories among them are counted as hidden). Remaining entries
 * are checked against the exclude patterns using their path RELATIVE to the
 * walk root, so that a root which itself lives under a hidden directory or a
 * node_modules directory (e.g. "/home/me/.notes") does not cause every entry
 * to be excluded.
 *
 * @param dir - Directory to scan.
 * @param exclude - Exclude patterns forwarded to `shouldExclude`.
 * @param rootDir - Root of the whole walk, used to relativize paths for
 *   exclusion matching. Defaults to `dir`; recursive calls pass it through.
 * @returns `{ files, skipped: { hidden, excluded } }` where `files` holds the
 *   joined paths of the markdown files found.
 */
var walkDirectory = async (dir, exclude, rootDir = dir) => {
  const files = [];
  let hiddenCount = 0;
  let excludedCount = 0;
  const entries = await fs2.readdir(dir, { withFileTypes: true });
  for (const entry of entries) {
    const fullPath = path2.join(dir, entry.name);
    if (entry.name.startsWith(".") || entry.name === "node_modules") {
      if (entry.isDirectory()) {
        hiddenCount++;
      }
      continue;
    }
    // Match exclusions against the in-root portion of the path only; the
    // components above the root are outside the user's control.
    const pathWithinRoot = path2.relative(rootDir, fullPath);
    if (shouldExclude(pathWithinRoot, exclude)) {
      excludedCount++;
      continue;
    }
    if (entry.isDirectory()) {
      const subResult = await walkDirectory(fullPath, exclude, rootDir);
      files.push(...subResult.files);
      hiddenCount += subResult.skipped.hidden;
      excludedCount += subResult.skipped.excluded;
    } else if (entry.isFile() && isMarkdownFile(entry.name)) {
      files.push(fullPath);
    }
  }
  return { files, skipped: { hidden: hiddenCount, excluded: excludedCount } };
};
724
/**
 * Flatten a section tree into a list of index entries in document order
 * (each parent immediately followed by its descendants).
 *
 * @param sections - Top-level sections; each has `children` plus the fields
 *   copied below.
 * @param docId - Document id stored on every entry as `documentId`.
 * @param docPath - Document path stored on every entry as `documentPath`.
 * @returns Array of flat section records (without content or children).
 */
var flattenSections = (sections, docId, docPath) => {
  const flat = [];
  // Explicit stack; items are pushed in reverse so they pop in source order.
  const pending = [...sections].reverse();
  while (pending.length > 0) {
    const node = pending.pop();
    flat.push({
      id: node.id,
      documentId: docId,
      documentPath: docPath,
      heading: node.heading,
      level: node.level,
      startLine: node.startLine,
      endLine: node.endLine,
      tokenCount: node.metadata.tokenCount,
      hasCode: node.metadata.hasCode,
      hasList: node.metadata.hasList,
      hasTable: node.metadata.hasTable,
    });
    const kids = [...node.children];
    for (let i = kids.length - 1; i >= 0; i--) {
      pending.push(kids[i]);
    }
  }
  return flat;
};
749
/**
 * Resolve a link found in a document to a path relative to the index root.
 *
 * - An anchor-only href ("#...") refers to the source document itself and
 *   returns `fromPath` unchanged.
 * - http:// and https:// hrefs return `null`.
 * - Otherwise the part before any "#" is resolved against the directory of
 *   `fromPath`; targets outside `rootPath` return `null`.
 *
 * Containment is decided from `path.relative` rather than a string-prefix
 * test, so a sibling directory that merely shares a prefix with the root
 * (root "/a/docs" vs. "/a/docs-old/x.md") is correctly treated as outside.
 *
 * @param href - Raw link target.
 * @param fromPath - Path of the document containing the link.
 * @param rootPath - Root directory of the index.
 * @returns The root-relative target path, `fromPath` for anchor-only links,
 *   or `null` when the link is external, empty, or outside the root.
 */
var resolveInternalLink = (href, fromPath, rootPath) => {
  if (href.startsWith("#")) {
    return fromPath;
  }
  if (href.startsWith("http://") || href.startsWith("https://")) {
    return null;
  }
  const linkPath = href.split("#")[0] ?? "";
  if (!linkPath) return null;
  const fromDir = path2.dirname(fromPath);
  const resolved = path2.resolve(fromDir, linkPath);
  const relativeTarget = path2.relative(rootPath, resolved);
  // Outside the root when the relative path climbs upward ("..", "../x") or
  // is absolute (e.g. a different drive on Windows). A name that merely
  // begins with ".." such as "..notes.md" is still inside.
  const escapesRoot =
    relativeTarget === ".." ||
    relativeTarget.startsWith(`..${path2.sep}`) ||
    path2.isAbsolute(relativeTarget);
  if (escapesRoot) {
    return null;
  }
  return relativeTarget;
};
765
/**
 * Build (or incrementally refresh) the document, section and link indexes
 * for every markdown file under `rootPath`, then persist them.
 *
 * Files whose content hash and mtime match their existing entry are skipped
 * unless `options.force` is set. A failure while reading or parsing a single
 * file is recorded in the returned `errors` array and does not stop the
 * build.
 *
 * @param rootPath - Directory to index.
 * @param options - Optional settings:
 *   - `force`: re-index everything, starting from empty indexes.
 *   - `exclude`: exclude patterns for the directory walk
 *     (default: node_modules and hidden directories).
 * @returns An Effect yielding build statistics: `documentsIndexed`,
 *   `sectionsIndexed`, `linksIndexed`, `totalDocuments`, `totalSections`,
 *   `totalLinks`, `duration` (ms), `errors` and `skipped`.
 */
var buildIndex = (rootPath, options = {}) =>
  Effect4.gen(function* () {
    const startTime = Date.now();
    const storage = createStorage(rootPath);
    const errors = [];
    yield* initializeIndex(storage);
    const existingDocIndex = yield* loadDocumentIndex(storage);
    const existingSectionIndex = yield* loadSectionIndex(storage);
    const existingLinkIndex = yield* loadLinkIndex(storage);
    // When the document index starts empty (forced rebuild or first run) the
    // section and link indexes must start empty too. Otherwise no document
    // has an "existing entry" to clean up after, so old sections/links would
    // linger and section ids would be appended to byHeading a second time.
    const startFresh = options.force || !existingDocIndex;
    const docIndex = startFresh
      ? createEmptyDocumentIndex(storage.rootPath)
      : existingDocIndex;
    const sectionIndex =
      startFresh || !existingSectionIndex
        ? createEmptySectionIndex()
        : existingSectionIndex;
    const linkIndex =
      startFresh || !existingLinkIndex
        ? createEmptyLinkIndex()
        : existingLinkIndex;
    const exclude = options.exclude ?? ["**/node_modules/**", "**/.*/**"];
    const walkResult = yield* Effect4.tryPromise({
      try: () => walkDirectory(storage.rootPath, exclude),
      catch: (e) =>
        new DirectoryWalkError({
          path: storage.rootPath,
          message: `Failed to traverse directory: ${e instanceof Error ? e.message : String(e)}`,
          cause: e,
        }),
    });
    const { files, skipped: walkSkipped } = walkResult;
    let documentsIndexed = 0;
    let sectionsIndexed = 0;
    let linksIndexed = 0;
    let unchangedCount = 0;
    // Work on copies so the loaded index objects are never mutated.
    const mutableDocuments = {
      ...docIndex.documents,
    };
    const mutableSections = {
      ...sectionIndex.sections,
    };
    const mutableByHeading = Object.fromEntries(
      Object.entries(sectionIndex.byHeading).map(([k, v]) => [k, [...v]]),
    );
    const mutableByDocument = Object.fromEntries(
      Object.entries(sectionIndex.byDocument).map(([k, v]) => [k, [...v]]),
    );
    const mutableForward = Object.fromEntries(
      Object.entries(linkIndex.forward).map(([k, v]) => [k, [...v]]),
    );
    const mutableBackward = Object.fromEntries(
      Object.entries(linkIndex.backward).map(([k, v]) => [k, [...v]]),
    );
    // Remove the first occurrence of `value` from `list`, if present.
    const removeFrom = (list, value) => {
      const idx = list.indexOf(value);
      if (idx !== -1) list.splice(idx, 1);
    };
    for (const filePath of files) {
      const relativePath = path2.relative(storage.rootPath, filePath);
      const processFile = Effect4.gen(function* () {
        // tryPromise (not promise): a rejected read must become a typed
        // failure so the catchAll below can record it. With Effect.promise a
        // rejection is a defect, which catchAll does not handle, and a single
        // unreadable file would abort the whole build.
        const [content, stats] = yield* Effect4.tryPromise({
          try: () =>
            Promise.all([fs2.readFile(filePath, "utf-8"), fs2.stat(filePath)]),
          catch: (e) =>
            new FileReadError({
              path: filePath,
              message: e instanceof Error ? e.message : String(e),
              cause: e,
            }),
        });
        const hash = computeHash(content);
        const existingEntry = mutableDocuments[relativePath];
        if (
          !options.force &&
          existingEntry &&
          existingEntry.hash === hash &&
          existingEntry.mtime === stats.mtime.getTime()
        ) {
          unchangedCount++;
          return;
        }
        const doc = yield* parse(content, {
          path: relativePath,
          lastModified: stats.mtime,
        }).pipe(
          Effect4.mapError(
            (e) =>
              new ParseError({
                message: e.message,
                path: relativePath,
                ...(e.line !== undefined && { line: e.line }),
                ...(e.column !== undefined && { column: e.column }),
              }),
          ),
        );
        if (existingEntry) {
          // Drop everything previously recorded for this document before
          // adding its fresh sections and links.
          const oldSectionIds = mutableByDocument[existingEntry.id] ?? [];
          for (const sectionId of oldSectionIds) {
            const oldSection = mutableSections[sectionId];
            if (oldSection) {
              const headingKey = oldSection.heading.toLowerCase();
              const headingList = mutableByHeading[headingKey];
              if (headingList) {
                removeFrom(headingList, sectionId);
              }
            }
            delete mutableSections[sectionId];
          }
          delete mutableByDocument[existingEntry.id];
          // Remove this document from the backlink lists of its old targets,
          // so links that were deleted from the file stop showing up as
          // incoming links elsewhere.
          for (const oldTarget of mutableForward[relativePath] ?? []) {
            const sources = mutableBackward[oldTarget];
            if (!sources) continue;
            removeFrom(sources, relativePath);
            if (sources.length === 0) {
              delete mutableBackward[oldTarget];
            }
          }
          delete mutableForward[relativePath];
        }
        mutableDocuments[relativePath] = {
          id: doc.id,
          path: relativePath,
          title: doc.title,
          mtime: stats.mtime.getTime(),
          hash,
          tokenCount: doc.metadata.tokenCount,
          sectionCount: doc.metadata.headingCount,
        };
        documentsIndexed++;
        const sections = flattenSections(doc.sections, doc.id, relativePath);
        mutableByDocument[doc.id] = [];
        for (const section of sections) {
          mutableSections[section.id] = section;
          mutableByDocument[doc.id]?.push(section.id);
          const headingKey = section.heading.toLowerCase();
          if (!mutableByHeading[headingKey]) {
            mutableByHeading[headingKey] = [];
          }
          mutableByHeading[headingKey]?.push(section.id);
          sectionsIndexed++;
        }
        const internalLinks = doc.links.filter((l) => l.type === "internal");
        const outgoingLinks = [];
        for (const link of internalLinks) {
          // Anchor-only links point at this document. resolveInternalLink
          // would hand back the absolute source path for them, which never
          // matches the root-relative keys used by the indexes (and would be
          // reported as broken), so use the relative path directly.
          const target = link.href.startsWith("#")
            ? relativePath
            : resolveInternalLink(link.href, filePath, storage.rootPath);
          if (target) {
            outgoingLinks.push(target);
            if (!mutableBackward[target]) {
              mutableBackward[target] = [];
            }
            if (!mutableBackward[target]?.includes(relativePath)) {
              mutableBackward[target]?.push(relativePath);
            }
            linksIndexed++;
          }
        }
        mutableForward[relativePath] = outgoingLinks;
      }).pipe(
        // Note: catchAll is intentional for batch file processing.
        // Individual file failures should be collected in errors array
        // rather than stopping the entire index build operation.
        Effect4.catchAll((error) => {
          const message =
            "message" in error && typeof error.message === "string"
              ? error.message
              : String(error);
          errors.push({
            path: relativePath,
            message,
          });
          return Effect4.void;
        }),
      );
      yield* processFile;
    }
    // Recompute broken links from the current forward map instead of only
    // appending to the previous list: mutableForward holds the links of every
    // indexed document (changed or not), so this drops entries whose target
    // has since been added or whose link has since been removed.
    const brokenLinks = [];
    const seenBroken = new Set();
    for (const targets of Object.values(mutableForward)) {
      for (const target of targets) {
        if (!mutableDocuments[target] && !seenBroken.has(target)) {
          seenBroken.add(target);
          brokenLinks.push(target);
        }
      }
    }
    yield* saveDocumentIndex(storage, {
      version: docIndex.version,
      rootPath: storage.rootPath,
      documents: mutableDocuments,
    });
    yield* saveSectionIndex(storage, {
      version: sectionIndex.version,
      sections: mutableSections,
      byHeading: mutableByHeading,
      byDocument: mutableByDocument,
    });
    yield* saveLinkIndex(storage, {
      version: linkIndex.version,
      forward: mutableForward,
      backward: mutableBackward,
      broken: brokenLinks,
    });
    const duration = Date.now() - startTime;
    const totalLinks = Object.values(mutableForward).reduce(
      (sum, links) => sum + links.length,
      0,
    );
    const skipped = {
      unchanged: unchangedCount,
      excluded: walkSkipped.excluded,
      hidden: walkSkipped.hidden,
      total: unchangedCount + walkSkipped.excluded + walkSkipped.hidden,
    };
    return {
      documentsIndexed,
      sectionsIndexed,
      linksIndexed,
      totalDocuments: Object.keys(mutableDocuments).length,
      totalSections: Object.keys(mutableSections).length,
      totalLinks,
      duration,
      errors,
      skipped,
    };
  });
969
/**
 * Look up the forward (outgoing) links recorded for a file.
 *
 * @param rootPath - Root directory of the index.
 * @param filePath - File to look up; resolved and made relative to the root
 *   before the lookup.
 * @returns An Effect yielding the recorded targets, or an empty array when
 *   there is no link index or no entry for the file.
 */
var getOutgoingLinks = (rootPath, filePath) =>
  Effect4.gen(function* () {
    const storage = createStorage(rootPath);
    const loadedLinks = yield* loadLinkIndex(storage);
    if (loadedLinks) {
      const absoluteFile = path2.resolve(filePath);
      const key = path2.relative(storage.rootPath, absoluteFile);
      const targets = loadedLinks.forward[key];
      return targets ?? [];
    }
    return [];
  });
982
/**
 * Look up the backward (incoming) links recorded for a file.
 *
 * @param rootPath - Root directory of the index.
 * @param filePath - File to look up; resolved and made relative to the root
 *   before the lookup.
 * @returns An Effect yielding the recorded sources, or an empty array when
 *   there is no link index or no entry for the file.
 */
var getIncomingLinks = (rootPath, filePath) =>
  Effect4.gen(function* () {
    const storage = createStorage(rootPath);
    const loadedLinks = yield* loadLinkIndex(storage);
    if (loadedLinks) {
      const absoluteFile = path2.resolve(filePath);
      const key = path2.relative(storage.rootPath, absoluteFile);
      const sources = loadedLinks.backward[key];
      return sources ?? [];
    }
    return [];
  });
995
/**
 * Return the broken-link list stored in the link index.
 *
 * @param rootPath - Root directory of the index.
 * @returns An Effect yielding the stored `broken` array, or an empty array
 *   when no link index exists.
 */
var getBrokenLinks = (rootPath) =>
  Effect4.gen(function* () {
    const loadedLinks = yield* loadLinkIndex(createStorage(rootPath));
    return loadedLinks ? loadedLinks.broken : [];
  });
1004
+
1005
+ export {
1006
+ FileReadError,
1007
+ DirectoryWalkError,
1008
+ ParseError,
1009
+ ApiKeyMissingError,
1010
+ ApiKeyInvalidError,
1011
+ EmbeddingError,
1012
+ IndexNotFoundError,
1013
+ VectorStoreError,
1014
+ EmbeddingsNotFoundError,
1015
+ WatchError,
1016
+ CliValidationError,
1017
+ countTokens,
1018
+ countTokensApprox,
1019
+ countWords,
1020
+ freeEncoder,
1021
+ parse,
1022
+ parseFile,
1023
+ INDEX_DIR,
1024
+ INDEX_VERSION,
1025
+ getIndexPaths,
1026
+ computeHash,
1027
+ createStorage,
1028
+ initializeIndex,
1029
+ loadConfig,
1030
+ saveConfig,
1031
+ loadDocumentIndex,
1032
+ saveDocumentIndex,
1033
+ createEmptyDocumentIndex,
1034
+ loadSectionIndex,
1035
+ saveSectionIndex,
1036
+ createEmptySectionIndex,
1037
+ loadLinkIndex,
1038
+ saveLinkIndex,
1039
+ createEmptyLinkIndex,
1040
+ indexExists,
1041
+ buildIndex,
1042
+ getOutgoingLinks,
1043
+ getIncomingLinks,
1044
+ getBrokenLinks,
1045
+ };