mdcontext 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (251)
  1. package/.changeset/config.json +9 -9
  2. package/.claude/settings.local.json +25 -0
  3. package/.github/workflows/claude-code-review.yml +44 -0
  4. package/.github/workflows/claude.yml +85 -0
  5. package/CONTRIBUTING.md +186 -0
  6. package/NOTES/NOTES +44 -0
  7. package/README.md +206 -3
  8. package/biome.json +1 -1
  9. package/dist/chunk-23UPXDNL.js +3044 -0
  10. package/dist/chunk-2W7MO2DL.js +1366 -0
  11. package/dist/chunk-3NUAZGMA.js +1689 -0
  12. package/dist/chunk-7TOWB2XB.js +366 -0
  13. package/dist/chunk-7XOTOADQ.js +3065 -0
  14. package/dist/chunk-AH2PDM2K.js +3042 -0
  15. package/dist/chunk-BNXWSZ63.js +3742 -0
  16. package/dist/chunk-BTL5DJVU.js +3222 -0
  17. package/dist/chunk-HDHYG7E4.js +104 -0
  18. package/dist/chunk-HLR4KZBP.js +3234 -0
  19. package/dist/chunk-IP3FRFEB.js +1045 -0
  20. package/dist/chunk-KHU56VDO.js +3042 -0
  21. package/dist/chunk-KRYIFLQR.js +85 -89
  22. package/dist/chunk-LBSDNLEM.js +287 -0
  23. package/dist/chunk-MNTQ7HCP.js +2643 -0
  24. package/dist/chunk-MUJELQQ6.js +1387 -0
  25. package/dist/chunk-MXJGMSLV.js +2199 -0
  26. package/dist/chunk-N6QJGC3Z.js +2636 -0
  27. package/dist/chunk-OBELGBPM.js +1713 -0
  28. package/dist/chunk-OT7R5XTA.js +3192 -0
  29. package/dist/chunk-P7X4RA2T.js +106 -0
  30. package/dist/chunk-PIDUQNC2.js +3185 -0
  31. package/dist/chunk-POGCDIH4.js +3187 -0
  32. package/dist/chunk-PSIEOQGZ.js +3043 -0
  33. package/dist/chunk-PVRT3IHA.js +3238 -0
  34. package/dist/chunk-QNN4TT23.js +1430 -0
  35. package/dist/chunk-RE3R45RJ.js +3042 -0
  36. package/dist/chunk-S7E6TFX6.js +718 -657
  37. package/dist/chunk-SG6GLU4U.js +1378 -0
  38. package/dist/chunk-SJCDV2ST.js +274 -0
  39. package/dist/chunk-SYE5XLF3.js +104 -0
  40. package/dist/chunk-T5VLYBZD.js +103 -0
  41. package/dist/chunk-TOQB7VWU.js +3238 -0
  42. package/dist/chunk-VFNMZ4ZQ.js +3228 -0
  43. package/dist/chunk-VVTGZNBT.js +1533 -1423
  44. package/dist/chunk-W7Q4RFEV.js +104 -0
  45. package/dist/chunk-XTYYVRLO.js +3190 -0
  46. package/dist/chunk-Y6MDYVJD.js +3063 -0
  47. package/dist/cli/main.js +4072 -629
  48. package/dist/index.d.ts +420 -33
  49. package/dist/index.js +8 -15
  50. package/dist/mcp/server.js +103 -7
  51. package/dist/schema-BAWSG7KY.js +22 -0
  52. package/dist/schema-E3QUPL26.js +20 -0
  53. package/dist/schema-EHL7WUT6.js +20 -0
  54. package/docs/019-USAGE.md +44 -5
  55. package/docs/020-current-implementation.md +8 -8
  56. package/docs/021-DOGFOODING-FINDINGS.md +1 -1
  57. package/docs/CONFIG.md +1123 -0
  58. package/docs/ERRORS.md +383 -0
  59. package/docs/summarization.md +320 -0
  60. package/justfile +40 -0
  61. package/package.json +39 -33
  62. package/research/INDEX.md +315 -0
  63. package/research/code-review/README.md +90 -0
  64. package/research/code-review/cli-error-handling-review.md +979 -0
  65. package/research/code-review/code-review-validation-report.md +464 -0
  66. package/research/code-review/main-ts-review.md +1128 -0
  67. package/research/config-docs/SUMMARY.md +357 -0
  68. package/research/config-docs/TEST-RESULTS.md +776 -0
  69. package/research/config-docs/TODO.md +542 -0
  70. package/research/config-docs/analysis.md +744 -0
  71. package/research/config-docs/fix-validation.md +502 -0
  72. package/research/config-docs/help-audit.md +264 -0
  73. package/research/config-docs/help-system-analysis.md +890 -0
  74. package/research/frontmatter/COMMENTS-ARE-SKIPPED.md +149 -0
  75. package/research/frontmatter/LLM-CODE-NAVIGATION.md +276 -0
  76. package/research/issue-review.md +603 -0
  77. package/research/llm-summarization/agent-cli-tools-2026.md +1082 -0
  78. package/research/llm-summarization/alternative-providers-2026.md +1428 -0
  79. package/research/llm-summarization/anthropic-2026.md +367 -0
  80. package/research/llm-summarization/claude-cli-integration.md +1706 -0
  81. package/research/llm-summarization/cli-integration-patterns.md +3155 -0
  82. package/research/llm-summarization/openai-2026.md +473 -0
  83. package/research/llm-summarization/openai-compatible-providers-2026.md +1022 -0
  84. package/research/llm-summarization/opencode-cli-integration.md +1552 -0
  85. package/research/llm-summarization/prompt-engineering-2026.md +1426 -0
  86. package/research/llm-summarization/prototype-results.md +56 -0
  87. package/research/llm-summarization/provider-switching-patterns-2026.md +2153 -0
  88. package/research/llm-summarization/typescript-llm-libraries-2026.md +2436 -0
  89. package/research/mdcontext-pudding/00-EXECUTIVE-SUMMARY.md +282 -0
  90. package/research/mdcontext-pudding/01-index-embed.md +956 -0
  91. package/research/mdcontext-pudding/02-search-COMMANDS.md +142 -0
  92. package/research/mdcontext-pudding/02-search-SUMMARY.md +146 -0
  93. package/research/mdcontext-pudding/02-search.md +970 -0
  94. package/research/mdcontext-pudding/03-context.md +779 -0
  95. package/research/mdcontext-pudding/04-navigation-and-analytics.md +803 -0
  96. package/research/mdcontext-pudding/04-tree.md +704 -0
  97. package/research/mdcontext-pudding/05-config.md +1038 -0
  98. package/research/mdcontext-pudding/06-links-summary.txt +87 -0
  99. package/research/mdcontext-pudding/06-links.md +679 -0
  100. package/research/mdcontext-pudding/07-stats.md +693 -0
  101. package/research/mdcontext-pudding/BUG-FIX-PLAN.md +388 -0
  102. package/research/mdcontext-pudding/P0-BUG-VALIDATION.md +167 -0
  103. package/research/mdcontext-pudding/README.md +168 -0
  104. package/research/mdcontext-pudding/TESTING-SUMMARY.md +128 -0
  105. package/research/research-quality-review.md +834 -0
  106. package/research/semantic-search/embedding-text-analysis.md +156 -0
  107. package/research/semantic-search/multi-word-failure-reproduction.md +171 -0
  108. package/research/semantic-search/query-processing-analysis.md +207 -0
  109. package/research/semantic-search/root-cause-and-solution.md +114 -0
  110. package/research/semantic-search/threshold-validation-report.md +69 -0
  111. package/research/semantic-search/vector-search-analysis.md +63 -0
  112. package/research/test-path-issues.md +276 -0
  113. package/review/ALP-76/1-error-type-design.md +962 -0
  114. package/review/ALP-76/2-error-handling-patterns.md +906 -0
  115. package/review/ALP-76/3-error-presentation.md +624 -0
  116. package/review/ALP-76/4-test-coverage.md +625 -0
  117. package/review/ALP-76/5-migration-completeness.md +440 -0
  118. package/review/ALP-76/6-effect-best-practices.md +755 -0
  119. package/scripts/apply-branch-protection.sh +47 -0
  120. package/scripts/branch-protection-templates.json +79 -0
  121. package/scripts/prototype-summarization.ts +346 -0
  122. package/scripts/rebuild-hnswlib.js +32 -37
  123. package/scripts/setup-branch-protection.sh +64 -0
  124. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/active-provider.json +7 -0
  125. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.json +541 -0
  126. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.meta.json +5 -0
  127. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/config.json +8 -0
  128. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
  129. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
  130. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/documents.json +60 -0
  131. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/links.json +13 -0
  132. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/sections.json +1197 -0
  133. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/configuration-management.md +99 -0
  134. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/distributed-systems.md +92 -0
  135. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/error-handling.md +78 -0
  136. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/failure-automation.md +55 -0
  137. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/job-context.md +69 -0
  138. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/process-orchestration.md +99 -0
  139. package/src/cli/argv-preprocessor.test.ts +2 -2
  140. package/src/cli/cli.test.ts +230 -33
  141. package/src/cli/commands/config-cmd.ts +642 -0
  142. package/src/cli/commands/context.ts +97 -9
  143. package/src/cli/commands/duplicates.ts +122 -0
  144. package/src/cli/commands/embeddings.ts +529 -0
  145. package/src/cli/commands/index-cmd.ts +210 -30
  146. package/src/cli/commands/index.ts +3 -0
  147. package/src/cli/commands/search.ts +894 -64
  148. package/src/cli/commands/stats.ts +3 -0
  149. package/src/cli/commands/tree.ts +26 -5
  150. package/src/cli/config-layer.ts +176 -0
  151. package/src/cli/error-handler.test.ts +235 -0
  152. package/src/cli/error-handler.ts +655 -0
  153. package/src/cli/flag-schemas.ts +66 -0
  154. package/src/cli/help.ts +209 -7
  155. package/src/cli/main.ts +348 -58
  156. package/src/cli/options.ts +10 -0
  157. package/src/cli/shared-error-handling.ts +199 -0
  158. package/src/cli/utils.ts +150 -17
  159. package/src/config/file-provider.test.ts +320 -0
  160. package/src/config/file-provider.ts +273 -0
  161. package/src/config/index.ts +72 -0
  162. package/src/config/integration.test.ts +667 -0
  163. package/src/config/precedence.test.ts +277 -0
  164. package/src/config/precedence.ts +451 -0
  165. package/src/config/schema.test.ts +414 -0
  166. package/src/config/schema.ts +603 -0
  167. package/src/config/service.test.ts +320 -0
  168. package/src/config/service.ts +243 -0
  169. package/src/config/testing.test.ts +264 -0
  170. package/src/config/testing.ts +110 -0
  171. package/src/core/types.ts +6 -33
  172. package/src/duplicates/detector.test.ts +183 -0
  173. package/src/duplicates/detector.ts +414 -0
  174. package/src/duplicates/index.ts +18 -0
  175. package/src/embeddings/embedding-namespace.test.ts +300 -0
  176. package/src/embeddings/embedding-namespace.ts +947 -0
  177. package/src/embeddings/heading-boost.test.ts +222 -0
  178. package/src/embeddings/hnsw-build-options.test.ts +198 -0
  179. package/src/embeddings/hyde.test.ts +272 -0
  180. package/src/embeddings/hyde.ts +264 -0
  181. package/src/embeddings/index.ts +2 -0
  182. package/src/embeddings/openai-provider.ts +332 -83
  183. package/src/embeddings/pricing.json +22 -0
  184. package/src/embeddings/provider-constants.ts +204 -0
  185. package/src/embeddings/provider-errors.test.ts +967 -0
  186. package/src/embeddings/provider-errors.ts +565 -0
  187. package/src/embeddings/provider-factory.test.ts +240 -0
  188. package/src/embeddings/provider-factory.ts +225 -0
  189. package/src/embeddings/provider-integration.test.ts +788 -0
  190. package/src/embeddings/query-preprocessing.test.ts +187 -0
  191. package/src/embeddings/semantic-search-threshold.test.ts +508 -0
  192. package/src/embeddings/semantic-search.ts +780 -93
  193. package/src/embeddings/types.ts +293 -16
  194. package/src/embeddings/vector-store.ts +486 -77
  195. package/src/embeddings/voyage-provider.ts +313 -0
  196. package/src/errors/errors.test.ts +845 -0
  197. package/src/errors/index.ts +533 -0
  198. package/src/index/ignore-patterns.test.ts +354 -0
  199. package/src/index/ignore-patterns.ts +305 -0
  200. package/src/index/indexer.ts +286 -48
  201. package/src/index/storage.ts +94 -30
  202. package/src/index/types.ts +40 -2
  203. package/src/index/watcher.ts +67 -9
  204. package/src/index.ts +22 -0
  205. package/src/integration/search-keyword.test.ts +678 -0
  206. package/src/mcp/server.ts +135 -6
  207. package/src/parser/parser.ts +18 -19
  208. package/src/parser/section-filter.test.ts +277 -0
  209. package/src/parser/section-filter.ts +125 -3
  210. package/src/search/__tests__/hybrid-search.test.ts +650 -0
  211. package/src/search/bm25-store.ts +366 -0
  212. package/src/search/cross-encoder.test.ts +253 -0
  213. package/src/search/cross-encoder.ts +406 -0
  214. package/src/search/fuzzy-search.test.ts +419 -0
  215. package/src/search/fuzzy-search.ts +273 -0
  216. package/src/search/hybrid-search.ts +448 -0
  217. package/src/search/path-matcher.test.ts +276 -0
  218. package/src/search/path-matcher.ts +33 -0
  219. package/src/search/searcher.test.ts +99 -1
  220. package/src/search/searcher.ts +189 -67
  221. package/src/search/wink-bm25.d.ts +30 -0
  222. package/src/summarization/cli-providers/claude.ts +202 -0
  223. package/src/summarization/cli-providers/detection.test.ts +273 -0
  224. package/src/summarization/cli-providers/detection.ts +118 -0
  225. package/src/summarization/cli-providers/index.ts +8 -0
  226. package/src/summarization/cost.test.ts +139 -0
  227. package/src/summarization/cost.ts +102 -0
  228. package/src/summarization/error-handler.test.ts +127 -0
  229. package/src/summarization/error-handler.ts +111 -0
  230. package/src/summarization/index.ts +102 -0
  231. package/src/summarization/pipeline.test.ts +498 -0
  232. package/src/summarization/pipeline.ts +231 -0
  233. package/src/summarization/prompts.test.ts +269 -0
  234. package/src/summarization/prompts.ts +133 -0
  235. package/src/summarization/provider-factory.test.ts +396 -0
  236. package/src/summarization/provider-factory.ts +178 -0
  237. package/src/summarization/types.ts +184 -0
  238. package/src/summarize/summarizer.ts +104 -35
  239. package/src/types/huggingface-transformers.d.ts +66 -0
  240. package/tests/fixtures/cli/.mdcontext/active-provider.json +7 -0
  241. package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
  242. package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
  243. package/tests/fixtures/cli/.mdcontext/indexes/documents.json +4 -4
  244. package/tests/fixtures/cli/.mdcontext/indexes/sections.json +14 -0
  245. package/tests/integration/embed-index.test.ts +712 -0
  246. package/tests/integration/search-context.test.ts +469 -0
  247. package/tests/integration/search-semantic.test.ts +522 -0
  248. package/vitest.config.ts +1 -6
  249. package/AGENTS.md +0 -46
  250. package/tests/fixtures/cli/.mdcontext/vectors.bin +0 -0
  251. package/tests/fixtures/cli/.mdcontext/vectors.meta.json +0 -1264
@@ -5,8 +5,18 @@
5
5
  import * as fs from 'node:fs/promises'
6
6
  import * as path from 'node:path'
7
7
  import { Effect } from 'effect'
8
+ import type { Ignore } from 'ignore'
8
9
  import type { MdSection } from '../core/types.js'
10
+ import {
11
+ type DirectoryCreateError,
12
+ DirectoryWalkError,
13
+ type FileReadError,
14
+ type FileWriteError,
15
+ type IndexCorruptedError,
16
+ ParseError,
17
+ } from '../errors/index.js'
9
18
  import { parse } from '../parser/parser.js'
19
+ import { createIgnoreFilter, shouldIgnore } from './ignore-patterns.js'
10
20
  import {
11
21
  computeHash,
12
22
  createEmptyDocumentIndex,
@@ -24,9 +34,10 @@ import {
24
34
  import type {
25
35
  DocumentEntry,
26
36
  DocumentIndex,
27
- IndexBuildError,
37
+ FileProcessingError,
28
38
  IndexResult,
29
39
  SectionEntry,
40
+ SkipSummary,
30
41
  } from './types.js'
31
42
 
32
43
  // ============================================================================
@@ -36,52 +47,68 @@ import type {
36
47
  const isMarkdownFile = (filename: string): boolean =>
37
48
  filename.endsWith('.md') || filename.endsWith('.mdx')
38
49
 
39
- const shouldExclude = (
40
- filePath: string,
41
- exclude: readonly string[],
42
- ): boolean => {
43
- const normalized = filePath.toLowerCase()
44
- for (const pattern of exclude) {
45
- if (
46
- pattern.includes('node_modules') &&
47
- normalized.includes('node_modules')
48
- ) {
49
- return true
50
- }
51
- if (pattern.startsWith('**/.*') && normalized.includes('/.')) {
52
- return true
53
- }
50
+ /**
51
+ * Result of directory walk including tracked skip counts
52
+ */
53
+ interface WalkResult {
54
+ readonly files: string[]
55
+ readonly skipped: {
56
+ hidden: number
57
+ excluded: number
54
58
  }
55
- return false
56
59
  }
57
60
 
61
/**
 * Recursively collect Markdown files beneath `dir`, tallying skipped entries.
 *
 * Entries whose name starts with `.` are neither collected nor descended
 * into; only dot-prefixed *directories* add to the `hidden` tally. Every
 * remaining entry (file or directory) is checked with `shouldIgnore` using
 * its path relative to `rootPath`, and each match adds one to `excluded`.
 *
 * @param dir - Directory scanned by this call
 * @param rootPath - Base directory that relative paths are computed against
 * @param filter - Ignore filter handed to `shouldIgnore`
 * @returns Collected file paths (each is `dir` joined with the entry name)
 *          plus the accumulated skip tallies, including those of subdirectories
 */
const walkDirectory = async (
  dir: string,
  rootPath: string,
  filter: Ignore,
): Promise<WalkResult> => {
  const markdownFiles: string[] = []
  const tally = { hidden: 0, excluded: 0 }

  for (const dirent of await fs.readdir(dir, { withFileTypes: true })) {
    const isDir = dirent.isDirectory()

    // Dot-prefixed entries are dropped outright; only directories are tallied.
    if (dirent.name.startsWith('.')) {
      if (isDir) {
        tally.hidden += 1
      }
      continue
    }

    const entryPath = path.join(dir, dirent.name)

    // The filter applies to files and directories alike.
    if (shouldIgnore(path.relative(rootPath, entryPath), filter)) {
      tally.excluded += 1
      continue
    }

    if (isDir) {
      // Fold the subdirectory's files and tallies into this level's result.
      const nested = await walkDirectory(entryPath, rootPath, filter)
      markdownFiles.push(...nested.files)
      tally.hidden += nested.skipped.hidden
      tally.excluded += nested.skipped.excluded
      continue
    }

    if (dirent.isFile() && isMarkdownFile(dirent.name)) {
      markdownFiles.push(entryPath)
    }
  }

  return { files: markdownFiles, skipped: tally }
}
86
113
 
87
114
  // ============================================================================
@@ -156,19 +183,39 @@ const resolveInternalLink = (
156
183
  // Index Building
157
184
  // ============================================================================
158
185
 
186
/**
 * Snapshot handed to an `onProgress` callback while files are being indexed.
 */
export interface IndexProgress {
  /** 1-based position of the file currently being processed */
  readonly current: number
  /** Total number of files in this run */
  readonly total: number
  /** Path of the file currently being processed */
  readonly filePath: string
}
191
+
159
192
  export interface IndexOptions {
160
- readonly force?: boolean
161
- readonly exclude?: readonly string[]
193
+ readonly force?: boolean | undefined
194
+ /** CLI/config exclude patterns (overrides ignore files) */
195
+ readonly exclude?: readonly string[] | undefined
196
+ /** Whether to honor .gitignore (default: true) */
197
+ readonly honorGitignore?: boolean | undefined
198
+ /** Whether to honor .mdcontextignore (default: true) */
199
+ readonly honorMdcontextignore?: boolean | undefined
200
+ /** Callback for progress updates during file indexing */
201
+ readonly onProgress?: ((progress: IndexProgress) => void) | undefined
162
202
  }
163
203
 
164
204
  export const buildIndex = (
165
205
  rootPath: string,
166
206
  options: IndexOptions = {},
167
- ): Effect.Effect<IndexResult, Error> =>
207
+ ): Effect.Effect<
208
+ IndexResult,
209
+ | DirectoryWalkError
210
+ | DirectoryCreateError
211
+ | FileReadError
212
+ | FileWriteError
213
+ | IndexCorruptedError
214
+ > =>
168
215
  Effect.gen(function* () {
169
216
  const startTime = Date.now()
170
217
  const storage = createStorage(rootPath)
171
- const errors: IndexBuildError[] = []
218
+ const errors: FileProcessingError[] = []
172
219
 
173
220
  // Initialize storage
174
221
  yield* initializeIndex(storage)
@@ -186,42 +233,82 @@ export const buildIndex = (
186
233
  const sectionIndex = existingSectionIndex ?? createEmptySectionIndex()
187
234
  const linkIndex = existingLinkIndex ?? createEmptyLinkIndex()
188
235
 
189
- // Discover files
190
- const exclude = options.exclude ?? ['**/node_modules/**', '**/.*/**']
191
- const files = yield* Effect.tryPromise({
192
- try: () => walkDirectory(storage.rootPath, exclude),
193
- catch: (e) => new Error(`Failed to walk directory: ${e}`),
236
+ // Build ignore filter with proper precedence:
237
+ // CLI/config patterns > .mdcontextignore > .gitignore > defaults
238
+ const ignoreResult = yield* createIgnoreFilter({
239
+ rootPath: storage.rootPath,
240
+ cliPatterns: options.exclude,
241
+ honorGitignore: options.honorGitignore ?? true,
242
+ honorMdcontextignore: options.honorMdcontextignore ?? true,
243
+ })
244
+
245
+ // Discover files using the ignore filter
246
+ const walkResult = yield* Effect.tryPromise({
247
+ try: () =>
248
+ walkDirectory(storage.rootPath, storage.rootPath, ignoreResult.filter),
249
+ catch: (e) =>
250
+ new DirectoryWalkError({
251
+ path: storage.rootPath,
252
+ message: `Failed to traverse directory: ${e instanceof Error ? e.message : String(e)}`,
253
+ cause: e,
254
+ }),
194
255
  })
195
256
 
257
+ const { files, skipped: walkSkipped } = walkResult
258
+
196
259
  // Process each file
197
260
  let documentsIndexed = 0
198
261
  let sectionsIndexed = 0
199
262
  let linksIndexed = 0
263
+ let unchangedCount = 0
200
264
 
201
265
  const mutableDocuments: Record<string, DocumentEntry> = {
202
266
  ...docIndex.documents,
203
267
  }
204
- // Initialize with existing data to preserve sections/links for unchanged files
205
268
  const mutableSections: Record<string, SectionEntry> = {
206
269
  ...sectionIndex.sections,
207
270
  }
208
- const mutableByHeading: Record<string, string[]> = Object.fromEntries(
209
- Object.entries(sectionIndex.byHeading).map(([k, v]) => [k, [...v]]),
271
+
272
+ const mutableByHeading: Record<string, string[]> = Object.assign(
273
+ Object.create(null),
274
+ Object.fromEntries(
275
+ Object.entries(sectionIndex.byHeading).map(([k, v]) => [k, [...v]]),
276
+ ),
210
277
  )
211
- const mutableByDocument: Record<string, string[]> = Object.fromEntries(
212
- Object.entries(sectionIndex.byDocument).map(([k, v]) => [k, [...v]]),
278
+ const mutableByDocument: Record<string, string[]> = Object.assign(
279
+ Object.create(null),
280
+ Object.fromEntries(
281
+ Object.entries(sectionIndex.byDocument).map(([k, v]) => [k, [...v]]),
282
+ ),
213
283
  )
214
- const mutableForward: Record<string, string[]> = Object.fromEntries(
215
- Object.entries(linkIndex.forward).map(([k, v]) => [k, [...v]]),
284
+ const mutableForward: Record<string, string[]> = Object.assign(
285
+ Object.create(null),
286
+ Object.fromEntries(
287
+ Object.entries(linkIndex.forward).map(([k, v]) => [k, [...v]]),
288
+ ),
216
289
  )
217
- const mutableBackward: Record<string, string[]> = Object.fromEntries(
218
- Object.entries(linkIndex.backward).map(([k, v]) => [k, [...v]]),
290
+ const mutableBackward: Record<string, string[]> = Object.assign(
291
+ Object.create(null),
292
+ Object.fromEntries(
293
+ Object.entries(linkIndex.backward).map(([k, v]) => [k, [...v]]),
294
+ ),
219
295
  )
220
296
  const brokenLinks: string[] = [...linkIndex.broken]
297
+ const totalFiles = files.length
221
298
 
222
- for (const filePath of files) {
299
+ for (let fileIndex = 0; fileIndex < files.length; fileIndex++) {
300
+ const filePath = files[fileIndex]!
223
301
  const relativePath = path.relative(storage.rootPath, filePath)
224
302
 
303
+ // Report progress
304
+ if (options.onProgress) {
305
+ options.onProgress({
306
+ current: fileIndex + 1,
307
+ total: totalFiles,
308
+ filePath: relativePath,
309
+ })
310
+ }
311
+
225
312
  // Process each file, collecting errors instead of failing
226
313
  const processFile = Effect.gen(function* () {
227
314
  // Read file content and stats
@@ -239,6 +326,7 @@ export const buildIndex = (
239
326
  existingEntry.hash === hash &&
240
327
  existingEntry.mtime === stats.mtime.getTime()
241
328
  ) {
329
+ unchangedCount++
242
330
  return // File unchanged, skip processing
243
331
  }
244
332
 
@@ -248,7 +336,13 @@ export const buildIndex = (
248
336
  lastModified: stats.mtime,
249
337
  }).pipe(
250
338
  Effect.mapError(
251
- (e) => new Error(`Parse error in ${relativePath}: ${e.message}`),
339
+ (e) =>
340
+ new ParseError({
341
+ message: e.message,
342
+ path: relativePath,
343
+ ...(e.line !== undefined && { line: e.line }),
344
+ ...(e.column !== undefined && { column: e.column }),
345
+ }),
252
346
  ),
253
347
  )
254
348
 
@@ -333,10 +427,18 @@ export const buildIndex = (
333
427
 
334
428
  mutableForward[relativePath] = outgoingLinks
335
429
  }).pipe(
430
+ // Note: catchAll is intentional for batch file processing.
431
+ // Individual file failures should be collected in errors array
432
+ // rather than stopping the entire index build operation.
336
433
  Effect.catchAll((error) => {
434
+ // Extract message from typed errors or generic errors
435
+ const message =
436
+ 'message' in error && typeof error.message === 'string'
437
+ ? error.message
438
+ : String(error)
337
439
  errors.push({
338
440
  path: relativePath,
339
- message: error instanceof Error ? error.message : String(error),
441
+ message,
340
442
  })
341
443
  return Effect.void
342
444
  }),
@@ -383,6 +485,14 @@ export const buildIndex = (
383
485
  0,
384
486
  )
385
487
 
488
+ // Build skip summary
489
+ const skipped: SkipSummary = {
490
+ unchanged: unchangedCount,
491
+ excluded: walkSkipped.excluded,
492
+ hidden: walkSkipped.hidden,
493
+ total: unchangedCount + walkSkipped.excluded + walkSkipped.hidden,
494
+ }
495
+
386
496
  return {
387
497
  documentsIndexed,
388
498
  sectionsIndexed,
@@ -392,6 +502,7 @@ export const buildIndex = (
392
502
  totalLinks,
393
503
  duration,
394
504
  errors,
505
+ skipped,
395
506
  }
396
507
  })
397
508
 
@@ -402,7 +513,7 @@ export const buildIndex = (
402
513
  export const getOutgoingLinks = (
403
514
  rootPath: string,
404
515
  filePath: string,
405
- ): Effect.Effect<readonly string[], Error> =>
516
+ ): Effect.Effect<readonly string[], FileReadError | IndexCorruptedError> =>
406
517
  Effect.gen(function* () {
407
518
  const storage = createStorage(rootPath)
408
519
  const linkIndex = yield* loadLinkIndex(storage)
@@ -418,7 +529,7 @@ export const getOutgoingLinks = (
418
529
  export const getIncomingLinks = (
419
530
  rootPath: string,
420
531
  filePath: string,
421
- ): Effect.Effect<readonly string[], Error> =>
532
+ ): Effect.Effect<readonly string[], FileReadError | IndexCorruptedError> =>
422
533
  Effect.gen(function* () {
423
534
  const storage = createStorage(rootPath)
424
535
  const linkIndex = yield* loadLinkIndex(storage)
@@ -433,7 +544,7 @@ export const getIncomingLinks = (
433
544
 
434
545
  export const getBrokenLinks = (
435
546
  rootPath: string,
436
- ): Effect.Effect<readonly string[], Error> =>
547
+ ): Effect.Effect<readonly string[], FileReadError | IndexCorruptedError> =>
437
548
  Effect.gen(function* () {
438
549
  const storage = createStorage(rootPath)
439
550
  const linkIndex = yield* loadLinkIndex(storage)
@@ -444,3 +555,130 @@ export const getBrokenLinks = (
444
555
 
445
556
  return linkIndex.broken
446
557
  })
558
+
559
+ // ============================================================================
560
+ // BM25 Index Building
561
+ // ============================================================================
562
+
563
+ import { type BM25Document, createBM25Store } from '../search/bm25-store.js'
564
+
565
/**
 * Options accepted by `buildBM25Index`.
 */
export interface BuildBM25Options {
  /** When true, rebuild even if a populated BM25 store can already be loaded */
  readonly force?: boolean
  /**
   * Invoked as documents are processed; `current` and `total` count
   * documents, not sections.
   */
  readonly onProgress?: (progress: { current: number; total: number }) => void
}
569
+
570
/**
 * Outcome reported by `buildBM25Index`.
 */
export interface BuildBM25Result {
  /** Sections added to the BM25 store by this call; 0 when the build was skipped */
  readonly sectionsIndexed: number
  /** Elapsed time in milliseconds, measured with `Date.now()` */
  readonly duration: number
}
574
+
575
/**
 * Build the BM25 keyword index from the persisted section index.
 *
 * Returns early with `sectionsIndexed: 0` when the document or section index
 * is missing, or when `options.force` is not set and an existing BM25 store
 * loads with at least one entry. Otherwise the store is cleared and rebuilt:
 * sections with fewer than 10 tokens are left out, each source file is read
 * once, and section text is sliced out by `startLine`/`endLine` (treated as
 * 1-based, inclusive). Source files that cannot be read are skipped rather
 * than failing the whole build.
 *
 * @param rootPath - Root directory containing indexed markdown files
 * @param options - Build options (force rebuild, progress callback)
 * @returns Number of sections added to the store and elapsed milliseconds
 */
export const buildBM25Index = (
  rootPath: string,
  options: BuildBM25Options = {},
): Effect.Effect<
  BuildBM25Result,
  FileReadError | IndexCorruptedError | FileWriteError
> =>
  Effect.gen(function* () {
    const startTime = Date.now()
    const storage = createStorage(rootPath)

    // Both indexes must exist before a keyword index can be derived.
    const docIndex = yield* loadDocumentIndex(storage)
    const sectionIndex = yield* loadSectionIndex(storage)

    if (!docIndex || !sectionIndex) {
      return { sectionsIndexed: 0, duration: 0 }
    }

    const bm25Store = createBM25Store(storage.rootPath)

    // Without `force`, a populated store on disk is reused as-is.
    if (!options.force) {
      const loaded = yield* bm25Store.load()
      if (loaded) {
        const stats = bm25Store.getStats()
        if (stats.count > 0) {
          return { sectionsIndexed: 0, duration: Date.now() - startTime }
        }
      }
    }

    // Clear and rebuild
    bm25Store.clear()

    // Group sections by document so each source file is read only once.
    const sectionsByDoc: Map<string, SectionEntry[]> = new Map()
    for (const section of Object.values(sectionIndex.sections)) {
      if (section.tokenCount < 10) continue
      const existing = sectionsByDoc.get(section.documentPath)
      if (existing) {
        existing.push(section)
      } else {
        sectionsByDoc.set(section.documentPath, [section])
      }
    }

    const totalDocs = sectionsByDoc.size
    let processedDocs = 0
    let sectionsIndexed = 0

    // Advance the document counter and notify the caller, if a callback is set.
    const reportProgress = (): void => {
      processedDocs++
      options.onProgress?.({ current: processedDocs, total: totalDocs })
    }

    for (const [docPath, sections] of sectionsByDoc) {
      const filePath = path.join(storage.rootPath, docPath)

      // Effect.tryPromise routes a rejected read into the error channel so
      // catchAll can recover from it. Effect.promise would surface the
      // rejection as a defect, which catchAll does not intercept.
      const fileContent = yield* Effect.tryPromise(() =>
        fs.readFile(filePath, 'utf-8'),
      ).pipe(Effect.catchAll(() => Effect.succeed(null)))

      if (fileContent === null) {
        // Unreadable documents still count toward progress so that
        // `current` can reach `total`.
        reportProgress()
        continue
      }

      const lines = fileContent.split('\n')
      const docs: BM25Document[] = sections.map((section) => ({
        id: section.id,
        sectionId: section.id,
        documentPath: section.documentPath,
        heading: section.heading,
        content: lines.slice(section.startLine - 1, section.endLine).join('\n'),
      }))
      sectionsIndexed += docs.length

      yield* bm25Store.add(docs)

      reportProgress()
    }

    // Consolidate and save
    yield* bm25Store.consolidate()
    yield* bm25Store.save()

    return {
      sectionsIndexed,
      duration: Date.now() - startTime,
    }
  })