mdcontext 0.0.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (337) hide show
  1. package/.changeset/README.md +28 -0
  2. package/.changeset/config.json +11 -0
  3. package/.claude/settings.local.json +25 -0
  4. package/.github/workflows/ci.yml +83 -0
  5. package/.github/workflows/claude-code-review.yml +44 -0
  6. package/.github/workflows/claude.yml +85 -0
  7. package/.github/workflows/release.yml +113 -0
  8. package/.tldrignore +112 -0
  9. package/BACKLOG.md +338 -0
  10. package/CONTRIBUTING.md +186 -0
  11. package/NOTES/NOTES +44 -0
  12. package/README.md +434 -11
  13. package/biome.json +36 -0
  14. package/cspell.config.yaml +14 -0
  15. package/dist/chunk-23UPXDNL.js +3044 -0
  16. package/dist/chunk-2W7MO2DL.js +1366 -0
  17. package/dist/chunk-3NUAZGMA.js +1689 -0
  18. package/dist/chunk-7TOWB2XB.js +366 -0
  19. package/dist/chunk-7XOTOADQ.js +3065 -0
  20. package/dist/chunk-AH2PDM2K.js +3042 -0
  21. package/dist/chunk-BNXWSZ63.js +3742 -0
  22. package/dist/chunk-BTL5DJVU.js +3222 -0
  23. package/dist/chunk-HDHYG7E4.js +104 -0
  24. package/dist/chunk-HLR4KZBP.js +3234 -0
  25. package/dist/chunk-IP3FRFEB.js +1045 -0
  26. package/dist/chunk-KHU56VDO.js +3042 -0
  27. package/dist/chunk-KRYIFLQR.js +88 -0
  28. package/dist/chunk-LBSDNLEM.js +287 -0
  29. package/dist/chunk-MNTQ7HCP.js +2643 -0
  30. package/dist/chunk-MUJELQQ6.js +1387 -0
  31. package/dist/chunk-MXJGMSLV.js +2199 -0
  32. package/dist/chunk-N6QJGC3Z.js +2636 -0
  33. package/dist/chunk-OBELGBPM.js +1713 -0
  34. package/dist/chunk-OT7R5XTA.js +3192 -0
  35. package/dist/chunk-P7X4RA2T.js +106 -0
  36. package/dist/chunk-PIDUQNC2.js +3185 -0
  37. package/dist/chunk-POGCDIH4.js +3187 -0
  38. package/dist/chunk-PSIEOQGZ.js +3043 -0
  39. package/dist/chunk-PVRT3IHA.js +3238 -0
  40. package/dist/chunk-QNN4TT23.js +1430 -0
  41. package/dist/chunk-RE3R45RJ.js +3042 -0
  42. package/dist/chunk-S7E6TFX6.js +803 -0
  43. package/dist/chunk-SG6GLU4U.js +1378 -0
  44. package/dist/chunk-SJCDV2ST.js +274 -0
  45. package/dist/chunk-SYE5XLF3.js +104 -0
  46. package/dist/chunk-T5VLYBZD.js +103 -0
  47. package/dist/chunk-TOQB7VWU.js +3238 -0
  48. package/dist/chunk-VFNMZ4ZQ.js +3228 -0
  49. package/dist/chunk-VVTGZNBT.js +1629 -0
  50. package/dist/chunk-W7Q4RFEV.js +104 -0
  51. package/dist/chunk-XTYYVRLO.js +3190 -0
  52. package/dist/chunk-Y6MDYVJD.js +3063 -0
  53. package/dist/cli/main.d.ts +1 -0
  54. package/dist/cli/main.js +5458 -0
  55. package/dist/index.d.ts +653 -0
  56. package/dist/index.js +79 -0
  57. package/dist/mcp/server.d.ts +1 -0
  58. package/dist/mcp/server.js +472 -0
  59. package/dist/schema-BAWSG7KY.js +22 -0
  60. package/dist/schema-E3QUPL26.js +20 -0
  61. package/dist/schema-EHL7WUT6.js +20 -0
  62. package/docs/019-USAGE.md +625 -0
  63. package/docs/020-current-implementation.md +364 -0
  64. package/docs/021-DOGFOODING-FINDINGS.md +175 -0
  65. package/docs/BACKLOG.md +80 -0
  66. package/docs/CONFIG.md +1123 -0
  67. package/docs/DESIGN.md +439 -0
  68. package/docs/ERRORS.md +383 -0
  69. package/docs/PROJECT.md +88 -0
  70. package/docs/ROADMAP.md +407 -0
  71. package/docs/summarization.md +320 -0
  72. package/docs/test-links.md +9 -0
  73. package/justfile +40 -0
  74. package/package.json +74 -9
  75. package/pnpm-workspace.yaml +5 -0
  76. package/research/INDEX.md +315 -0
  77. package/research/code-review/README.md +90 -0
  78. package/research/code-review/cli-error-handling-review.md +979 -0
  79. package/research/code-review/code-review-validation-report.md +464 -0
  80. package/research/code-review/main-ts-review.md +1128 -0
  81. package/research/config-analysis/01-current-implementation.md +470 -0
  82. package/research/config-analysis/02-strategy-recommendation.md +428 -0
  83. package/research/config-analysis/03-task-candidates.md +715 -0
  84. package/research/config-analysis/033-research-configuration-management.md +828 -0
  85. package/research/config-analysis/034-research-effect-cli-config.md +1504 -0
  86. package/research/config-analysis/04-consolidated-task-candidates.md +277 -0
  87. package/research/config-docs/SUMMARY.md +357 -0
  88. package/research/config-docs/TEST-RESULTS.md +776 -0
  89. package/research/config-docs/TODO.md +542 -0
  90. package/research/config-docs/analysis.md +744 -0
  91. package/research/config-docs/fix-validation.md +502 -0
  92. package/research/config-docs/help-audit.md +264 -0
  93. package/research/config-docs/help-system-analysis.md +890 -0
  94. package/research/dogfood/consolidated-tool-evaluation.md +373 -0
  95. package/research/dogfood/strategy-a/a-synthesis.md +184 -0
  96. package/research/dogfood/strategy-a/a1-docs.md +226 -0
  97. package/research/dogfood/strategy-a/a2-amorphic.md +156 -0
  98. package/research/dogfood/strategy-a/a3-llm.md +164 -0
  99. package/research/dogfood/strategy-b/b-synthesis.md +228 -0
  100. package/research/dogfood/strategy-b/b1-architecture.md +207 -0
  101. package/research/dogfood/strategy-b/b2-gaps.md +258 -0
  102. package/research/dogfood/strategy-b/b3-workflows.md +250 -0
  103. package/research/dogfood/strategy-c/c-synthesis.md +451 -0
  104. package/research/dogfood/strategy-c/c1-explorer.md +192 -0
  105. package/research/dogfood/strategy-c/c2-diver-memory.md +145 -0
  106. package/research/dogfood/strategy-c/c3-diver-control.md +148 -0
  107. package/research/dogfood/strategy-c/c4-diver-failure.md +151 -0
  108. package/research/dogfood/strategy-c/c5-diver-execution.md +221 -0
  109. package/research/dogfood/strategy-c/c6-diver-org.md +221 -0
  110. package/research/effect-cli-error-handling.md +845 -0
  111. package/research/effect-errors-as-values.md +943 -0
  112. package/research/errors-task-analysis/00-consolidated-tasks.md +207 -0
  113. package/research/errors-task-analysis/cli-commands-analysis.md +909 -0
  114. package/research/errors-task-analysis/embeddings-analysis.md +709 -0
  115. package/research/errors-task-analysis/index-search-analysis.md +812 -0
  116. package/research/frontmatter/COMMENTS-ARE-SKIPPED.md +149 -0
  117. package/research/frontmatter/LLM-CODE-NAVIGATION.md +276 -0
  118. package/research/issue-review.md +603 -0
  119. package/research/llm-summarization/agent-cli-tools-2026.md +1082 -0
  120. package/research/llm-summarization/alternative-providers-2026.md +1428 -0
  121. package/research/llm-summarization/anthropic-2026.md +367 -0
  122. package/research/llm-summarization/claude-cli-integration.md +1706 -0
  123. package/research/llm-summarization/cli-integration-patterns.md +3155 -0
  124. package/research/llm-summarization/openai-2026.md +473 -0
  125. package/research/llm-summarization/openai-compatible-providers-2026.md +1022 -0
  126. package/research/llm-summarization/opencode-cli-integration.md +1552 -0
  127. package/research/llm-summarization/prompt-engineering-2026.md +1426 -0
  128. package/research/llm-summarization/prototype-results.md +56 -0
  129. package/research/llm-summarization/provider-switching-patterns-2026.md +2153 -0
  130. package/research/llm-summarization/typescript-llm-libraries-2026.md +2436 -0
  131. package/research/mdcontext-error-analysis.md +521 -0
  132. package/research/mdcontext-pudding/00-EXECUTIVE-SUMMARY.md +282 -0
  133. package/research/mdcontext-pudding/01-index-embed.md +956 -0
  134. package/research/mdcontext-pudding/02-search-COMMANDS.md +142 -0
  135. package/research/mdcontext-pudding/02-search-SUMMARY.md +146 -0
  136. package/research/mdcontext-pudding/02-search.md +970 -0
  137. package/research/mdcontext-pudding/03-context.md +779 -0
  138. package/research/mdcontext-pudding/04-navigation-and-analytics.md +803 -0
  139. package/research/mdcontext-pudding/04-tree.md +704 -0
  140. package/research/mdcontext-pudding/05-config.md +1038 -0
  141. package/research/mdcontext-pudding/06-links-summary.txt +87 -0
  142. package/research/mdcontext-pudding/06-links.md +679 -0
  143. package/research/mdcontext-pudding/07-stats.md +693 -0
  144. package/research/mdcontext-pudding/BUG-FIX-PLAN.md +388 -0
  145. package/research/mdcontext-pudding/P0-BUG-VALIDATION.md +167 -0
  146. package/research/mdcontext-pudding/README.md +168 -0
  147. package/research/mdcontext-pudding/TESTING-SUMMARY.md +128 -0
  148. package/research/npm_publish/011-npm-workflow-research-agent2.md +792 -0
  149. package/research/npm_publish/012-npm-workflow-research-agent1.md +530 -0
  150. package/research/npm_publish/013-npm-workflow-research-agent3.md +722 -0
  151. package/research/npm_publish/014-npm-workflow-synthesis.md +556 -0
  152. package/research/npm_publish/031-npm-workflow-task-analysis.md +134 -0
  153. package/research/research-quality-review.md +834 -0
  154. package/research/semantic-search/002-research-embedding-models.md +490 -0
  155. package/research/semantic-search/003-research-rag-alternatives.md +523 -0
  156. package/research/semantic-search/004-research-vector-search.md +841 -0
  157. package/research/semantic-search/032-research-semantic-search.md +427 -0
  158. package/research/semantic-search/embedding-text-analysis.md +156 -0
  159. package/research/semantic-search/multi-word-failure-reproduction.md +171 -0
  160. package/research/semantic-search/query-processing-analysis.md +207 -0
  161. package/research/semantic-search/root-cause-and-solution.md +114 -0
  162. package/research/semantic-search/threshold-validation-report.md +69 -0
  163. package/research/semantic-search/vector-search-analysis.md +63 -0
  164. package/research/task-management-2026/00-synthesis-recommendations.md +295 -0
  165. package/research/task-management-2026/01-ai-workflow-tools.md +416 -0
  166. package/research/task-management-2026/02-agent-framework-patterns.md +476 -0
  167. package/research/task-management-2026/03-lightweight-file-based.md +567 -0
  168. package/research/task-management-2026/04-established-tools-ai-features.md +541 -0
  169. package/research/task-management-2026/linear/01-core-features-workflow.md +771 -0
  170. package/research/task-management-2026/linear/02-api-integrations.md +930 -0
  171. package/research/task-management-2026/linear/03-ai-features.md +368 -0
  172. package/research/task-management-2026/linear/04-pricing-setup.md +205 -0
  173. package/research/task-management-2026/linear/05-usage-patterns-best-practices.md +605 -0
  174. package/research/test-path-issues.md +276 -0
  175. package/review/ALP-76/1-error-type-design.md +962 -0
  176. package/review/ALP-76/2-error-handling-patterns.md +906 -0
  177. package/review/ALP-76/3-error-presentation.md +624 -0
  178. package/review/ALP-76/4-test-coverage.md +625 -0
  179. package/review/ALP-76/5-migration-completeness.md +440 -0
  180. package/review/ALP-76/6-effect-best-practices.md +755 -0
  181. package/scripts/apply-branch-protection.sh +47 -0
  182. package/scripts/branch-protection-templates.json +79 -0
  183. package/scripts/prototype-summarization.ts +346 -0
  184. package/scripts/rebuild-hnswlib.js +58 -0
  185. package/scripts/setup-branch-protection.sh +64 -0
  186. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/active-provider.json +7 -0
  187. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.json +541 -0
  188. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.meta.json +5 -0
  189. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/config.json +8 -0
  190. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
  191. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
  192. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/documents.json +60 -0
  193. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/links.json +13 -0
  194. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/sections.json +1197 -0
  195. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/configuration-management.md +99 -0
  196. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/distributed-systems.md +92 -0
  197. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/error-handling.md +78 -0
  198. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/failure-automation.md +55 -0
  199. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/job-context.md +69 -0
  200. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/process-orchestration.md +99 -0
  201. package/src/cli/argv-preprocessor.test.ts +210 -0
  202. package/src/cli/argv-preprocessor.ts +202 -0
  203. package/src/cli/cli.test.ts +627 -0
  204. package/src/cli/commands/backlinks.ts +54 -0
  205. package/src/cli/commands/config-cmd.ts +642 -0
  206. package/src/cli/commands/context.ts +285 -0
  207. package/src/cli/commands/duplicates.ts +122 -0
  208. package/src/cli/commands/embeddings.ts +529 -0
  209. package/src/cli/commands/index-cmd.ts +480 -0
  210. package/src/cli/commands/index.ts +16 -0
  211. package/src/cli/commands/links.ts +52 -0
  212. package/src/cli/commands/search.ts +1281 -0
  213. package/src/cli/commands/stats.ts +149 -0
  214. package/src/cli/commands/tree.ts +128 -0
  215. package/src/cli/config-layer.ts +176 -0
  216. package/src/cli/error-handler.test.ts +235 -0
  217. package/src/cli/error-handler.ts +655 -0
  218. package/src/cli/flag-schemas.ts +341 -0
  219. package/src/cli/help.ts +588 -0
  220. package/src/cli/index.ts +9 -0
  221. package/src/cli/main.ts +435 -0
  222. package/src/cli/options.ts +41 -0
  223. package/src/cli/shared-error-handling.ts +199 -0
  224. package/src/cli/typo-suggester.test.ts +105 -0
  225. package/src/cli/typo-suggester.ts +130 -0
  226. package/src/cli/utils.ts +259 -0
  227. package/src/config/file-provider.test.ts +320 -0
  228. package/src/config/file-provider.ts +273 -0
  229. package/src/config/index.ts +72 -0
  230. package/src/config/integration.test.ts +667 -0
  231. package/src/config/precedence.test.ts +277 -0
  232. package/src/config/precedence.ts +451 -0
  233. package/src/config/schema.test.ts +414 -0
  234. package/src/config/schema.ts +603 -0
  235. package/src/config/service.test.ts +320 -0
  236. package/src/config/service.ts +243 -0
  237. package/src/config/testing.test.ts +264 -0
  238. package/src/config/testing.ts +110 -0
  239. package/src/core/index.ts +1 -0
  240. package/src/core/types.ts +113 -0
  241. package/src/duplicates/detector.test.ts +183 -0
  242. package/src/duplicates/detector.ts +414 -0
  243. package/src/duplicates/index.ts +18 -0
  244. package/src/embeddings/embedding-namespace.test.ts +300 -0
  245. package/src/embeddings/embedding-namespace.ts +947 -0
  246. package/src/embeddings/heading-boost.test.ts +222 -0
  247. package/src/embeddings/hnsw-build-options.test.ts +198 -0
  248. package/src/embeddings/hyde.test.ts +272 -0
  249. package/src/embeddings/hyde.ts +264 -0
  250. package/src/embeddings/index.ts +10 -0
  251. package/src/embeddings/openai-provider.ts +414 -0
  252. package/src/embeddings/pricing.json +22 -0
  253. package/src/embeddings/provider-constants.ts +204 -0
  254. package/src/embeddings/provider-errors.test.ts +967 -0
  255. package/src/embeddings/provider-errors.ts +565 -0
  256. package/src/embeddings/provider-factory.test.ts +240 -0
  257. package/src/embeddings/provider-factory.ts +225 -0
  258. package/src/embeddings/provider-integration.test.ts +788 -0
  259. package/src/embeddings/query-preprocessing.test.ts +187 -0
  260. package/src/embeddings/semantic-search-threshold.test.ts +508 -0
  261. package/src/embeddings/semantic-search.ts +1270 -0
  262. package/src/embeddings/types.ts +359 -0
  263. package/src/embeddings/vector-store.ts +708 -0
  264. package/src/embeddings/voyage-provider.ts +313 -0
  265. package/src/errors/errors.test.ts +845 -0
  266. package/src/errors/index.ts +533 -0
  267. package/src/index/ignore-patterns.test.ts +354 -0
  268. package/src/index/ignore-patterns.ts +305 -0
  269. package/src/index/index.ts +4 -0
  270. package/src/index/indexer.ts +684 -0
  271. package/src/index/storage.ts +260 -0
  272. package/src/index/types.ts +147 -0
  273. package/src/index/watcher.ts +189 -0
  274. package/src/index.ts +30 -0
  275. package/src/integration/search-keyword.test.ts +678 -0
  276. package/src/mcp/server.ts +612 -0
  277. package/src/parser/index.ts +1 -0
  278. package/src/parser/parser.test.ts +291 -0
  279. package/src/parser/parser.ts +394 -0
  280. package/src/parser/section-filter.test.ts +277 -0
  281. package/src/parser/section-filter.ts +392 -0
  282. package/src/search/__tests__/hybrid-search.test.ts +650 -0
  283. package/src/search/bm25-store.ts +366 -0
  284. package/src/search/cross-encoder.test.ts +253 -0
  285. package/src/search/cross-encoder.ts +406 -0
  286. package/src/search/fuzzy-search.test.ts +419 -0
  287. package/src/search/fuzzy-search.ts +273 -0
  288. package/src/search/hybrid-search.ts +448 -0
  289. package/src/search/path-matcher.test.ts +276 -0
  290. package/src/search/path-matcher.ts +33 -0
  291. package/src/search/query-parser.test.ts +260 -0
  292. package/src/search/query-parser.ts +319 -0
  293. package/src/search/searcher.test.ts +280 -0
  294. package/src/search/searcher.ts +724 -0
  295. package/src/search/wink-bm25.d.ts +30 -0
  296. package/src/summarization/cli-providers/claude.ts +202 -0
  297. package/src/summarization/cli-providers/detection.test.ts +273 -0
  298. package/src/summarization/cli-providers/detection.ts +118 -0
  299. package/src/summarization/cli-providers/index.ts +8 -0
  300. package/src/summarization/cost.test.ts +139 -0
  301. package/src/summarization/cost.ts +102 -0
  302. package/src/summarization/error-handler.test.ts +127 -0
  303. package/src/summarization/error-handler.ts +111 -0
  304. package/src/summarization/index.ts +102 -0
  305. package/src/summarization/pipeline.test.ts +498 -0
  306. package/src/summarization/pipeline.ts +231 -0
  307. package/src/summarization/prompts.test.ts +269 -0
  308. package/src/summarization/prompts.ts +133 -0
  309. package/src/summarization/provider-factory.test.ts +396 -0
  310. package/src/summarization/provider-factory.ts +178 -0
  311. package/src/summarization/types.ts +184 -0
  312. package/src/summarize/budget-bugs.test.ts +620 -0
  313. package/src/summarize/formatters.ts +419 -0
  314. package/src/summarize/index.ts +20 -0
  315. package/src/summarize/summarizer.test.ts +275 -0
  316. package/src/summarize/summarizer.ts +597 -0
  317. package/src/summarize/verify-bugs.test.ts +238 -0
  318. package/src/types/huggingface-transformers.d.ts +66 -0
  319. package/src/utils/index.ts +1 -0
  320. package/src/utils/tokens.test.ts +142 -0
  321. package/src/utils/tokens.ts +186 -0
  322. package/tests/fixtures/cli/.mdcontext/active-provider.json +7 -0
  323. package/tests/fixtures/cli/.mdcontext/config.json +8 -0
  324. package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
  325. package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
  326. package/tests/fixtures/cli/.mdcontext/indexes/documents.json +33 -0
  327. package/tests/fixtures/cli/.mdcontext/indexes/links.json +12 -0
  328. package/tests/fixtures/cli/.mdcontext/indexes/sections.json +247 -0
  329. package/tests/fixtures/cli/README.md +9 -0
  330. package/tests/fixtures/cli/api-reference.md +11 -0
  331. package/tests/fixtures/cli/getting-started.md +11 -0
  332. package/tests/integration/embed-index.test.ts +712 -0
  333. package/tests/integration/search-context.test.ts +469 -0
  334. package/tests/integration/search-semantic.test.ts +522 -0
  335. package/tsconfig.json +26 -0
  336. package/vitest.config.ts +16 -0
  337. package/vitest.setup.ts +12 -0
@@ -0,0 +1,359 @@
1
+ /**
2
+ * Embedding types for mdcontext
3
+ */
4
+
5
+ // ============================================================================
6
+ // Embedding Provider
7
+ // ============================================================================
8
+
9
/**
 * Progress information handed to `EmbedOptions.onBatchProgress` while a set of
 * texts is being embedded in batches.
 */
export interface BatchProgress {
  readonly batchIndex: number
  readonly totalBatches: number
  readonly processedTexts: number
  readonly totalTexts: number
}

/** Optional settings accepted by `EmbeddingProvider.embed`. */
export interface EmbedOptions {
  /** Callback that receives a `BatchProgress` value; may be omitted. */
  readonly onBatchProgress?: ((progress: BatchProgress) => void) | undefined
}

/** Minimal contract every embedding backend has to fulfil. */
export interface EmbeddingProvider {
  readonly name: string
  readonly dimensions: number
  /** Embed the given texts and resolve with vectors plus usage figures. */
  embed(texts: string[], options?: EmbedOptions): Promise<EmbeddingResult>
}

/**
 * An embedding provider that additionally exposes which model (and,
 * optionally, which base URL) it is talking to.
 * Implementations like OpenAIProvider include these additional properties.
 */
export interface EmbeddingProviderWithMetadata extends EmbeddingProvider {
  readonly model: string
  readonly baseURL: string | undefined
}

/**
 * Type guard that tells whether a provider carries the extended metadata.
 * Prefer it over an unchecked cast before reading `model` / `baseURL`.
 *
 * Only `model` is inspected: the guard passes when the property exists and
 * holds a string.
 *
 * @param provider - Provider to inspect
 * @returns `true` when `provider.model` is present and is a string
 */
export const hasProviderMetadata = (
  provider: EmbeddingProvider,
): provider is EmbeddingProviderWithMetadata => {
  if (!('model' in provider)) {
    return false
  }
  const { model } = provider as EmbeddingProviderWithMetadata
  return typeof model === 'string'
}

/** Outcome of a single `embed` call. */
export interface EmbeddingResult {
  readonly embeddings: readonly number[][]
  readonly tokensUsed: number
  readonly cost: number
}
53
+
54
+ // ============================================================================
55
+ // Vector Index
56
+ // ============================================================================
57
+
58
/** A single stored vector together with the section it was computed for. */
export interface VectorEntry {
  readonly id: string
  readonly sectionId: string
  readonly documentPath: string
  readonly heading: string
  readonly embedding: readonly number[]
}

/**
 * HNSW build parameters as persisted in the index metadata.
 * They make it possible to notice a config/index mismatch and to recommend
 * a rebuild.
 */
export interface HnswIndexParams {
  /** M parameter: maximum connections per node. Default: 16 */
  readonly m: number
  /** Search width used while constructing the index. Default: 200 */
  readonly efConstruction: number
}

/** Persisted vector index: provider identity, entries and usage totals. */
export interface VectorIndex {
  readonly version: number
  readonly provider: string
  readonly providerModel?: string | undefined
  readonly providerBaseURL?: string | undefined
  readonly dimensions: number
  readonly entries: Record<string, VectorEntry>
  readonly totalCost: number
  readonly totalTokens: number
  readonly createdAt: string
  readonly updatedAt: string
  /**
   * Build parameters of the HNSW index, kept so they can be validated on load.
   * They influence index quality and build time; changing them requires a
   * rebuild.
   */
  readonly hnswParams?: HnswIndexParams | undefined
}
94
+
95
+ // ============================================================================
96
+ // Quality Modes
97
+ // ============================================================================
98
+
99
/**
 * Quality presets that select the HNSW efSearch parameter.
 * A larger efSearch improves recall and costs search speed.
 *
 * - 'fast': efSearch=64, ~40% faster, slight recall reduction
 * - 'balanced': efSearch=100 (default), good balance
 * - 'thorough': efSearch=256, ~30% slower, best recall
 */
export type SearchQuality = 'fast' | 'balanced' | 'thorough'

/**
 * Lookup table from quality preset to its efSearch value, i.e. the size of
 * the dynamic candidate list used during search.
 */
export const QUALITY_EF_SEARCH: Record<SearchQuality, number> = {
  fast: 64,
  balanced: 100,
  thorough: 256,
} as const
118
+
119
+ // ============================================================================
120
+ // Semantic Search
121
+ // ============================================================================
122
+
123
/**
 * Options accepted by semantic search. Every member is optional; the defaults
 * mentioned below are the ones stated for each option.
 */
export interface SemanticSearchOptions {
  /** Upper bound on the number of results returned */
  readonly limit?: number | undefined
  /** Lowest similarity (0-1) a result may have */
  readonly threshold?: number | undefined
  /** Restrict results to documents whose path matches this pattern */
  readonly pathPattern?: string | undefined
  /** Quality preset: fast, balanced (default), or thorough */
  readonly quality?: SearchQuality | undefined
  /** Overrides the configured embedding provider for this search */
  readonly providerConfig?:
    | {
        readonly provider:
          | 'openai'
          | 'ollama'
          | 'lm-studio'
          | 'openrouter'
          | 'voyage'
        readonly baseURL?: string | undefined
        readonly model?: string | undefined
      }
    | undefined
  /**
   * Disable query preprocessing (normalize, lowercase, strip punctuation).
   * Default: false, i.e. preprocessing stays on for better recall.
   * Pass true when the query must be matched exactly as typed.
   */
  readonly skipPreprocessing?: boolean | undefined
  /**
   * Raise the score of results whose section heading contains query terms.
   * Helps navigation-style queries such as "installation guide" or
   * "API reference".
   * Default: true (heading boost enabled).
   */
  readonly headingBoost?: boolean | undefined
  /**
   * Expand the query with HyDE (Hypothetical Document Embeddings): an LLM
   * writes a hypothetical document that answers the query, and the search
   * runs on that document's embedding.
   *
   * Best for: complex questions, "how to" queries, ambiguous searches
   * Adds: ~1-2s latency, LLM API cost
   * Improvement: 10-30% better recall on complex queries
   *
   * Default: false (disabled)
   */
  readonly hyde?: boolean | undefined
  /**
   * Tuning knobs for HyDE; only consulted when `hyde` is true.
   */
  readonly hydeOptions?:
    | {
        /** Model that generates the hypothetical document. Default: gpt-4o-mini */
        readonly model?: string | undefined
        /** Token cap for the generated document. Default: 256 */
        readonly maxTokens?: number | undefined
        /** Sampling temperature (0-1). Default: 0.3 */
        readonly temperature?: number | undefined
      }
    | undefined
  /** Number of context lines to include before matches */
  readonly contextBefore?: number | undefined
  /** Number of context lines to include after matches */
  readonly contextAfter?: number | undefined
}
187
+
188
+ // ============================================================================
189
+ // Query Preprocessing (empty placeholder - preprocessQuery is defined after the ranking boosts below)
190
+ // ============================================================================
191
+
192
+ // ============================================================================
193
+ // Heading Boost
194
+ // ============================================================================
195
+
196
/** Boost factor per distinct matched term in heading (0.05 = 5% boost per term) */
const HEADING_BOOST_FACTOR = 0.05

/** Boost factor for important files like README (0.03 = 3% boost) */
const FILE_IMPORTANCE_BOOST = 0.03

/**
 * Important file patterns that get ranking boost.
 * These are typically entry points or high-value documentation.
 *
 * Each pattern matches the name either at the very start of the path or right
 * after a path separator. Both `/` and `\` count as separators so that
 * Windows-style paths are boosted the same way as POSIX paths.
 */
const IMPORTANT_FILE_PATTERNS: readonly RegExp[] = [
  /(?:^|[\\\/])readme\.md$/i, // README at the root or nested
  /(?:^|[\\\/])index\.md$/i, // Index files
  /(?:^|[\\\/])getting-?started/i, // Getting started guides
  /(?:^|[\\\/])introduction/i, // Introductions
  /(?:^|[\\\/])overview/i, // Overviews
  /(?:^|[\\\/])quickstart/i, // Quickstart guides
  /(?:^|[\\\/])changelog\.md$/i, // Changelogs (useful for "what changed" queries)
]

/**
 * Calculate file importance boost for a search result.
 * Boosts results from important files like README, index, getting-started.
 *
 * @param documentPath - Path to the document (`/` or `\` separated)
 * @returns Boost value to add to similarity score (0 or 0.03)
 */
export const calculateFileImportanceBoost = (documentPath: string): number => {
  const isImportant = IMPORTANT_FILE_PATTERNS.some((pattern) =>
    pattern.test(documentPath),
  )
  return isImportant ? FILE_IMPORTANCE_BOOST : 0
}

/**
 * Calculate heading match boost for a search result.
 * Boosts results where query terms appear in section headings.
 *
 * The query is lowercased and split on whitespace. Each distinct term is
 * counted at most once, so repeating a word in the query does not inflate the
 * boost. A term matches when it occurs anywhere in the lowercased heading
 * (substring match).
 *
 * @param heading - Section heading to check
 * @param query - Search query (lowercased and split on whitespace here)
 * @returns Boost value to add to similarity score (0.0 to ~0.15 typically)
 */
export const calculateHeadingBoost = (
  heading: string,
  query: string,
): number => {
  // A Set drops repeated terms so "api api" is worth one match, not two.
  const uniqueTerms = new Set(
    query.toLowerCase().split(/\s+/).filter(Boolean),
  )
  if (uniqueTerms.size === 0) return 0

  const headingLower = heading.toLowerCase()
  let matchCount = 0
  for (const term of uniqueTerms) {
    if (headingLower.includes(term)) matchCount += 1
  }

  return matchCount * HEADING_BOOST_FACTOR
}

/**
 * Calculate combined ranking boost for a search result.
 * Combines heading match boost and file importance boost.
 *
 * @param heading - Section heading
 * @param query - Search query
 * @param documentPath - Path to the document
 * @returns Combined boost value (0.0 to ~0.18 typically)
 */
export const calculateRankingBoost = (
  heading: string,
  query: string,
  documentPath: string,
): number => {
  const headingBoost = calculateHeadingBoost(heading, query)
  const fileBoost = calculateFileImportanceBoost(documentPath)
  return headingBoost + fileBoost
}
278
+
279
+ // ============================================================================
280
+ // Query Preprocessing
281
+ // ============================================================================
282
+
283
/**
 * Preprocess a search query before embedding to reduce noise and improve recall.
 *
 * Transformations applied:
 * - Convert to lowercase (removes case-only differences between queries)
 * - Replace punctuation and symbols with spaces (preserves word boundaries)
 * - Collapse multiple whitespace characters to a single space
 * - Trim leading/trailing whitespace
 *
 * Letters, combining marks and digits of any script are kept, as is `_`.
 * The previous ASCII-only `\w` class turned every non-ASCII letter into a
 * space, which mangled or emptied non-English queries. Pure-ASCII input is
 * normalized exactly as before.
 *
 * This provides 2-5% precision improvement for most queries.
 *
 * @param query - Raw search query
 * @returns Normalized query string (may be empty)
 */
export const preprocessQuery = (query: string): string => {
  return (
    query
      .toLowerCase()
      // Replace anything that is not a letter, mark, digit, underscore or
      // whitespace with a space (preserves word boundaries)
      .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, ' ')
      // Collapse multiple whitespace characters
      .replace(/\s+/g, ' ')
      .trim()
  )
}
308
+
309
/** One section returned by semantic search, with its similarity score. */
export interface SemanticSearchResult {
  readonly sectionId: string
  readonly documentPath: string
  readonly heading: string
  readonly similarity: number
  readonly content?: string | undefined
  /** Numbered context lines; present when context was requested */
  readonly contextLines?: readonly ContextLine[] | undefined
}

/** A single line of context attached to a search result. */
export interface ContextLine {
  /** 1-based line number */
  readonly lineNumber: number
  /** Text of the line */
  readonly line: string
  /**
   * Marks lines that belong to the matched result.
   *
   * - Keyword search: true when the line itself matches the query.
   * - Semantic/hybrid search: true when the line falls inside the
   *   selected/matched section span, even without a direct text match.
   */
  readonly isMatch: boolean
}

/**
 * Search results plus statistics about results that fell below the threshold.
 * Lets callers give feedback to the user when nothing passed the threshold.
 */
export interface SemanticSearchResultWithStats {
  readonly results: readonly SemanticSearchResult[]
  /** Count of below-threshold results (only set when includeBelowThresholdStats is true) */
  readonly belowThresholdCount?: number | undefined
  /** Best similarity seen among the below-threshold results */
  readonly belowThresholdHighest?: number | undefined
  /** Number of above-threshold results available before the limit was applied */
  readonly totalAvailable?: number | undefined
}
347
+
348
+ // ============================================================================
349
+ // Errors
350
+ // ============================================================================
351
+ // NOTE: Embedding-related errors are defined in src/errors/index.ts:
352
+ // - EmbeddingError: For embedding operation failures (rate limits, quota, network)
353
+ // - ApiKeyMissingError: For missing API keys
354
+ // - ApiKeyInvalidError: For invalid/rejected API keys
355
+ //
356
+ // Use these centralized error types instead of defining errors here.
357
+ // Example:
358
+ // import { EmbeddingError } from '../errors/index.js'
359
+ // new EmbeddingError({ reason: 'RateLimit', message: 'Rate limited' })