mdcontext 0.0.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (337) hide show
  1. package/.changeset/README.md +28 -0
  2. package/.changeset/config.json +11 -0
  3. package/.claude/settings.local.json +25 -0
  4. package/.github/workflows/ci.yml +83 -0
  5. package/.github/workflows/claude-code-review.yml +44 -0
  6. package/.github/workflows/claude.yml +85 -0
  7. package/.github/workflows/release.yml +113 -0
  8. package/.tldrignore +112 -0
  9. package/BACKLOG.md +338 -0
  10. package/CONTRIBUTING.md +186 -0
  11. package/NOTES/NOTES +44 -0
  12. package/README.md +434 -11
  13. package/biome.json +36 -0
  14. package/cspell.config.yaml +14 -0
  15. package/dist/chunk-23UPXDNL.js +3044 -0
  16. package/dist/chunk-2W7MO2DL.js +1366 -0
  17. package/dist/chunk-3NUAZGMA.js +1689 -0
  18. package/dist/chunk-7TOWB2XB.js +366 -0
  19. package/dist/chunk-7XOTOADQ.js +3065 -0
  20. package/dist/chunk-AH2PDM2K.js +3042 -0
  21. package/dist/chunk-BNXWSZ63.js +3742 -0
  22. package/dist/chunk-BTL5DJVU.js +3222 -0
  23. package/dist/chunk-HDHYG7E4.js +104 -0
  24. package/dist/chunk-HLR4KZBP.js +3234 -0
  25. package/dist/chunk-IP3FRFEB.js +1045 -0
  26. package/dist/chunk-KHU56VDO.js +3042 -0
  27. package/dist/chunk-KRYIFLQR.js +88 -0
  28. package/dist/chunk-LBSDNLEM.js +287 -0
  29. package/dist/chunk-MNTQ7HCP.js +2643 -0
  30. package/dist/chunk-MUJELQQ6.js +1387 -0
  31. package/dist/chunk-MXJGMSLV.js +2199 -0
  32. package/dist/chunk-N6QJGC3Z.js +2636 -0
  33. package/dist/chunk-OBELGBPM.js +1713 -0
  34. package/dist/chunk-OT7R5XTA.js +3192 -0
  35. package/dist/chunk-P7X4RA2T.js +106 -0
  36. package/dist/chunk-PIDUQNC2.js +3185 -0
  37. package/dist/chunk-POGCDIH4.js +3187 -0
  38. package/dist/chunk-PSIEOQGZ.js +3043 -0
  39. package/dist/chunk-PVRT3IHA.js +3238 -0
  40. package/dist/chunk-QNN4TT23.js +1430 -0
  41. package/dist/chunk-RE3R45RJ.js +3042 -0
  42. package/dist/chunk-S7E6TFX6.js +803 -0
  43. package/dist/chunk-SG6GLU4U.js +1378 -0
  44. package/dist/chunk-SJCDV2ST.js +274 -0
  45. package/dist/chunk-SYE5XLF3.js +104 -0
  46. package/dist/chunk-T5VLYBZD.js +103 -0
  47. package/dist/chunk-TOQB7VWU.js +3238 -0
  48. package/dist/chunk-VFNMZ4ZQ.js +3228 -0
  49. package/dist/chunk-VVTGZNBT.js +1629 -0
  50. package/dist/chunk-W7Q4RFEV.js +104 -0
  51. package/dist/chunk-XTYYVRLO.js +3190 -0
  52. package/dist/chunk-Y6MDYVJD.js +3063 -0
  53. package/dist/cli/main.d.ts +1 -0
  54. package/dist/cli/main.js +5458 -0
  55. package/dist/index.d.ts +653 -0
  56. package/dist/index.js +79 -0
  57. package/dist/mcp/server.d.ts +1 -0
  58. package/dist/mcp/server.js +472 -0
  59. package/dist/schema-BAWSG7KY.js +22 -0
  60. package/dist/schema-E3QUPL26.js +20 -0
  61. package/dist/schema-EHL7WUT6.js +20 -0
  62. package/docs/019-USAGE.md +625 -0
  63. package/docs/020-current-implementation.md +364 -0
  64. package/docs/021-DOGFOODING-FINDINGS.md +175 -0
  65. package/docs/BACKLOG.md +80 -0
  66. package/docs/CONFIG.md +1123 -0
  67. package/docs/DESIGN.md +439 -0
  68. package/docs/ERRORS.md +383 -0
  69. package/docs/PROJECT.md +88 -0
  70. package/docs/ROADMAP.md +407 -0
  71. package/docs/summarization.md +320 -0
  72. package/docs/test-links.md +9 -0
  73. package/justfile +40 -0
  74. package/package.json +74 -9
  75. package/pnpm-workspace.yaml +5 -0
  76. package/research/INDEX.md +315 -0
  77. package/research/code-review/README.md +90 -0
  78. package/research/code-review/cli-error-handling-review.md +979 -0
  79. package/research/code-review/code-review-validation-report.md +464 -0
  80. package/research/code-review/main-ts-review.md +1128 -0
  81. package/research/config-analysis/01-current-implementation.md +470 -0
  82. package/research/config-analysis/02-strategy-recommendation.md +428 -0
  83. package/research/config-analysis/03-task-candidates.md +715 -0
  84. package/research/config-analysis/033-research-configuration-management.md +828 -0
  85. package/research/config-analysis/034-research-effect-cli-config.md +1504 -0
  86. package/research/config-analysis/04-consolidated-task-candidates.md +277 -0
  87. package/research/config-docs/SUMMARY.md +357 -0
  88. package/research/config-docs/TEST-RESULTS.md +776 -0
  89. package/research/config-docs/TODO.md +542 -0
  90. package/research/config-docs/analysis.md +744 -0
  91. package/research/config-docs/fix-validation.md +502 -0
  92. package/research/config-docs/help-audit.md +264 -0
  93. package/research/config-docs/help-system-analysis.md +890 -0
  94. package/research/dogfood/consolidated-tool-evaluation.md +373 -0
  95. package/research/dogfood/strategy-a/a-synthesis.md +184 -0
  96. package/research/dogfood/strategy-a/a1-docs.md +226 -0
  97. package/research/dogfood/strategy-a/a2-amorphic.md +156 -0
  98. package/research/dogfood/strategy-a/a3-llm.md +164 -0
  99. package/research/dogfood/strategy-b/b-synthesis.md +228 -0
  100. package/research/dogfood/strategy-b/b1-architecture.md +207 -0
  101. package/research/dogfood/strategy-b/b2-gaps.md +258 -0
  102. package/research/dogfood/strategy-b/b3-workflows.md +250 -0
  103. package/research/dogfood/strategy-c/c-synthesis.md +451 -0
  104. package/research/dogfood/strategy-c/c1-explorer.md +192 -0
  105. package/research/dogfood/strategy-c/c2-diver-memory.md +145 -0
  106. package/research/dogfood/strategy-c/c3-diver-control.md +148 -0
  107. package/research/dogfood/strategy-c/c4-diver-failure.md +151 -0
  108. package/research/dogfood/strategy-c/c5-diver-execution.md +221 -0
  109. package/research/dogfood/strategy-c/c6-diver-org.md +221 -0
  110. package/research/effect-cli-error-handling.md +845 -0
  111. package/research/effect-errors-as-values.md +943 -0
  112. package/research/errors-task-analysis/00-consolidated-tasks.md +207 -0
  113. package/research/errors-task-analysis/cli-commands-analysis.md +909 -0
  114. package/research/errors-task-analysis/embeddings-analysis.md +709 -0
  115. package/research/errors-task-analysis/index-search-analysis.md +812 -0
  116. package/research/frontmatter/COMMENTS-ARE-SKIPPED.md +149 -0
  117. package/research/frontmatter/LLM-CODE-NAVIGATION.md +276 -0
  118. package/research/issue-review.md +603 -0
  119. package/research/llm-summarization/agent-cli-tools-2026.md +1082 -0
  120. package/research/llm-summarization/alternative-providers-2026.md +1428 -0
  121. package/research/llm-summarization/anthropic-2026.md +367 -0
  122. package/research/llm-summarization/claude-cli-integration.md +1706 -0
  123. package/research/llm-summarization/cli-integration-patterns.md +3155 -0
  124. package/research/llm-summarization/openai-2026.md +473 -0
  125. package/research/llm-summarization/openai-compatible-providers-2026.md +1022 -0
  126. package/research/llm-summarization/opencode-cli-integration.md +1552 -0
  127. package/research/llm-summarization/prompt-engineering-2026.md +1426 -0
  128. package/research/llm-summarization/prototype-results.md +56 -0
  129. package/research/llm-summarization/provider-switching-patterns-2026.md +2153 -0
  130. package/research/llm-summarization/typescript-llm-libraries-2026.md +2436 -0
  131. package/research/mdcontext-error-analysis.md +521 -0
  132. package/research/mdcontext-pudding/00-EXECUTIVE-SUMMARY.md +282 -0
  133. package/research/mdcontext-pudding/01-index-embed.md +956 -0
  134. package/research/mdcontext-pudding/02-search-COMMANDS.md +142 -0
  135. package/research/mdcontext-pudding/02-search-SUMMARY.md +146 -0
  136. package/research/mdcontext-pudding/02-search.md +970 -0
  137. package/research/mdcontext-pudding/03-context.md +779 -0
  138. package/research/mdcontext-pudding/04-navigation-and-analytics.md +803 -0
  139. package/research/mdcontext-pudding/04-tree.md +704 -0
  140. package/research/mdcontext-pudding/05-config.md +1038 -0
  141. package/research/mdcontext-pudding/06-links-summary.txt +87 -0
  142. package/research/mdcontext-pudding/06-links.md +679 -0
  143. package/research/mdcontext-pudding/07-stats.md +693 -0
  144. package/research/mdcontext-pudding/BUG-FIX-PLAN.md +388 -0
  145. package/research/mdcontext-pudding/P0-BUG-VALIDATION.md +167 -0
  146. package/research/mdcontext-pudding/README.md +168 -0
  147. package/research/mdcontext-pudding/TESTING-SUMMARY.md +128 -0
  148. package/research/npm_publish/011-npm-workflow-research-agent2.md +792 -0
  149. package/research/npm_publish/012-npm-workflow-research-agent1.md +530 -0
  150. package/research/npm_publish/013-npm-workflow-research-agent3.md +722 -0
  151. package/research/npm_publish/014-npm-workflow-synthesis.md +556 -0
  152. package/research/npm_publish/031-npm-workflow-task-analysis.md +134 -0
  153. package/research/research-quality-review.md +834 -0
  154. package/research/semantic-search/002-research-embedding-models.md +490 -0
  155. package/research/semantic-search/003-research-rag-alternatives.md +523 -0
  156. package/research/semantic-search/004-research-vector-search.md +841 -0
  157. package/research/semantic-search/032-research-semantic-search.md +427 -0
  158. package/research/semantic-search/embedding-text-analysis.md +156 -0
  159. package/research/semantic-search/multi-word-failure-reproduction.md +171 -0
  160. package/research/semantic-search/query-processing-analysis.md +207 -0
  161. package/research/semantic-search/root-cause-and-solution.md +114 -0
  162. package/research/semantic-search/threshold-validation-report.md +69 -0
  163. package/research/semantic-search/vector-search-analysis.md +63 -0
  164. package/research/task-management-2026/00-synthesis-recommendations.md +295 -0
  165. package/research/task-management-2026/01-ai-workflow-tools.md +416 -0
  166. package/research/task-management-2026/02-agent-framework-patterns.md +476 -0
  167. package/research/task-management-2026/03-lightweight-file-based.md +567 -0
  168. package/research/task-management-2026/04-established-tools-ai-features.md +541 -0
  169. package/research/task-management-2026/linear/01-core-features-workflow.md +771 -0
  170. package/research/task-management-2026/linear/02-api-integrations.md +930 -0
  171. package/research/task-management-2026/linear/03-ai-features.md +368 -0
  172. package/research/task-management-2026/linear/04-pricing-setup.md +205 -0
  173. package/research/task-management-2026/linear/05-usage-patterns-best-practices.md +605 -0
  174. package/research/test-path-issues.md +276 -0
  175. package/review/ALP-76/1-error-type-design.md +962 -0
  176. package/review/ALP-76/2-error-handling-patterns.md +906 -0
  177. package/review/ALP-76/3-error-presentation.md +624 -0
  178. package/review/ALP-76/4-test-coverage.md +625 -0
  179. package/review/ALP-76/5-migration-completeness.md +440 -0
  180. package/review/ALP-76/6-effect-best-practices.md +755 -0
  181. package/scripts/apply-branch-protection.sh +47 -0
  182. package/scripts/branch-protection-templates.json +79 -0
  183. package/scripts/prototype-summarization.ts +346 -0
  184. package/scripts/rebuild-hnswlib.js +58 -0
  185. package/scripts/setup-branch-protection.sh +64 -0
  186. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/active-provider.json +7 -0
  187. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.json +541 -0
  188. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.meta.json +5 -0
  189. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/config.json +8 -0
  190. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
  191. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
  192. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/documents.json +60 -0
  193. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/links.json +13 -0
  194. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/sections.json +1197 -0
  195. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/configuration-management.md +99 -0
  196. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/distributed-systems.md +92 -0
  197. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/error-handling.md +78 -0
  198. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/failure-automation.md +55 -0
  199. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/job-context.md +69 -0
  200. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/process-orchestration.md +99 -0
  201. package/src/cli/argv-preprocessor.test.ts +210 -0
  202. package/src/cli/argv-preprocessor.ts +202 -0
  203. package/src/cli/cli.test.ts +627 -0
  204. package/src/cli/commands/backlinks.ts +54 -0
  205. package/src/cli/commands/config-cmd.ts +642 -0
  206. package/src/cli/commands/context.ts +285 -0
  207. package/src/cli/commands/duplicates.ts +122 -0
  208. package/src/cli/commands/embeddings.ts +529 -0
  209. package/src/cli/commands/index-cmd.ts +480 -0
  210. package/src/cli/commands/index.ts +16 -0
  211. package/src/cli/commands/links.ts +52 -0
  212. package/src/cli/commands/search.ts +1281 -0
  213. package/src/cli/commands/stats.ts +149 -0
  214. package/src/cli/commands/tree.ts +128 -0
  215. package/src/cli/config-layer.ts +176 -0
  216. package/src/cli/error-handler.test.ts +235 -0
  217. package/src/cli/error-handler.ts +655 -0
  218. package/src/cli/flag-schemas.ts +341 -0
  219. package/src/cli/help.ts +588 -0
  220. package/src/cli/index.ts +9 -0
  221. package/src/cli/main.ts +435 -0
  222. package/src/cli/options.ts +41 -0
  223. package/src/cli/shared-error-handling.ts +199 -0
  224. package/src/cli/typo-suggester.test.ts +105 -0
  225. package/src/cli/typo-suggester.ts +130 -0
  226. package/src/cli/utils.ts +259 -0
  227. package/src/config/file-provider.test.ts +320 -0
  228. package/src/config/file-provider.ts +273 -0
  229. package/src/config/index.ts +72 -0
  230. package/src/config/integration.test.ts +667 -0
  231. package/src/config/precedence.test.ts +277 -0
  232. package/src/config/precedence.ts +451 -0
  233. package/src/config/schema.test.ts +414 -0
  234. package/src/config/schema.ts +603 -0
  235. package/src/config/service.test.ts +320 -0
  236. package/src/config/service.ts +243 -0
  237. package/src/config/testing.test.ts +264 -0
  238. package/src/config/testing.ts +110 -0
  239. package/src/core/index.ts +1 -0
  240. package/src/core/types.ts +113 -0
  241. package/src/duplicates/detector.test.ts +183 -0
  242. package/src/duplicates/detector.ts +414 -0
  243. package/src/duplicates/index.ts +18 -0
  244. package/src/embeddings/embedding-namespace.test.ts +300 -0
  245. package/src/embeddings/embedding-namespace.ts +947 -0
  246. package/src/embeddings/heading-boost.test.ts +222 -0
  247. package/src/embeddings/hnsw-build-options.test.ts +198 -0
  248. package/src/embeddings/hyde.test.ts +272 -0
  249. package/src/embeddings/hyde.ts +264 -0
  250. package/src/embeddings/index.ts +10 -0
  251. package/src/embeddings/openai-provider.ts +414 -0
  252. package/src/embeddings/pricing.json +22 -0
  253. package/src/embeddings/provider-constants.ts +204 -0
  254. package/src/embeddings/provider-errors.test.ts +967 -0
  255. package/src/embeddings/provider-errors.ts +565 -0
  256. package/src/embeddings/provider-factory.test.ts +240 -0
  257. package/src/embeddings/provider-factory.ts +225 -0
  258. package/src/embeddings/provider-integration.test.ts +788 -0
  259. package/src/embeddings/query-preprocessing.test.ts +187 -0
  260. package/src/embeddings/semantic-search-threshold.test.ts +508 -0
  261. package/src/embeddings/semantic-search.ts +1270 -0
  262. package/src/embeddings/types.ts +359 -0
  263. package/src/embeddings/vector-store.ts +708 -0
  264. package/src/embeddings/voyage-provider.ts +313 -0
  265. package/src/errors/errors.test.ts +845 -0
  266. package/src/errors/index.ts +533 -0
  267. package/src/index/ignore-patterns.test.ts +354 -0
  268. package/src/index/ignore-patterns.ts +305 -0
  269. package/src/index/index.ts +4 -0
  270. package/src/index/indexer.ts +684 -0
  271. package/src/index/storage.ts +260 -0
  272. package/src/index/types.ts +147 -0
  273. package/src/index/watcher.ts +189 -0
  274. package/src/index.ts +30 -0
  275. package/src/integration/search-keyword.test.ts +678 -0
  276. package/src/mcp/server.ts +612 -0
  277. package/src/parser/index.ts +1 -0
  278. package/src/parser/parser.test.ts +291 -0
  279. package/src/parser/parser.ts +394 -0
  280. package/src/parser/section-filter.test.ts +277 -0
  281. package/src/parser/section-filter.ts +392 -0
  282. package/src/search/__tests__/hybrid-search.test.ts +650 -0
  283. package/src/search/bm25-store.ts +366 -0
  284. package/src/search/cross-encoder.test.ts +253 -0
  285. package/src/search/cross-encoder.ts +406 -0
  286. package/src/search/fuzzy-search.test.ts +419 -0
  287. package/src/search/fuzzy-search.ts +273 -0
  288. package/src/search/hybrid-search.ts +448 -0
  289. package/src/search/path-matcher.test.ts +276 -0
  290. package/src/search/path-matcher.ts +33 -0
  291. package/src/search/query-parser.test.ts +260 -0
  292. package/src/search/query-parser.ts +319 -0
  293. package/src/search/searcher.test.ts +280 -0
  294. package/src/search/searcher.ts +724 -0
  295. package/src/search/wink-bm25.d.ts +30 -0
  296. package/src/summarization/cli-providers/claude.ts +202 -0
  297. package/src/summarization/cli-providers/detection.test.ts +273 -0
  298. package/src/summarization/cli-providers/detection.ts +118 -0
  299. package/src/summarization/cli-providers/index.ts +8 -0
  300. package/src/summarization/cost.test.ts +139 -0
  301. package/src/summarization/cost.ts +102 -0
  302. package/src/summarization/error-handler.test.ts +127 -0
  303. package/src/summarization/error-handler.ts +111 -0
  304. package/src/summarization/index.ts +102 -0
  305. package/src/summarization/pipeline.test.ts +498 -0
  306. package/src/summarization/pipeline.ts +231 -0
  307. package/src/summarization/prompts.test.ts +269 -0
  308. package/src/summarization/prompts.ts +133 -0
  309. package/src/summarization/provider-factory.test.ts +396 -0
  310. package/src/summarization/provider-factory.ts +178 -0
  311. package/src/summarization/types.ts +184 -0
  312. package/src/summarize/budget-bugs.test.ts +620 -0
  313. package/src/summarize/formatters.ts +419 -0
  314. package/src/summarize/index.ts +20 -0
  315. package/src/summarize/summarizer.test.ts +275 -0
  316. package/src/summarize/summarizer.ts +597 -0
  317. package/src/summarize/verify-bugs.test.ts +238 -0
  318. package/src/types/huggingface-transformers.d.ts +66 -0
  319. package/src/utils/index.ts +1 -0
  320. package/src/utils/tokens.test.ts +142 -0
  321. package/src/utils/tokens.ts +186 -0
  322. package/tests/fixtures/cli/.mdcontext/active-provider.json +7 -0
  323. package/tests/fixtures/cli/.mdcontext/config.json +8 -0
  324. package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
  325. package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
  326. package/tests/fixtures/cli/.mdcontext/indexes/documents.json +33 -0
  327. package/tests/fixtures/cli/.mdcontext/indexes/links.json +12 -0
  328. package/tests/fixtures/cli/.mdcontext/indexes/sections.json +247 -0
  329. package/tests/fixtures/cli/README.md +9 -0
  330. package/tests/fixtures/cli/api-reference.md +11 -0
  331. package/tests/fixtures/cli/getting-started.md +11 -0
  332. package/tests/integration/embed-index.test.ts +712 -0
  333. package/tests/integration/search-context.test.ts +469 -0
  334. package/tests/integration/search-semantic.test.ts +522 -0
  335. package/tsconfig.json +26 -0
  336. package/vitest.config.ts +16 -0
  337. package/vitest.setup.ts +12 -0
@@ -0,0 +1,1281 @@
1
+ /**
2
+ * SEARCH Command
3
+ *
4
+ * Search markdown content by meaning (semantic), keyword, hybrid ranking, or heading pattern.
5
+ */
6
+
7
+ import * as fs from 'node:fs/promises'
8
+ import * as path from 'node:path'
9
+ import * as readline from 'node:readline'
10
+ import { Args, Command, Options } from '@effect/cli'
11
+ import { Console, Effect, Option } from 'effect'
12
+ import { ConfigService, defaultConfig } from '../../config/index.js'
13
+ import type {
14
+ BuildEmbeddingsResult,
15
+ EmbeddingEstimate,
16
+ } from '../../embeddings/semantic-search.js'
17
+ import {
18
+ buildEmbeddings,
19
+ estimateEmbeddingCost,
20
+ semanticSearchWithStats,
21
+ } from '../../embeddings/semantic-search.js'
22
+ import type { SearchQuality } from '../../embeddings/types.js'
23
+ import { createStorage, loadSectionIndex } from '../../index/storage.js'
24
+ import { INDEX_DIR } from '../../index/types.js'
25
+ import { initializeReranker } from '../../search/cross-encoder.js'
26
+ import {
27
+ detectSearchModes,
28
+ hybridSearch,
29
+ type SearchMode,
30
+ } from '../../search/hybrid-search.js'
31
+ import { isAdvancedQuery } from '../../search/query-parser.js'
32
+ import { search, searchContent } from '../../search/searcher.js'
33
+ import {
34
+ type APIProviderName,
35
+ buildPrompt,
36
+ type CLIProviderName,
37
+ displaySummarizationError,
38
+ estimateSummaryCost,
39
+ formatResultsForSummary,
40
+ getBestAvailableSummarizer,
41
+ type SummarizableResult,
42
+ } from '../../summarization/index.js'
43
+ import { jsonOption, prettyOption } from '../options.js'
44
+ import {
45
+ createCostEstimateErrorHandler,
46
+ createEmbeddingErrorHandler,
47
+ } from '../shared-error-handling.js'
48
+ import { formatJson, getIndexInfo, isRegexPattern } from '../utils.js'
49
+
50
+ // Auto-index threshold is now configurable via search.autoIndexThreshold
51
+
52
/**
 * Report whether every refine term occurs somewhere in `content`.
 *
 * Matching is a plain case-insensitive substring test. An empty `terms`
 * list is vacuously satisfied and yields `true`.
 *
 * @param content - Text to inspect
 * @param terms - Terms that must all appear in `content`
 * @returns `true` when no term is missing from `content`
 */
const contentMatchesAllTerms = (
  content: string,
  terms: readonly string[],
): boolean => {
  const haystack = content.toLowerCase()
  for (const needle of terms) {
    if (!haystack.includes(needle.toLowerCase())) {
      return false
    }
  }
  return true
}
62
+
63
/**
 * Location of one section inside a document, as needed for refine filtering.
 */
type SectionInfo = Readonly<{
  /** Document path; filterResultsByRefineTerms joins it onto the search root. */
  documentPath: string
  /** First line of the section; consumed as `slice(startLine - 1, …)`, i.e. 1-based. */
  startLine: number
  /** Last line of the section; consumed as the exclusive slice end, i.e. inclusive 1-based. */
  endLine: number
}>
71
+
72
/**
 * Filter search results by refine terms using parallel file loading.
 *
 * Each result is mapped to a section (document path + line range); the
 * section text is read from disk and the result is kept only when it
 * contains every refine term (case-insensitive substring match).
 *
 * Files are loaded at most once per call: the cache stores the in-flight
 * read, so concurrent checks against the same document share a single
 * `readFile` and a single line split instead of racing to fill the cache.
 *
 * Results whose section info is unavailable, or whose file cannot be read
 * or is empty, are dropped. Input order is preserved.
 *
 * @param rootPath - Root path for file loading
 * @param results - Search results to filter
 * @param refineTerms - Terms that must all be present in section content
 * @param limit - Maximum results to return
 * @param getSectionInfo - Function to extract section info from a result
 * @returns The first `limit` results that pass the filter (never fails)
 */
const filterResultsByRefineTerms = <T>(
  rootPath: string,
  results: readonly T[],
  refineTerms: readonly string[],
  limit: number,
  getSectionInfo: (result: T) => SectionInfo | null,
): Effect.Effect<T[], never> =>
  Effect.gen(function* () {
    // Nothing to filter on (or nothing to filter): just apply the limit.
    if (refineTerms.length === 0 || results.length === 0) {
      return results.slice(0, limit)
    }

    // Keyed by document path. Holding the promise (not the resolved value)
    // de-duplicates reads that are started concurrently for the same file.
    const fileLinesCache = new Map<string, Promise<readonly string[] | null>>()

    const loadFileLines = (
      documentPath: string,
    ): Promise<readonly string[] | null> => {
      const cached = fileLinesCache.get(documentPath)
      if (cached !== undefined) {
        return cached
      }
      const pending = (async (): Promise<readonly string[] | null> => {
        try {
          const filePath = path.join(rootPath, documentPath)
          const content = await fs.readFile(filePath, 'utf-8')
          // Empty files are treated like unreadable ones: no section to match.
          return content.length > 0 ? content.split('\n') : null
        } catch {
          // Best effort: a missing/unreadable file simply excludes its results.
          return null
        }
      })()
      fileLinesCache.set(documentPath, pending)
      return pending
    }

    // Check each result in parallel with concurrency limit
    const checkedResults = yield* Effect.all(
      results.map((result) =>
        Effect.gen(function* () {
          const info = getSectionInfo(result)
          if (!info) return null

          const lines = yield* Effect.promise(() =>
            loadFileLines(info.documentPath),
          )
          if (!lines) return null

          // Line numbers are 1-based; clamp so a startLine of 0 cannot turn
          // into a negative index that slices from the end of the file.
          const firstIndex = Math.max(0, info.startLine - 1)
          const sectionContent = lines
            .slice(firstIndex, info.endLine)
            .join('\n')

          return contentMatchesAllTerms(sectionContent, refineTerms)
            ? result
            : null
        }),
      ),
      { concurrency: 10 },
    )

    // Filter nulls and limit results
    return checkedResults.filter((r): r is T => r !== null).slice(0, limit)
  })
143
+
144
/**
 * Ask a question and resolve with the reply, trimmed and lower-cased.
 *
 * If the input stream ends or the interface is closed before a reply is
 * given (e.g. stdin is at EOF), the promise resolves with an empty string
 * instead of staying pending forever.
 *
 * @param message - Prompt text written to `output`
 * @param input - Stream to read the reply from (defaults to `process.stdin`)
 * @param output - Stream the prompt is written to (defaults to `process.stdout`)
 * @returns The normalized reply, or `''` when no reply could be read
 */
const promptUser = (
  message: string,
  input: NodeJS.ReadableStream = process.stdin,
  output: NodeJS.WritableStream = process.stdout,
): Promise<string> => {
  return new Promise((resolve) => {
    const rl = readline.createInterface({ input, output })

    // Without this, EOF on the input leaves the question callback uncalled
    // and the promise unsettled.
    rl.once('close', () => {
      resolve('')
    })

    rl.question(message, (answer) => {
      // Resolve before closing: close() emits 'close' synchronously, and the
      // first resolve wins, so the real answer must be delivered first.
      resolve(answer.trim().toLowerCase())
      rl.close()
    })
  })
}
156
+
157
+ export const searchCommand = Command.make(
158
+ 'search',
159
+ {
160
+ query: Args.text({ name: 'query' }).pipe(
161
+ Args.withDescription('Search query (natural language or regex pattern)'),
162
+ ),
163
+ path: Args.directory({ name: 'path' }).pipe(
164
+ Args.withDescription('Directory to search in'),
165
+ Args.withDefault('.'),
166
+ ),
167
+ keyword: Options.boolean('keyword').pipe(
168
+ Options.withAlias('k'),
169
+ Options.withDescription('Force keyword search (content text match)'),
170
+ Options.withDefault(false),
171
+ ),
172
+ headingOnly: Options.boolean('heading-only').pipe(
173
+ Options.withAlias('H'),
174
+ Options.withDescription('Search headings only (not content)'),
175
+ Options.withDefault(false),
176
+ ),
177
+ mode: Options.choice('mode', ['hybrid', 'semantic', 'keyword']).pipe(
178
+ Options.withAlias('m'),
179
+ Options.withDescription(
180
+ 'Search mode: hybrid (BM25+semantic), semantic, or keyword',
181
+ ),
182
+ Options.optional,
183
+ ),
184
+ limit: Options.integer('limit').pipe(
185
+ Options.withAlias('n'),
186
+ Options.withDescription('Maximum results'),
187
+ Options.withDefault(10),
188
+ ),
189
+ threshold: Options.float('threshold').pipe(
190
+ Options.withDescription('Similarity threshold for semantic search (0-1)'),
191
+ Options.withDefault(0.35),
192
+ ),
193
+ context: Options.integer('context').pipe(
194
+ Options.withAlias('C'),
195
+ Options.withDescription('Lines of context around matches (like grep -C)'),
196
+ Options.optional,
197
+ ),
198
+ beforeContext: Options.integer('before-context').pipe(
199
+ Options.withAlias('B'),
200
+ Options.withDescription('Lines of context before matches (like grep -B)'),
201
+ Options.optional,
202
+ ),
203
+ afterContext: Options.integer('after-context').pipe(
204
+ Options.withAlias('A'),
205
+ Options.withDescription('Lines of context after matches (like grep -A)'),
206
+ Options.optional,
207
+ ),
208
+ autoIndexThreshold: Options.integer('auto-index-threshold').pipe(
209
+ Options.withDescription(
210
+ 'Auto-create semantic index if estimated time is under this threshold (seconds)',
211
+ ),
212
+ Options.optional,
213
+ ),
214
+ provider: Options.choice('provider', [
215
+ 'openai',
216
+ 'ollama',
217
+ 'lm-studio',
218
+ 'openrouter',
219
+ 'voyage',
220
+ ]).pipe(
221
+ Options.withDescription(
222
+ 'Embedding provider for semantic search: openai, ollama, lm-studio, openrouter, or voyage',
223
+ ),
224
+ Options.optional,
225
+ ),
226
+ rerank: Options.boolean('rerank').pipe(
227
+ Options.withAlias('r'),
228
+ Options.withDescription(
229
+ 'Re-rank results using cross-encoder for improved precision. Downloads ~90MB model on first use. Requires @huggingface/transformers.',
230
+ ),
231
+ Options.withDefault(false),
232
+ ),
233
+ quality: Options.choice('quality', ['fast', 'balanced', 'thorough']).pipe(
234
+ Options.withAlias('q'),
235
+ Options.withDescription(
236
+ 'Search quality mode: fast (quicker, lower recall), balanced (default), thorough (slower, better recall)',
237
+ ),
238
+ Options.optional,
239
+ ),
240
+ hyde: Options.boolean('hyde').pipe(
241
+ Options.withDescription(
242
+ 'Use HyDE (Hypothetical Document Embeddings) for complex queries. Generates a hypothetical answer with LLM, then searches using that embedding. Improves recall 10-30% on complex/ambiguous queries at cost of ~1-2s latency and LLM API usage.',
243
+ ),
244
+ Options.withDefault(false),
245
+ ),
246
+ rerankInit: Options.boolean('rerank-init').pipe(
247
+ Options.withDescription(
248
+ 'Pre-download the cross-encoder model (~90MB) for re-ranking. Use this before first search to avoid latency.',
249
+ ),
250
+ Options.withDefault(false),
251
+ ),
252
+ timeout: Options.integer('timeout').pipe(
253
+ Options.withDescription(
254
+ 'Request timeout in milliseconds for embedding API calls (default: 30000)',
255
+ ),
256
+ Options.optional,
257
+ ),
258
+ json: jsonOption,
259
+ pretty: prettyOption,
260
+ summarize: Options.boolean('summarize').pipe(
261
+ Options.withAlias('s'),
262
+ Options.withDescription('Generate AI summary of search results'),
263
+ Options.withDefault(false),
264
+ ),
265
+ yes: Options.boolean('yes').pipe(
266
+ Options.withAlias('y'),
267
+ Options.withDescription('Skip cost confirmation for paid AI providers'),
268
+ Options.withDefault(false),
269
+ ),
270
+ stream: Options.boolean('stream').pipe(
271
+ Options.withDescription('Stream AI summary output in real-time'),
272
+ Options.withDefault(false),
273
+ ),
274
+ fuzzy: Options.boolean('fuzzy').pipe(
275
+ Options.withAlias('f'),
276
+ Options.withDescription(
277
+ 'Enable fuzzy matching for typo tolerance (e.g., "configration" matches "configuration")',
278
+ ),
279
+ Options.withDefault(false),
280
+ ),
281
+ stem: Options.boolean('stem').pipe(
282
+ Options.withDescription(
283
+ 'Enable word stemming (e.g., "fail" matches "failure", "failed", "failing")',
284
+ ),
285
+ Options.withDefault(false),
286
+ ),
287
+ fuzzyDistance: Options.integer('fuzzy-distance').pipe(
288
+ Options.withDescription(
289
+ 'Max edit distance for fuzzy matching (default: 2)',
290
+ ),
291
+ Options.optional,
292
+ ),
293
+ refine: Options.text('refine').pipe(
294
+ Options.withDescription(
295
+ 'Additional filter terms to narrow results (can be used multiple times)',
296
+ ),
297
+ Options.repeated,
298
+ ),
299
+ },
300
+ ({
301
+ query,
302
+ path: dirPath,
303
+ keyword,
304
+ headingOnly,
305
+ mode,
306
+ limit,
307
+ threshold,
308
+ context,
309
+ beforeContext,
310
+ afterContext,
311
+ autoIndexThreshold,
312
+ provider,
313
+ rerank,
314
+ quality,
315
+ hyde,
316
+ rerankInit,
317
+ timeout,
318
+ json,
319
+ pretty,
320
+ summarize,
321
+ yes,
322
+ stream,
323
+ fuzzy,
324
+ stem,
325
+ fuzzyDistance,
326
+ refine,
327
+ }) =>
328
+ Effect.gen(function* () {
329
+ const resolvedDir = path.resolve(dirPath)
330
+
331
+ // Handle --rerank-init: pre-download model and exit
332
+ if (rerankInit) {
333
+ yield* Console.log(
334
+ 'Initializing cross-encoder model (~90MB download)...',
335
+ )
336
+
337
+ const cacheDir = path.join(resolvedDir, INDEX_DIR, 'models')
338
+
339
+ const result = yield* initializeReranker(cacheDir, (progress) => {
340
+ if (progress.status === 'loading' && progress.file) {
341
+ const pct = progress.progress
342
+ ? ` (${Math.round(progress.progress)}%)`
343
+ : ''
344
+ process.stdout.write(`\r Downloading: ${progress.file}${pct}`)
345
+ }
346
+ }).pipe(
347
+ Effect.map(() => true),
348
+ Effect.catchTag('RerankerError', (e) => {
349
+ if (e.reason === 'DependencyMissing') {
350
+ return Effect.succeed(false)
351
+ }
352
+ return Effect.fail(e)
353
+ }),
354
+ )
355
+
356
+ if (!result) {
357
+ yield* Console.log('')
358
+ yield* Console.log('Error: @huggingface/transformers not installed.')
359
+ yield* Console.log(
360
+ 'Install with: npm install @huggingface/transformers',
361
+ )
362
+ return
363
+ }
364
+
365
+ yield* Console.log('')
366
+ yield* Console.log('Cross-encoder model initialized successfully.')
367
+ yield* Console.log('Use --rerank on searches for improved precision.')
368
+ return
369
+ }
370
+
371
+ // Get configuration (with fallback to defaults if not available)
372
+ const config = yield* Effect.serviceOption(ConfigService).pipe(
373
+ Effect.map(Option.getOrElse(() => defaultConfig)),
374
+ )
375
+ const searchConfig = config.search
376
+
377
+ // Apply config-based defaults when CLI options use their static defaults
378
+ // Note: CLI options have static defaults for help text; config overrides those defaults
379
+ const effectiveLimit = limit === 10 ? searchConfig.defaultLimit : limit
380
+ const effectiveThreshold =
381
+ threshold === 0.35 ? searchConfig.minSimilarity : threshold
382
+ const effectiveAutoIndexThreshold = Option.getOrElse(
383
+ autoIndexThreshold,
384
+ () => searchConfig.autoIndexThreshold,
385
+ )
386
+
387
+ // Get index info for display
388
+ const indexInfo = yield* Effect.promise(() => getIndexInfo(resolvedDir))
389
+
390
+ // Check if no index exists
391
+ if (!indexInfo.exists && !json) {
392
+ yield* Console.log('No index found.')
393
+ yield* Console.log('')
394
+ yield* Console.log('Run: mdcontext index /path/to/docs')
395
+ yield* Console.log(' Add --embed for semantic search capabilities')
396
+ return
397
+ }
398
+
399
+ // Determine the actual index root (may be a parent directory)
400
+ const indexRoot = indexInfo.indexRoot ?? resolvedDir
401
+
402
+ // Calculate path filter for scoped search
403
+ // If searching a subdirectory, filter results to that path
404
+ let scopedPathPattern: string | undefined
405
+ if (indexInfo.indexRoot && indexInfo.indexRoot !== resolvedDir) {
406
+ // Get relative path from index root to search dir
407
+ const relativePath = path.relative(indexRoot, resolvedDir)
408
+ // Create pattern to match files in this directory and subdirectories
409
+ scopedPathPattern = `${relativePath}/*`
410
+ if (!json) {
411
+ yield* Console.log(`Searching within: ${relativePath}/`)
412
+ yield* Console.log('')
413
+ }
414
+ }
415
+
416
+ // Check available search modes
417
+ const searchModes = yield* detectSearchModes(indexRoot)
418
+ let embedsExist = searchModes.hasEmbeddings
419
+
420
+ // Determine search mode
421
+ // Priority: --mode flag > --keyword flag > advanced query > auto-detect
422
+ let effectiveMode: SearchMode
423
+ let modeReason: string
424
+
425
+ const modeValue = Option.getOrUndefined(mode)
426
+
427
+ if (modeValue === 'hybrid') {
428
+ effectiveMode = 'hybrid'
429
+ modeReason = '--mode hybrid'
430
+ } else if (modeValue === 'semantic') {
431
+ if (!embedsExist) {
432
+ embedsExist = yield* handleMissingEmbeddings(
433
+ indexRoot,
434
+ effectiveAutoIndexThreshold,
435
+ json,
436
+ )
437
+ if (!embedsExist) {
438
+ return
439
+ }
440
+ }
441
+ effectiveMode = 'semantic'
442
+ modeReason = '--mode semantic'
443
+ } else if (modeValue === 'keyword') {
444
+ effectiveMode = 'keyword'
445
+ modeReason = '--mode keyword'
446
+ } else if (keyword) {
447
+ effectiveMode = 'keyword'
448
+ modeReason = '--keyword flag'
449
+ } else if (isAdvancedQuery(query)) {
450
+ effectiveMode = 'keyword'
451
+ modeReason = 'boolean/phrase pattern detected'
452
+ } else if (isRegexPattern(query)) {
453
+ effectiveMode = 'keyword'
454
+ modeReason = 'regex pattern detected'
455
+ } else {
456
+ // Auto-detect best mode based on available indexes
457
+ effectiveMode = searchModes.recommendedMode
458
+ if (effectiveMode === 'hybrid') {
459
+ modeReason = 'both indexes available'
460
+ } else if (effectiveMode === 'semantic') {
461
+ modeReason = 'embeddings available'
462
+ } else {
463
+ modeReason = 'no embeddings'
464
+ }
465
+ }
466
+
467
+ const modeIndicator = `[${effectiveMode}]`
468
+
469
+ // Show index info (non-JSON mode)
470
+ if (!json && indexInfo.lastUpdated) {
471
+ const lastUpdatedDate = new Date(indexInfo.lastUpdated)
472
+ const dateStr = lastUpdatedDate.toLocaleDateString('en-CA')
473
+ const timeStr = lastUpdatedDate.toLocaleTimeString('en-US', {
474
+ hour: '2-digit',
475
+ minute: '2-digit',
476
+ hour12: false,
477
+ })
478
+ yield* Console.log(`Using index from ${dateStr} ${timeStr}`)
479
+ yield* Console.log(` Sections: ${indexInfo.sectionCount ?? 0}`)
480
+ if (indexInfo.embeddingsExist) {
481
+ yield* Console.log(
482
+ ` Embeddings: yes (${indexInfo.vectorCount ?? 0} vectors)`,
483
+ )
484
+ } else {
485
+ yield* Console.log(' Embeddings: no')
486
+ }
487
+ yield* Console.log('')
488
+ }
489
+
490
+ // Calculate context lines
491
+ // -C sets both before and after; -B and -A override individual sides
492
+ const contextValue = Option.getOrUndefined(context)
493
+ const beforeValue = Option.getOrUndefined(beforeContext)
494
+ const afterValue = Option.getOrUndefined(afterContext)
495
+
496
+ const contextBefore = beforeValue ?? contextValue
497
+ const contextAfter = afterValue ?? contextValue
498
+
499
+ if (effectiveMode === 'hybrid') {
500
+ // Hybrid search - combines BM25 and semantic with RRF
501
+ const effectiveQuality = Option.getOrUndefined(quality) as
502
+ | SearchQuality
503
+ | undefined
504
+ // Get more results if refinement is needed (we'll filter down later)
505
+ const refineTerms = refine.length > 0 ? refine : []
506
+ const fetchLimit =
507
+ refineTerms.length > 0 ? effectiveLimit * 5 : effectiveLimit
508
+
509
+ const { results: rawResults, stats } = yield* hybridSearch(
510
+ indexRoot,
511
+ query,
512
+ {
513
+ limit: fetchLimit,
514
+ threshold: effectiveThreshold,
515
+ mode: 'hybrid',
516
+ rerank,
517
+ quality: effectiveQuality,
518
+ contextBefore,
519
+ contextAfter,
520
+ ...(scopedPathPattern && { pathPattern: scopedPathPattern }),
521
+ },
522
+ )
523
+
524
+ // Apply refine filtering if terms provided (parallel with caching)
525
+ let results = rawResults
526
+ if (refineTerms.length > 0) {
527
+ const storage = createStorage(indexRoot)
528
+ const sectionIndex = yield* loadSectionIndex(storage)
529
+
530
+ if (sectionIndex) {
531
+ results = yield* filterResultsByRefineTerms(
532
+ indexRoot,
533
+ rawResults,
534
+ refineTerms,
535
+ effectiveLimit,
536
+ (result) => {
537
+ const section = sectionIndex.sections[result.sectionId]
538
+ return section
539
+ ? {
540
+ documentPath: result.documentPath,
541
+ startLine: section.startLine,
542
+ endLine: section.endLine,
543
+ }
544
+ : null
545
+ },
546
+ )
547
+ }
548
+ }
549
+
550
+ // Warn if reranking was requested but not applied
551
+ if (rerank && !stats.reranked && !json) {
552
+ yield* Console.log(
553
+ 'Note: --rerank requested but @huggingface/transformers not installed',
554
+ )
555
+ yield* Console.log(
556
+ ' Install with: npm install @huggingface/transformers',
557
+ )
558
+ yield* Console.log('')
559
+ }
560
+
561
+ if (json) {
562
+ const moreAvailable =
563
+ stats.totalAvailable !== undefined &&
564
+ stats.totalAvailable > results.length
565
+ ? stats.totalAvailable - results.length
566
+ : undefined
567
+ const output = {
568
+ mode: 'hybrid',
569
+ modeReason,
570
+ query,
571
+ stats,
572
+ moreAvailable,
573
+ results: results.map((r) => ({
574
+ path: r.documentPath,
575
+ heading: r.heading,
576
+ score: r.score,
577
+ similarity: r.similarity,
578
+ bm25Score: r.bm25Score,
579
+ sources: r.sources,
580
+ ...(r.contextLines && { contextLines: r.contextLines }),
581
+ })),
582
+ }
583
+ yield* Console.log(formatJson(output, pretty))
584
+ } else {
585
+ const showReason = !modeReason.startsWith('--mode')
586
+ const modeStr = showReason
587
+ ? `${modeIndicator} (${modeReason})`
588
+ : modeIndicator
589
+ yield* Console.log(`${modeStr} Searching: "${query}"`)
590
+
591
+ // Show results count with "more available" indicator if results were limited
592
+ const moreAvailable =
593
+ stats.totalAvailable !== undefined &&
594
+ stats.totalAvailable > results.length
595
+ ? stats.totalAvailable - results.length
596
+ : 0
597
+ if (moreAvailable > 0) {
598
+ yield* Console.log(
599
+ `Results: ${results.length} (${moreAvailable} more available, use --limit to see more)`,
600
+ )
601
+ } else {
602
+ yield* Console.log(`Results: ${results.length}`)
603
+ }
604
+ yield* Console.log('')
605
+
606
+ for (const result of results) {
607
+ const sources = result.sources.join('+')
608
+ const score = (result.score * 100).toFixed(1)
609
+ yield* Console.log(` ${result.documentPath}`)
610
+ yield* Console.log(
611
+ ` ${result.heading} (${score} RRF, ${sources})`,
612
+ )
613
+
614
+ if (result.contextLines && result.contextLines.length > 0) {
615
+ yield* Console.log('')
616
+ for (const ctxLine of result.contextLines) {
617
+ const marker = ctxLine.isMatch ? '>' : ' '
618
+ yield* Console.log(
619
+ ` ${marker} ${ctxLine.lineNumber}: ${ctxLine.line}`,
620
+ )
621
+ }
622
+ }
623
+
624
+ yield* Console.log('')
625
+ }
626
+ }
627
+
628
+ // Summarization for hybrid search
629
+ if (summarize && results.length > 0) {
630
+ const summarizableResults: SummarizableResult[] = results.map(
631
+ (r) => ({
632
+ documentPath: r.documentPath,
633
+ heading: r.heading,
634
+ score: r.score,
635
+ ...(r.similarity !== undefined && { similarity: r.similarity }),
636
+ }),
637
+ )
638
+ yield* runSummarization({
639
+ results: summarizableResults,
640
+ query,
641
+ searchMode: 'hybrid',
642
+ json,
643
+ yes,
644
+ stream,
645
+ config: {
646
+ mode: config.aiSummarization.mode,
647
+ provider: config.aiSummarization.provider,
648
+ },
649
+ })
650
+ }
651
+ } else if (effectiveMode === 'keyword') {
652
+ // Keyword search - content by default, heading-only if flag set
653
+ const effectiveFuzzyDistance = Option.getOrUndefined(fuzzyDistance)
654
+ const refineTerms = refine.length > 0 ? refine : []
655
+ const fetchLimit =
656
+ refineTerms.length > 0 ? effectiveLimit * 5 : effectiveLimit
657
+
658
+ let results = headingOnly
659
+ ? yield* search(indexRoot, {
660
+ heading: query,
661
+ limit: fetchLimit,
662
+ ...(scopedPathPattern && { pathPattern: scopedPathPattern }),
663
+ })
664
+ : yield* searchContent(indexRoot, {
665
+ content: query,
666
+ limit: fetchLimit,
667
+ contextBefore,
668
+ contextAfter,
669
+ fuzzy,
670
+ stem,
671
+ ...(effectiveFuzzyDistance !== undefined && {
672
+ fuzzyDistance: effectiveFuzzyDistance,
673
+ }),
674
+ ...(scopedPathPattern && { pathPattern: scopedPathPattern }),
675
+ })
676
+
677
+ // Apply refine filtering if terms provided (parallel with caching)
678
+ if (refineTerms.length > 0) {
679
+ results = yield* filterResultsByRefineTerms(
680
+ indexRoot,
681
+ results,
682
+ refineTerms,
683
+ effectiveLimit,
684
+ (result) => ({
685
+ documentPath: result.section.documentPath,
686
+ startLine: result.section.startLine,
687
+ endLine: result.section.endLine,
688
+ }),
689
+ )
690
+ }
691
+
692
+ if (json) {
693
+ const output = {
694
+ mode: 'keyword',
695
+ modeReason,
696
+ query,
697
+ contextBefore,
698
+ contextAfter,
699
+ fuzzy,
700
+ stem,
701
+ ...(effectiveFuzzyDistance !== undefined && {
702
+ fuzzyDistance: effectiveFuzzyDistance,
703
+ }),
704
+ results: results.map((r) => ({
705
+ path: r.section.documentPath,
706
+ heading: r.section.heading,
707
+ level: r.section.level,
708
+ tokens: r.section.tokenCount,
709
+ line: r.section.startLine,
710
+ matches: r.matches?.map((m) => ({
711
+ lineNumber: m.lineNumber,
712
+ line: m.line,
713
+ contextLines: m.contextLines,
714
+ })),
715
+ })),
716
+ }
717
+ yield* Console.log(formatJson(output, pretty))
718
+ } else {
719
+ const searchType = headingOnly ? 'Heading' : 'Content'
720
+ const showReason =
721
+ modeReason !== '--mode keyword' && modeReason !== '--keyword flag'
722
+ const modeStr = showReason
723
+ ? `${modeIndicator} (${modeReason})`
724
+ : modeIndicator
725
+ // Build fuzzy/stem indicator
726
+ const fuzzyIndicators: string[] = []
727
+ if (fuzzy) fuzzyIndicators.push('fuzzy')
728
+ if (stem) fuzzyIndicators.push('stem')
729
+ const fuzzyStr =
730
+ fuzzyIndicators.length > 0 ? ` [${fuzzyIndicators.join('+')}]` : ''
731
+ yield* Console.log(
732
+ `${modeStr}${fuzzyStr} ${searchType} search: "${query}"`,
733
+ )
734
+ yield* Console.log(`Results: ${results.length}`)
735
+ yield* Console.log('')
736
+
737
+ for (const result of results) {
738
+ const levelMarker = '#'.repeat(result.section.level)
739
+ yield* Console.log(
740
+ ` ${result.section.documentPath}:${result.section.startLine}`,
741
+ )
742
+ yield* Console.log(
743
+ ` ${levelMarker} ${result.section.heading} (${result.section.tokenCount} tokens)`,
744
+ )
745
+
746
+ if (result.matches && result.matches.length > 0) {
747
+ yield* Console.log('')
748
+ for (const match of result.matches.slice(0, 3)) {
749
+ if (match.contextLines && match.contextLines.length > 0) {
750
+ for (const ctxLine of match.contextLines) {
751
+ const marker = ctxLine.isMatch ? '>' : ' '
752
+ yield* Console.log(
753
+ ` ${marker} ${ctxLine.lineNumber}: ${ctxLine.line}`,
754
+ )
755
+ }
756
+ } else {
757
+ yield* Console.log(` Line ${match.lineNumber}:`)
758
+ const snippetLines = match.snippet.split('\n')
759
+ for (const line of snippetLines) {
760
+ yield* Console.log(` ${line}`)
761
+ }
762
+ }
763
+ yield* Console.log('')
764
+ }
765
+ if (result.matches.length > 3) {
766
+ yield* Console.log(
767
+ ` ... and ${result.matches.length - 3} more matches`,
768
+ )
769
+ }
770
+ }
771
+ yield* Console.log('')
772
+ }
773
+
774
+ if (!indexInfo.embeddingsExist) {
775
+ yield* Console.log(
776
+ "Tip: Run 'mdcontext index --embed' to enable semantic search",
777
+ )
778
+ }
779
+ }
780
+
781
+ // Summarization for keyword search
782
+ if (summarize && results.length > 0) {
783
+ const summarizableResults: SummarizableResult[] = results.map(
784
+ (r) => ({
785
+ documentPath: r.section.documentPath,
786
+ heading: r.section.heading,
787
+ }),
788
+ )
789
+ yield* runSummarization({
790
+ results: summarizableResults,
791
+ query,
792
+ searchMode: 'keyword',
793
+ json,
794
+ yes,
795
+ stream,
796
+ config: {
797
+ mode: config.aiSummarization.mode,
798
+ provider: config.aiSummarization.provider,
799
+ },
800
+ })
801
+ }
802
+ } else {
803
+ // Build provider config from CLI flag if specified
804
+ const cliTimeout = Option.getOrUndefined(timeout)
805
+ const providerConfig = Option.isSome(provider)
806
+ ? {
807
+ provider: provider.value as
808
+ | 'openai'
809
+ | 'ollama'
810
+ | 'lm-studio'
811
+ | 'openrouter'
812
+ | 'voyage',
813
+ timeout: cliTimeout,
814
+ }
815
+ : cliTimeout !== undefined
816
+ ? { provider: 'openai' as const, timeout: cliTimeout }
817
+ : undefined
818
+
819
+ // Semantic search with stats for below-threshold feedback
820
+ const refineTerms = refine.length > 0 ? refine : []
821
+ const fetchLimit =
822
+ refineTerms.length > 0 ? effectiveLimit * 5 : effectiveLimit
823
+
824
+ const semanticQuality = Option.getOrUndefined(quality) as
825
+ | SearchQuality
826
+ | undefined
827
+ const searchResult = yield* semanticSearchWithStats(indexRoot, query, {
828
+ limit: fetchLimit,
829
+ threshold: effectiveThreshold,
830
+ providerConfig,
831
+ quality: semanticQuality,
832
+ hyde,
833
+ contextBefore,
834
+ contextAfter,
835
+ ...(scopedPathPattern && { pathPattern: scopedPathPattern }),
836
+ })
837
+ let {
838
+ results,
839
+ belowThresholdCount,
840
+ belowThresholdHighest,
841
+ totalAvailable,
842
+ } = searchResult
843
+
844
+ // Apply refine filtering if terms provided (parallel with caching)
845
+ if (refineTerms.length > 0) {
846
+ const storage = createStorage(indexRoot)
847
+ const sectionIndex = yield* loadSectionIndex(storage)
848
+
849
+ if (sectionIndex) {
850
+ results = yield* filterResultsByRefineTerms(
851
+ indexRoot,
852
+ results,
853
+ refineTerms,
854
+ effectiveLimit,
855
+ (result) => {
856
+ const section = sectionIndex.sections[result.sectionId]
857
+ return section
858
+ ? {
859
+ documentPath: result.documentPath,
860
+ startLine: section.startLine,
861
+ endLine: section.endLine,
862
+ }
863
+ : null
864
+ },
865
+ )
866
+ }
867
+ }
868
+
869
+ if (json) {
870
+ const moreAvailableSemantic =
871
+ totalAvailable !== undefined && totalAvailable > results.length
872
+ ? totalAvailable - results.length
873
+ : undefined
874
+ const output = {
875
+ mode: 'semantic',
876
+ modeReason,
877
+ query,
878
+ hyde,
879
+ results,
880
+ belowThresholdCount,
881
+ belowThresholdHighest,
882
+ moreAvailable: moreAvailableSemantic,
883
+ }
884
+ yield* Console.log(formatJson(output, pretty))
885
+ } else {
886
+ const showSemanticReason = modeReason !== '--mode semantic'
887
+ const semanticModeStr = showSemanticReason
888
+ ? `${modeIndicator} (${modeReason})`
889
+ : modeIndicator
890
+ const hydeIndicator = hyde ? ' [HyDE]' : ''
891
+ yield* Console.log(
892
+ `${semanticModeStr}${hydeIndicator} Semantic search: "${query}"`,
893
+ )
894
+
895
+ // Show results count with "more available" indicator if results were limited
896
+ const moreAvailableSemantic =
897
+ totalAvailable !== undefined && totalAvailable > results.length
898
+ ? totalAvailable - results.length
899
+ : 0
900
+ if (moreAvailableSemantic > 0) {
901
+ yield* Console.log(
902
+ `Results: ${results.length} (${moreAvailableSemantic} more available, use --limit to see more)`,
903
+ )
904
+ } else {
905
+ yield* Console.log(`Results: ${results.length}`)
906
+ }
907
+ yield* Console.log('')
908
+
909
+ for (const result of results) {
910
+ const similarity = (result.similarity * 100).toFixed(1)
911
+ yield* Console.log(` ${result.documentPath}`)
912
+ yield* Console.log(` ${result.heading} (${similarity}% match)`)
913
+
914
+ if (result.contextLines && result.contextLines.length > 0) {
915
+ yield* Console.log('')
916
+ for (const ctxLine of result.contextLines) {
917
+ const marker = ctxLine.isMatch ? '>' : ' '
918
+ yield* Console.log(
919
+ ` ${marker} ${ctxLine.lineNumber}: ${ctxLine.line}`,
920
+ )
921
+ }
922
+ }
923
+
924
+ yield* Console.log('')
925
+ }
926
+
927
+ // Show below-threshold feedback when 0 results but content exists
928
+ if (
929
+ results.length === 0 &&
930
+ belowThresholdCount !== undefined &&
931
+ belowThresholdCount > 0 &&
932
+ belowThresholdHighest !== undefined
933
+ ) {
934
+ const highestPct = (belowThresholdHighest * 100).toFixed(1)
935
+ const suggestedThreshold = Math.max(
936
+ 0.1,
937
+ belowThresholdHighest - 0.05,
938
+ ).toFixed(2)
939
+ yield* Console.log(
940
+ `Note: ${belowThresholdCount} results found below ${(effectiveThreshold * 100).toFixed(0)}% threshold (highest: ${highestPct}%)`,
941
+ )
942
+ yield* Console.log(
943
+ `Tip: Use --threshold ${suggestedThreshold} to see more results`,
944
+ )
945
+ yield* Console.log('')
946
+ }
947
+
948
+ yield* Console.log('Tip: Use --mode keyword for exact text matching')
949
+ }
950
+
951
+ // Summarization for semantic search
952
+ if (summarize && results.length > 0) {
953
+ const summarizableResults: SummarizableResult[] = results.map(
954
+ (r) => ({
955
+ documentPath: r.documentPath,
956
+ heading: r.heading,
957
+ similarity: r.similarity,
958
+ }),
959
+ )
960
+ yield* runSummarization({
961
+ results: summarizableResults,
962
+ query,
963
+ searchMode: 'semantic',
964
+ json,
965
+ yes,
966
+ stream,
967
+ config: {
968
+ mode: config.aiSummarization.mode,
969
+ provider: config.aiSummarization.provider,
970
+ },
971
+ })
972
+ }
973
+ }
974
+ }),
975
+ ).pipe(Command.withDescription('Search by meaning or structure'))
976
+
977
/**
 * Inputs accepted by the AI summarization step that runs after a search.
 */
interface SummarizationOptions {
  /** Search hits that will be condensed into a summary */
  readonly results: ReadonlyArray<SummarizableResult>
  /** The query text the search was run with */
  readonly query: string
  /** Which search mode produced `results` */
  readonly searchMode: 'hybrid' | 'semantic' | 'keyword'
  /** When true, human-readable progress/cost output is suppressed */
  readonly json: boolean
  /** When true, the paid-provider consent prompt is skipped */
  readonly yes: boolean
  /** When true, streaming output is requested (used only if the summarizer supports it) */
  readonly stream: boolean
  /** Summarizer selection passed to the provider lookup */
  readonly config: {
    readonly mode: 'cli' | 'api'
    readonly provider: CLIProviderName | APIProviderName
  }
}
992
+
993
/**
 * Summarize search results with AI, without ever failing the caller.
 *
 * Delegates to runSummarizationUnsafe (cost estimation, user consent and
 * output formatting live there) and absorbs any failure it produces.
 *
 * GRACEFUL DEGRADATION: a failure is reported through
 * displaySummarizationError (skipped entirely in JSON mode) and the effect
 * then completes normally, so the search results already printed stay valid.
 */
const runSummarization = (
  options: SummarizationOptions,
): Effect.Effect<void, never> =>
  Effect.catchAll(runSummarizationUnsafe(options), (failure) =>
    Effect.sync(() => {
      // JSON consumers get no human-readable error text
      if (options.json) {
        return
      }
      displaySummarizationError(failure)
    }),
  )
1012
+
1013
/**
 * Internal implementation that may fail.
 * Wrapped by runSummarization for graceful error handling.
 *
 * Flow:
 *  1. Return early when there is nothing to summarize.
 *  2. Resolve a summarizer for the configured mode/provider.
 *  3. Estimate the cost; in non-JSON mode print it and, for paid providers,
 *     ask for consent unless `yes` is set.
 *  4. Produce the summary, streamed or in one piece.
 *  5. In non-JSON mode print a footer with duration and cost.
 *
 * Streaming writes raw text chunks straight to stdout, so it is only used for
 * human-readable output. With `json` set the non-streaming path always runs
 * and emits a single JSON document, even if `stream` was requested.
 *
 * Fails with an Error when no summarizer is available or the summarizer call
 * rejects.
 */
const runSummarizationUnsafe = (
  options: SummarizationOptions,
): Effect.Effect<void, Error> =>
  Effect.gen(function* () {
    const { results, query, searchMode, json, yes, stream, config } = options

    if (results.length === 0) {
      if (!json) {
        yield* Console.log('No results to summarize.')
      }
      return
    }

    // Get summarizer; a missing provider is turned into a failure here so the
    // wrapper can report it
    const summarizerData = yield* Effect.tryPromise({
      try: async () => {
        const result = await getBestAvailableSummarizer({
          mode: config.mode,
          provider: config.provider,
        })
        if (!result) {
          throw new Error('No summarization providers available')
        }
        return result
      },
      catch: (e) => new Error(`Failed to get summarizer: ${e}`),
    })

    const { summarizer, config: resolvedConfig } = summarizerData

    // Format results for summary input
    const resultsText = formatResultsForSummary(results)

    // Estimate cost
    const costEstimate = estimateSummaryCost(
      resultsText,
      resolvedConfig.mode,
      resolvedConfig.provider,
    )
    // Shared by the JSON payload and the human-readable footer
    const costLabel = costEstimate.isPaid ? costEstimate.formattedCost : 'FREE'

    // Display cost info
    if (!json) {
      if (costEstimate.isPaid) {
        yield* Console.log('')
        yield* Console.log('Cost Estimate:')
        yield* Console.log(` Provider: ${costEstimate.provider}`)
        yield* Console.log(
          ` Input tokens: ~${costEstimate.inputTokens.toLocaleString()}`,
        )
        yield* Console.log(
          ` Output tokens: ~${costEstimate.outputTokens.toLocaleString()}`,
        )
        yield* Console.log(` Estimated cost: ${costEstimate.formattedCost}`)

        // Get user consent if needed
        if (!yes) {
          const answer = yield* Effect.promise(() =>
            promptUser('Continue with summarization? [Y/n]: '),
          )
          if (answer === 'n' || answer === 'no') {
            yield* Console.log('Summarization cancelled.')
            return
          }
        }
      } else {
        yield* Console.log('')
        yield* Console.log(
          `Using ${resolvedConfig.provider} (subscription - FREE)`,
        )
      }
    }

    // Build prompt
    const prompt = buildPrompt({
      query,
      resultCount: results.length,
      searchMode,
    })

    // Generate summary
    if (!json) {
      yield* Console.log('')
      yield* Console.log('--- AI Summary ---')
      yield* Console.log('')
    }

    const startTime = Date.now()

    // Streaming is skipped in JSON mode: raw chunks on stdout would not be
    // valid JSON and the summary object below would never be emitted
    if (stream && !json && 'summarizeStream' in summarizer) {
      // Streaming output
      yield* Effect.tryPromise({
        try: () =>
          (
            summarizer as {
              summarizeStream: (
                input: string,
                prompt: string,
                options: { onChunk: (chunk: string) => void },
              ) => Promise<void>
            }
          ).summarizeStream(resultsText, prompt, {
            onChunk: (chunk) => {
              process.stdout.write(chunk)
            },
          }),
        catch: (e) => new Error(`Summarization failed: ${e}`),
      })
      yield* Console.log('') // Final newline
    } else {
      // Non-streaming output
      const summaryResult = yield* Effect.tryPromise({
        try: () => summarizer.summarize(resultsText, prompt),
        catch: (e) => new Error(`Summarization failed: ${e}`),
      })

      if (json) {
        yield* Console.log(
          JSON.stringify(
            {
              summary: summaryResult.summary,
              provider: summaryResult.provider,
              mode: summaryResult.mode,
              durationMs: summaryResult.durationMs,
              cost: costLabel,
            },
            null,
            2,
          ),
        )
      } else {
        yield* Console.log(summaryResult.summary)
      }
    }

    const durationMs = Date.now() - startTime
    if (!json) {
      yield* Console.log('')
      yield* Console.log('------------------')
      yield* Console.log(
        `Generated in ${(durationMs / 1000).toFixed(1)}s | ${costLabel}`,
      )
    }
  })
1162
+
1163
/**
 * Build embeddings for `resolvedDir` and report the outcome.
 *
 * Per-file progress and the completion line are printed only when `json` is
 * false. Resolves to true when a build result was obtained, and to false when
 * the error handler attached to the build yielded no result.
 */
const buildEmbeddingsAndReport = (
  resolvedDir: string,
  json: boolean,
): Effect.Effect<boolean, Error> =>
  Effect.gen(function* () {
    // Note: Graceful degradation - embedding errors fall back to keyword search
    const result = yield* buildEmbeddings(resolvedDir, {
      force: false,
      onFileProgress: (progress) => {
        if (!json) {
          console.log(
            ` [${progress.fileIndex}/${progress.totalFiles}] ${progress.filePath}`,
          )
        }
      },
    }).pipe(
      Effect.map((r): BuildEmbeddingsResult | null => r),
      Effect.catchTags(createEmbeddingErrorHandler({ silent: json })),
    )

    if (!result) {
      return false
    }

    if (!json) {
      yield* Console.log(
        `Index created (${result.sectionsEmbedded} sections, $${result.cost.toFixed(6)})`,
      )
      yield* Console.log('')
    }

    return true
  })

/**
 * Handle the case when embeddings don't exist.
 *
 * Estimates the embedding cost first. If the estimated time is within
 * `autoIndexThreshold` seconds the index is built immediately; otherwise the
 * user is asked whether to build it now or use keyword search instead.
 *
 * @param resolvedDir - Directory whose index should receive embeddings
 * @param autoIndexThreshold - Maximum estimated build time (seconds) that is
 *   built without asking
 * @param json - When true, informational stdout output is suppressed
 * @returns true if embeddings were created, false if they were not (no cost
 *   estimate, build produced no result, or the user declined)
 */
const handleMissingEmbeddings = (
  resolvedDir: string,
  autoIndexThreshold: number,
  json: boolean,
): Effect.Effect<boolean, Error> =>
  Effect.gen(function* () {
    // Get cost estimate
    // Note: We gracefully handle errors since this is an optional auto-index feature.
    // IndexNotFoundError is expected if index doesn't exist.
    const estimate = yield* estimateEmbeddingCost(resolvedDir).pipe(
      Effect.map((r): EmbeddingEstimate | null => r),
      Effect.catchTags(createCostEstimateErrorHandler()),
    )

    if (!estimate) {
      yield* Console.error(
        'No semantic index found and could not estimate cost.',
      )
      yield* Console.error('Run "mdcontext index --embed" first.')
      return false
    }

    // Check if we should auto-index
    if (estimate.estimatedTimeSeconds <= autoIndexThreshold) {
      if (!json) {
        yield* Console.log(
          `Creating semantic index (~${estimate.estimatedTimeSeconds}s, ~$${estimate.totalCost.toFixed(4)})...`,
        )
      }
      return yield* buildEmbeddingsAndReport(resolvedDir, json)
    }

    // Prompt user for larger indexes
    if (!json) {
      yield* Console.log('')
      yield* Console.log('No semantic index found.')
      yield* Console.log('')
      yield* Console.log('Options:')
      yield* Console.log(
        ` 1. Create now (recommended, ~${estimate.estimatedTimeSeconds}s, ~$${estimate.totalCost.toFixed(4)})`,
      )
      yield* Console.log(' 2. Use keyword search instead')
      yield* Console.log('')
    }

    // NOTE(review): the prompt is still issued in JSON mode; whether promptUser
    // writes to stdout is not visible here - confirm this is intended
    const answer = yield* Effect.promise(() => promptUser('Choice [1]: '))

    // Empty input accepts the default choice (1)
    if (answer === '' || answer === '1') {
      if (!json) {
        yield* Console.log('')
        yield* Console.log('Building embeddings...')
      }
      return yield* buildEmbeddingsAndReport(resolvedDir, json)
    }

    // User chose keyword search; keep stdout clean in JSON mode like every
    // other informational message in this function
    if (!json) {
      yield* Console.log('')
      yield* Console.log('Falling back to keyword search.')
    }
    return false
  })