mdcontext 0.0.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (337) hide show
  1. package/.changeset/README.md +28 -0
  2. package/.changeset/config.json +11 -0
  3. package/.claude/settings.local.json +25 -0
  4. package/.github/workflows/ci.yml +83 -0
  5. package/.github/workflows/claude-code-review.yml +44 -0
  6. package/.github/workflows/claude.yml +85 -0
  7. package/.github/workflows/release.yml +113 -0
  8. package/.tldrignore +112 -0
  9. package/BACKLOG.md +338 -0
  10. package/CONTRIBUTING.md +186 -0
  11. package/NOTES/NOTES +44 -0
  12. package/README.md +434 -11
  13. package/biome.json +36 -0
  14. package/cspell.config.yaml +14 -0
  15. package/dist/chunk-23UPXDNL.js +3044 -0
  16. package/dist/chunk-2W7MO2DL.js +1366 -0
  17. package/dist/chunk-3NUAZGMA.js +1689 -0
  18. package/dist/chunk-7TOWB2XB.js +366 -0
  19. package/dist/chunk-7XOTOADQ.js +3065 -0
  20. package/dist/chunk-AH2PDM2K.js +3042 -0
  21. package/dist/chunk-BNXWSZ63.js +3742 -0
  22. package/dist/chunk-BTL5DJVU.js +3222 -0
  23. package/dist/chunk-HDHYG7E4.js +104 -0
  24. package/dist/chunk-HLR4KZBP.js +3234 -0
  25. package/dist/chunk-IP3FRFEB.js +1045 -0
  26. package/dist/chunk-KHU56VDO.js +3042 -0
  27. package/dist/chunk-KRYIFLQR.js +88 -0
  28. package/dist/chunk-LBSDNLEM.js +287 -0
  29. package/dist/chunk-MNTQ7HCP.js +2643 -0
  30. package/dist/chunk-MUJELQQ6.js +1387 -0
  31. package/dist/chunk-MXJGMSLV.js +2199 -0
  32. package/dist/chunk-N6QJGC3Z.js +2636 -0
  33. package/dist/chunk-OBELGBPM.js +1713 -0
  34. package/dist/chunk-OT7R5XTA.js +3192 -0
  35. package/dist/chunk-P7X4RA2T.js +106 -0
  36. package/dist/chunk-PIDUQNC2.js +3185 -0
  37. package/dist/chunk-POGCDIH4.js +3187 -0
  38. package/dist/chunk-PSIEOQGZ.js +3043 -0
  39. package/dist/chunk-PVRT3IHA.js +3238 -0
  40. package/dist/chunk-QNN4TT23.js +1430 -0
  41. package/dist/chunk-RE3R45RJ.js +3042 -0
  42. package/dist/chunk-S7E6TFX6.js +803 -0
  43. package/dist/chunk-SG6GLU4U.js +1378 -0
  44. package/dist/chunk-SJCDV2ST.js +274 -0
  45. package/dist/chunk-SYE5XLF3.js +104 -0
  46. package/dist/chunk-T5VLYBZD.js +103 -0
  47. package/dist/chunk-TOQB7VWU.js +3238 -0
  48. package/dist/chunk-VFNMZ4ZQ.js +3228 -0
  49. package/dist/chunk-VVTGZNBT.js +1629 -0
  50. package/dist/chunk-W7Q4RFEV.js +104 -0
  51. package/dist/chunk-XTYYVRLO.js +3190 -0
  52. package/dist/chunk-Y6MDYVJD.js +3063 -0
  53. package/dist/cli/main.d.ts +1 -0
  54. package/dist/cli/main.js +5458 -0
  55. package/dist/index.d.ts +653 -0
  56. package/dist/index.js +79 -0
  57. package/dist/mcp/server.d.ts +1 -0
  58. package/dist/mcp/server.js +472 -0
  59. package/dist/schema-BAWSG7KY.js +22 -0
  60. package/dist/schema-E3QUPL26.js +20 -0
  61. package/dist/schema-EHL7WUT6.js +20 -0
  62. package/docs/019-USAGE.md +625 -0
  63. package/docs/020-current-implementation.md +364 -0
  64. package/docs/021-DOGFOODING-FINDINGS.md +175 -0
  65. package/docs/BACKLOG.md +80 -0
  66. package/docs/CONFIG.md +1123 -0
  67. package/docs/DESIGN.md +439 -0
  68. package/docs/ERRORS.md +383 -0
  69. package/docs/PROJECT.md +88 -0
  70. package/docs/ROADMAP.md +407 -0
  71. package/docs/summarization.md +320 -0
  72. package/docs/test-links.md +9 -0
  73. package/justfile +40 -0
  74. package/package.json +74 -9
  75. package/pnpm-workspace.yaml +5 -0
  76. package/research/INDEX.md +315 -0
  77. package/research/code-review/README.md +90 -0
  78. package/research/code-review/cli-error-handling-review.md +979 -0
  79. package/research/code-review/code-review-validation-report.md +464 -0
  80. package/research/code-review/main-ts-review.md +1128 -0
  81. package/research/config-analysis/01-current-implementation.md +470 -0
  82. package/research/config-analysis/02-strategy-recommendation.md +428 -0
  83. package/research/config-analysis/03-task-candidates.md +715 -0
  84. package/research/config-analysis/033-research-configuration-management.md +828 -0
  85. package/research/config-analysis/034-research-effect-cli-config.md +1504 -0
  86. package/research/config-analysis/04-consolidated-task-candidates.md +277 -0
  87. package/research/config-docs/SUMMARY.md +357 -0
  88. package/research/config-docs/TEST-RESULTS.md +776 -0
  89. package/research/config-docs/TODO.md +542 -0
  90. package/research/config-docs/analysis.md +744 -0
  91. package/research/config-docs/fix-validation.md +502 -0
  92. package/research/config-docs/help-audit.md +264 -0
  93. package/research/config-docs/help-system-analysis.md +890 -0
  94. package/research/dogfood/consolidated-tool-evaluation.md +373 -0
  95. package/research/dogfood/strategy-a/a-synthesis.md +184 -0
  96. package/research/dogfood/strategy-a/a1-docs.md +226 -0
  97. package/research/dogfood/strategy-a/a2-amorphic.md +156 -0
  98. package/research/dogfood/strategy-a/a3-llm.md +164 -0
  99. package/research/dogfood/strategy-b/b-synthesis.md +228 -0
  100. package/research/dogfood/strategy-b/b1-architecture.md +207 -0
  101. package/research/dogfood/strategy-b/b2-gaps.md +258 -0
  102. package/research/dogfood/strategy-b/b3-workflows.md +250 -0
  103. package/research/dogfood/strategy-c/c-synthesis.md +451 -0
  104. package/research/dogfood/strategy-c/c1-explorer.md +192 -0
  105. package/research/dogfood/strategy-c/c2-diver-memory.md +145 -0
  106. package/research/dogfood/strategy-c/c3-diver-control.md +148 -0
  107. package/research/dogfood/strategy-c/c4-diver-failure.md +151 -0
  108. package/research/dogfood/strategy-c/c5-diver-execution.md +221 -0
  109. package/research/dogfood/strategy-c/c6-diver-org.md +221 -0
  110. package/research/effect-cli-error-handling.md +845 -0
  111. package/research/effect-errors-as-values.md +943 -0
  112. package/research/errors-task-analysis/00-consolidated-tasks.md +207 -0
  113. package/research/errors-task-analysis/cli-commands-analysis.md +909 -0
  114. package/research/errors-task-analysis/embeddings-analysis.md +709 -0
  115. package/research/errors-task-analysis/index-search-analysis.md +812 -0
  116. package/research/frontmatter/COMMENTS-ARE-SKIPPED.md +149 -0
  117. package/research/frontmatter/LLM-CODE-NAVIGATION.md +276 -0
  118. package/research/issue-review.md +603 -0
  119. package/research/llm-summarization/agent-cli-tools-2026.md +1082 -0
  120. package/research/llm-summarization/alternative-providers-2026.md +1428 -0
  121. package/research/llm-summarization/anthropic-2026.md +367 -0
  122. package/research/llm-summarization/claude-cli-integration.md +1706 -0
  123. package/research/llm-summarization/cli-integration-patterns.md +3155 -0
  124. package/research/llm-summarization/openai-2026.md +473 -0
  125. package/research/llm-summarization/openai-compatible-providers-2026.md +1022 -0
  126. package/research/llm-summarization/opencode-cli-integration.md +1552 -0
  127. package/research/llm-summarization/prompt-engineering-2026.md +1426 -0
  128. package/research/llm-summarization/prototype-results.md +56 -0
  129. package/research/llm-summarization/provider-switching-patterns-2026.md +2153 -0
  130. package/research/llm-summarization/typescript-llm-libraries-2026.md +2436 -0
  131. package/research/mdcontext-error-analysis.md +521 -0
  132. package/research/mdcontext-pudding/00-EXECUTIVE-SUMMARY.md +282 -0
  133. package/research/mdcontext-pudding/01-index-embed.md +956 -0
  134. package/research/mdcontext-pudding/02-search-COMMANDS.md +142 -0
  135. package/research/mdcontext-pudding/02-search-SUMMARY.md +146 -0
  136. package/research/mdcontext-pudding/02-search.md +970 -0
  137. package/research/mdcontext-pudding/03-context.md +779 -0
  138. package/research/mdcontext-pudding/04-navigation-and-analytics.md +803 -0
  139. package/research/mdcontext-pudding/04-tree.md +704 -0
  140. package/research/mdcontext-pudding/05-config.md +1038 -0
  141. package/research/mdcontext-pudding/06-links-summary.txt +87 -0
  142. package/research/mdcontext-pudding/06-links.md +679 -0
  143. package/research/mdcontext-pudding/07-stats.md +693 -0
  144. package/research/mdcontext-pudding/BUG-FIX-PLAN.md +388 -0
  145. package/research/mdcontext-pudding/P0-BUG-VALIDATION.md +167 -0
  146. package/research/mdcontext-pudding/README.md +168 -0
  147. package/research/mdcontext-pudding/TESTING-SUMMARY.md +128 -0
  148. package/research/npm_publish/011-npm-workflow-research-agent2.md +792 -0
  149. package/research/npm_publish/012-npm-workflow-research-agent1.md +530 -0
  150. package/research/npm_publish/013-npm-workflow-research-agent3.md +722 -0
  151. package/research/npm_publish/014-npm-workflow-synthesis.md +556 -0
  152. package/research/npm_publish/031-npm-workflow-task-analysis.md +134 -0
  153. package/research/research-quality-review.md +834 -0
  154. package/research/semantic-search/002-research-embedding-models.md +490 -0
  155. package/research/semantic-search/003-research-rag-alternatives.md +523 -0
  156. package/research/semantic-search/004-research-vector-search.md +841 -0
  157. package/research/semantic-search/032-research-semantic-search.md +427 -0
  158. package/research/semantic-search/embedding-text-analysis.md +156 -0
  159. package/research/semantic-search/multi-word-failure-reproduction.md +171 -0
  160. package/research/semantic-search/query-processing-analysis.md +207 -0
  161. package/research/semantic-search/root-cause-and-solution.md +114 -0
  162. package/research/semantic-search/threshold-validation-report.md +69 -0
  163. package/research/semantic-search/vector-search-analysis.md +63 -0
  164. package/research/task-management-2026/00-synthesis-recommendations.md +295 -0
  165. package/research/task-management-2026/01-ai-workflow-tools.md +416 -0
  166. package/research/task-management-2026/02-agent-framework-patterns.md +476 -0
  167. package/research/task-management-2026/03-lightweight-file-based.md +567 -0
  168. package/research/task-management-2026/04-established-tools-ai-features.md +541 -0
  169. package/research/task-management-2026/linear/01-core-features-workflow.md +771 -0
  170. package/research/task-management-2026/linear/02-api-integrations.md +930 -0
  171. package/research/task-management-2026/linear/03-ai-features.md +368 -0
  172. package/research/task-management-2026/linear/04-pricing-setup.md +205 -0
  173. package/research/task-management-2026/linear/05-usage-patterns-best-practices.md +605 -0
  174. package/research/test-path-issues.md +276 -0
  175. package/review/ALP-76/1-error-type-design.md +962 -0
  176. package/review/ALP-76/2-error-handling-patterns.md +906 -0
  177. package/review/ALP-76/3-error-presentation.md +624 -0
  178. package/review/ALP-76/4-test-coverage.md +625 -0
  179. package/review/ALP-76/5-migration-completeness.md +440 -0
  180. package/review/ALP-76/6-effect-best-practices.md +755 -0
  181. package/scripts/apply-branch-protection.sh +47 -0
  182. package/scripts/branch-protection-templates.json +79 -0
  183. package/scripts/prototype-summarization.ts +346 -0
  184. package/scripts/rebuild-hnswlib.js +58 -0
  185. package/scripts/setup-branch-protection.sh +64 -0
  186. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/active-provider.json +7 -0
  187. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.json +541 -0
  188. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.meta.json +5 -0
  189. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/config.json +8 -0
  190. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
  191. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
  192. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/documents.json +60 -0
  193. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/links.json +13 -0
  194. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/sections.json +1197 -0
  195. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/configuration-management.md +99 -0
  196. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/distributed-systems.md +92 -0
  197. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/error-handling.md +78 -0
  198. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/failure-automation.md +55 -0
  199. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/job-context.md +69 -0
  200. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/process-orchestration.md +99 -0
  201. package/src/cli/argv-preprocessor.test.ts +210 -0
  202. package/src/cli/argv-preprocessor.ts +202 -0
  203. package/src/cli/cli.test.ts +627 -0
  204. package/src/cli/commands/backlinks.ts +54 -0
  205. package/src/cli/commands/config-cmd.ts +642 -0
  206. package/src/cli/commands/context.ts +285 -0
  207. package/src/cli/commands/duplicates.ts +122 -0
  208. package/src/cli/commands/embeddings.ts +529 -0
  209. package/src/cli/commands/index-cmd.ts +480 -0
  210. package/src/cli/commands/index.ts +16 -0
  211. package/src/cli/commands/links.ts +52 -0
  212. package/src/cli/commands/search.ts +1281 -0
  213. package/src/cli/commands/stats.ts +149 -0
  214. package/src/cli/commands/tree.ts +128 -0
  215. package/src/cli/config-layer.ts +176 -0
  216. package/src/cli/error-handler.test.ts +235 -0
  217. package/src/cli/error-handler.ts +655 -0
  218. package/src/cli/flag-schemas.ts +341 -0
  219. package/src/cli/help.ts +588 -0
  220. package/src/cli/index.ts +9 -0
  221. package/src/cli/main.ts +435 -0
  222. package/src/cli/options.ts +41 -0
  223. package/src/cli/shared-error-handling.ts +199 -0
  224. package/src/cli/typo-suggester.test.ts +105 -0
  225. package/src/cli/typo-suggester.ts +130 -0
  226. package/src/cli/utils.ts +259 -0
  227. package/src/config/file-provider.test.ts +320 -0
  228. package/src/config/file-provider.ts +273 -0
  229. package/src/config/index.ts +72 -0
  230. package/src/config/integration.test.ts +667 -0
  231. package/src/config/precedence.test.ts +277 -0
  232. package/src/config/precedence.ts +451 -0
  233. package/src/config/schema.test.ts +414 -0
  234. package/src/config/schema.ts +603 -0
  235. package/src/config/service.test.ts +320 -0
  236. package/src/config/service.ts +243 -0
  237. package/src/config/testing.test.ts +264 -0
  238. package/src/config/testing.ts +110 -0
  239. package/src/core/index.ts +1 -0
  240. package/src/core/types.ts +113 -0
  241. package/src/duplicates/detector.test.ts +183 -0
  242. package/src/duplicates/detector.ts +414 -0
  243. package/src/duplicates/index.ts +18 -0
  244. package/src/embeddings/embedding-namespace.test.ts +300 -0
  245. package/src/embeddings/embedding-namespace.ts +947 -0
  246. package/src/embeddings/heading-boost.test.ts +222 -0
  247. package/src/embeddings/hnsw-build-options.test.ts +198 -0
  248. package/src/embeddings/hyde.test.ts +272 -0
  249. package/src/embeddings/hyde.ts +264 -0
  250. package/src/embeddings/index.ts +10 -0
  251. package/src/embeddings/openai-provider.ts +414 -0
  252. package/src/embeddings/pricing.json +22 -0
  253. package/src/embeddings/provider-constants.ts +204 -0
  254. package/src/embeddings/provider-errors.test.ts +967 -0
  255. package/src/embeddings/provider-errors.ts +565 -0
  256. package/src/embeddings/provider-factory.test.ts +240 -0
  257. package/src/embeddings/provider-factory.ts +225 -0
  258. package/src/embeddings/provider-integration.test.ts +788 -0
  259. package/src/embeddings/query-preprocessing.test.ts +187 -0
  260. package/src/embeddings/semantic-search-threshold.test.ts +508 -0
  261. package/src/embeddings/semantic-search.ts +1270 -0
  262. package/src/embeddings/types.ts +359 -0
  263. package/src/embeddings/vector-store.ts +708 -0
  264. package/src/embeddings/voyage-provider.ts +313 -0
  265. package/src/errors/errors.test.ts +845 -0
  266. package/src/errors/index.ts +533 -0
  267. package/src/index/ignore-patterns.test.ts +354 -0
  268. package/src/index/ignore-patterns.ts +305 -0
  269. package/src/index/index.ts +4 -0
  270. package/src/index/indexer.ts +684 -0
  271. package/src/index/storage.ts +260 -0
  272. package/src/index/types.ts +147 -0
  273. package/src/index/watcher.ts +189 -0
  274. package/src/index.ts +30 -0
  275. package/src/integration/search-keyword.test.ts +678 -0
  276. package/src/mcp/server.ts +612 -0
  277. package/src/parser/index.ts +1 -0
  278. package/src/parser/parser.test.ts +291 -0
  279. package/src/parser/parser.ts +394 -0
  280. package/src/parser/section-filter.test.ts +277 -0
  281. package/src/parser/section-filter.ts +392 -0
  282. package/src/search/__tests__/hybrid-search.test.ts +650 -0
  283. package/src/search/bm25-store.ts +366 -0
  284. package/src/search/cross-encoder.test.ts +253 -0
  285. package/src/search/cross-encoder.ts +406 -0
  286. package/src/search/fuzzy-search.test.ts +419 -0
  287. package/src/search/fuzzy-search.ts +273 -0
  288. package/src/search/hybrid-search.ts +448 -0
  289. package/src/search/path-matcher.test.ts +276 -0
  290. package/src/search/path-matcher.ts +33 -0
  291. package/src/search/query-parser.test.ts +260 -0
  292. package/src/search/query-parser.ts +319 -0
  293. package/src/search/searcher.test.ts +280 -0
  294. package/src/search/searcher.ts +724 -0
  295. package/src/search/wink-bm25.d.ts +30 -0
  296. package/src/summarization/cli-providers/claude.ts +202 -0
  297. package/src/summarization/cli-providers/detection.test.ts +273 -0
  298. package/src/summarization/cli-providers/detection.ts +118 -0
  299. package/src/summarization/cli-providers/index.ts +8 -0
  300. package/src/summarization/cost.test.ts +139 -0
  301. package/src/summarization/cost.ts +102 -0
  302. package/src/summarization/error-handler.test.ts +127 -0
  303. package/src/summarization/error-handler.ts +111 -0
  304. package/src/summarization/index.ts +102 -0
  305. package/src/summarization/pipeline.test.ts +498 -0
  306. package/src/summarization/pipeline.ts +231 -0
  307. package/src/summarization/prompts.test.ts +269 -0
  308. package/src/summarization/prompts.ts +133 -0
  309. package/src/summarization/provider-factory.test.ts +396 -0
  310. package/src/summarization/provider-factory.ts +178 -0
  311. package/src/summarization/types.ts +184 -0
  312. package/src/summarize/budget-bugs.test.ts +620 -0
  313. package/src/summarize/formatters.ts +419 -0
  314. package/src/summarize/index.ts +20 -0
  315. package/src/summarize/summarizer.test.ts +275 -0
  316. package/src/summarize/summarizer.ts +597 -0
  317. package/src/summarize/verify-bugs.test.ts +238 -0
  318. package/src/types/huggingface-transformers.d.ts +66 -0
  319. package/src/utils/index.ts +1 -0
  320. package/src/utils/tokens.test.ts +142 -0
  321. package/src/utils/tokens.ts +186 -0
  322. package/tests/fixtures/cli/.mdcontext/active-provider.json +7 -0
  323. package/tests/fixtures/cli/.mdcontext/config.json +8 -0
  324. package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
  325. package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
  326. package/tests/fixtures/cli/.mdcontext/indexes/documents.json +33 -0
  327. package/tests/fixtures/cli/.mdcontext/indexes/links.json +12 -0
  328. package/tests/fixtures/cli/.mdcontext/indexes/sections.json +247 -0
  329. package/tests/fixtures/cli/README.md +9 -0
  330. package/tests/fixtures/cli/api-reference.md +11 -0
  331. package/tests/fixtures/cli/getting-started.md +11 -0
  332. package/tests/integration/embed-index.test.ts +712 -0
  333. package/tests/integration/search-context.test.ts +469 -0
  334. package/tests/integration/search-semantic.test.ts +522 -0
  335. package/tsconfig.json +26 -0
  336. package/vitest.config.ts +16 -0
  337. package/vitest.setup.ts +12 -0
@@ -0,0 +1,319 @@
1
+ /**
2
+ * Query Parser for mdcontext search
3
+ *
4
+ * Supports:
5
+ * - Boolean operators: AND, OR, NOT (case-insensitive)
6
+ * - Quoted phrases: "exact phrase"
7
+ * - Grouping: (term1 OR term2) AND term3
8
+ * - Precedence: NOT > AND > OR
9
+ */
10
+
11
+ // ============================================================================
12
+ // Types
13
+ // ============================================================================
14
+
/** Leaf node: a single bare word from the query. */
type TermNode = { type: 'term'; value: string }

/** Leaf node: the text that appeared between double quotes. */
type PhraseNode = { type: 'phrase'; value: string }

/** Binary node: both children must match. */
type AndNode = { type: 'and'; left: QueryNode; right: QueryNode }

/** Binary node: at least one child must match. */
type OrNode = { type: 'or'; left: QueryNode; right: QueryNode }

/** Unary node: the operand must not match. */
type NotNode = { type: 'not'; operand: QueryNode }

/**
 * A node of the boolean query AST, discriminated by its `type` field.
 */
export type QueryNode = TermNode | PhraseNode | AndNode | OrNode | NotNode

/**
 * Result of parsing a query string.
 */
export interface ParsedQuery {
  /** Root of the expression tree. */
  readonly ast: QueryNode
  /**
   * Values of every TERM the parser consumed, in parse order. Terms that
   * sit under a NOT are included as well.
   */
  readonly terms: readonly string[]
  /** Values of every quoted PHRASE the parser consumed, in parse order. */
  readonly phrases: readonly string[]
}
27
+
28
+ // ============================================================================
29
+ // Tokenizer
30
+ // ============================================================================
31
+
/** Token kinds produced for the boolean operator keywords. */
type OperatorTokenType = 'AND' | 'OR' | 'NOT'

/** Token kinds produced for grouping parentheses. */
type ParenTokenType = 'LPAREN' | 'RPAREN'

/** Token kinds that carry text typed by the user. */
type OperandTokenType = 'PHRASE' | 'TERM'

/** Every kind of token the tokenizer can emit. */
type TokenType = OperatorTokenType | ParenTokenType | OperandTokenType

/**
 * One lexical unit of a query string.
 */
interface Token {
  /** Kind of token. */
  type: TokenType
  /**
   * Token text: the upper-case keyword for operators, the parenthesis
   * character for LPAREN/RPAREN, the text between the quotes for a PHRASE,
   * or the word exactly as typed for a TERM.
   */
  value: string
}
38
+
/**
 * Split a query string into a flat list of tokens.
 *
 * - Whitespace separates tokens and is never emitted.
 * - `"..."` becomes a PHRASE holding the text between the quotes; a phrase
 *   with no closing quote extends to the end of the input.
 * - `(` and `)` become LPAREN / RPAREN.
 * - Any other run of characters (up to whitespace, a parenthesis or a
 *   quote) is a word: AND / OR / NOT in any letter case become operator
 *   tokens, everything else becomes a TERM with its original casing.
 *
 * @param query - Raw query text
 * @returns Tokens in the order they appear in `query`
 */
const tokenize = (query: string): Token[] => {
  const operators = new Set<string>(['AND', 'OR', 'NOT'])
  // Sticky so that the match is anchored at `lastIndex` (the cursor).
  const wordPattern = /[^\s()"]+/y
  const result: Token[] = []
  let cursor = 0

  while (cursor < query.length) {
    const ch = query.charAt(cursor)

    if (ch === '"') {
      const closing = query.indexOf('"', cursor + 1)
      const end = closing === -1 ? query.length : closing
      result.push({ type: 'PHRASE', value: query.slice(cursor + 1, end) })
      cursor = end + 1 // step over the closing quote
    } else if (ch === '(') {
      result.push({ type: 'LPAREN', value: '(' })
      cursor += 1
    } else if (ch === ')') {
      result.push({ type: 'RPAREN', value: ')' })
      cursor += 1
    } else {
      wordPattern.lastIndex = cursor
      const hit = wordPattern.exec(query)
      if (hit === null) {
        // Only whitespace fails the word pattern at this point.
        cursor += 1
      } else {
        const word = hit[0]
        const upper = word.toUpperCase()
        if (operators.has(upper)) {
          result.push({ type: upper as 'AND' | 'OR' | 'NOT', value: upper })
        } else {
          result.push({ type: 'TERM', value: word })
        }
        cursor += word.length
      }
    }
  }

  return result
}
103
+
104
+ // ============================================================================
105
+ // Parser (Recursive Descent)
106
+ // ============================================================================
107
+
/**
 * Recursive-descent parser that turns a token list into a QueryNode tree.
 *
 * Grammar:
 *   expr    -> andExpr (OR andExpr)*
 *   andExpr -> notExpr ((AND)? notExpr)*
 *   notExpr -> NOT notExpr | primary
 *   primary -> TERM | PHRASE | LPAREN expr RPAREN
 *
 * Adjacent operands with no operator between them are joined by an
 * implicit AND. The parser is lenient and never throws: a missing operand
 * becomes an empty term, a missing closing parenthesis is tolerated, and
 * closing parentheses that have no matching opener are discarded before
 * parsing so they cannot cut the query short.
 *
 * Every TERM and PHRASE that is consumed is also recorded, in parse order,
 * in `terms` / `phrases`.
 */
class Parser {
  private tokens: Token[]
  private pos: number = 0
  readonly terms: string[] = []
  readonly phrases: string[] = []

  constructor(tokens: Token[]) {
    this.tokens = tokens
  }

  /**
   * Return a copy of `tokens` without the RPAREN tokens that close nothing.
   * The input array is not modified.
   */
  private static dropUnmatchedClosers(tokens: Token[]): Token[] {
    let depth = 0
    return tokens.filter((tok) => {
      if (tok.type === 'LPAREN') {
        depth++
        return true
      }
      if (tok.type !== 'RPAREN') {
        return true
      }
      if (depth === 0) {
        return false
      }
      depth--
      return true
    })
  }

  /** Token at the cursor, or undefined once the input is exhausted. */
  private current(): Token | undefined {
    return this.tokens[this.pos]
  }

  /** Return the token at the cursor and move the cursor past it. */
  private advance(): Token | undefined {
    return this.tokens[this.pos++]
  }

  /** Consume the current token if it has the given type. */
  private match(type: TokenType): boolean {
    if (this.current()?.type === type) {
      this.advance()
      return true
    }
    return false
  }

  /**
   * Parse the whole token list.
   *
   * @returns The root node, or null when the parser was given no tokens
   */
  parse(): QueryNode | null {
    if (this.tokens.length === 0) {
      return null
    }
    // A stray ')' is the only token that makes the top-level expression
    // stop early; without this step everything after it would be ignored
    // (e.g. `foo ) bar` would parse as just `foo`).
    this.tokens = Parser.dropUnmatchedClosers(this.tokens)
    return this.parseExpr()
  }

  /** expr -> andExpr (OR andExpr)* */
  private parseExpr(): QueryNode {
    let left = this.parseAndExpr()

    while (this.match('OR')) {
      const right = this.parseAndExpr()
      left = { type: 'or', left, right }
    }

    return left
  }

  /** andExpr -> notExpr ((AND)? notExpr)* */
  private parseAndExpr(): QueryNode {
    let left = this.parseNotExpr()

    // Handle implicit AND (terms without explicit AND between them)
    while (this.match('AND') || this.isImplicitAnd()) {
      const right = this.parseNotExpr()
      left = { type: 'and', left, right }
    }

    return left
  }

  /** True when the current token can start an operand (implicit AND). */
  private isImplicitAnd(): boolean {
    const tok = this.current()
    return (
      tok?.type === 'TERM' ||
      tok?.type === 'PHRASE' ||
      tok?.type === 'NOT' ||
      tok?.type === 'LPAREN'
    )
  }

  /** notExpr -> NOT notExpr | primary */
  private parseNotExpr(): QueryNode {
    if (this.match('NOT')) {
      const operand = this.parseNotExpr()
      return { type: 'not', operand }
    }
    return this.parsePrimary()
  }

  /** primary -> TERM | PHRASE | LPAREN expr RPAREN */
  private parsePrimary(): QueryNode {
    const tok = this.current()

    if (this.match('LPAREN')) {
      const expr = this.parseExpr()
      this.match('RPAREN') // Consume closing paren (ignore if missing)
      return expr
    }

    if (tok?.type === 'PHRASE') {
      this.advance()
      this.phrases.push(tok.value)
      return { type: 'phrase', value: tok.value }
    }

    if (tok?.type === 'TERM') {
      this.advance()
      this.terms.push(tok.value)
      return { type: 'term', value: tok.value }
    }

    // No operand here (operator, ')' or end of input): stand in an empty
    // term without consuming anything so the caller can keep going.
    return { type: 'term', value: '' }
  }
}
216
+
217
+ // ============================================================================
218
+ // Public API
219
+ // ============================================================================
220
+
/**
 * Tokenize and parse a search query.
 *
 * @param query - Raw query text
 * @returns The AST together with the terms and phrases collected while
 *          parsing, or null when the query yields no tokens or no AST
 */
export const parseQuery = (query: string): ParsedQuery | null => {
  const tokenStream = tokenize(query)

  if (tokenStream.length > 0) {
    const queryParser = new Parser(tokenStream)
    const root = queryParser.parse()

    if (root) {
      return {
        ast: root,
        terms: queryParser.terms,
        phrases: queryParser.phrases,
      }
    }
  }

  return null
}
243
+
/**
 * Tell whether a query uses boolean syntax.
 *
 * @param query - Raw query text
 * @returns true if tokenizing `query` produces at least one AND, OR, NOT,
 *          quoted phrase or opening parenthesis
 */
export const isAdvancedQuery = (query: string): boolean => {
  for (const { type } of tokenize(query)) {
    switch (type) {
      case 'AND':
      case 'OR':
      case 'NOT':
      case 'PHRASE':
      case 'LPAREN':
        return true
      default:
        break
    }
  }
  return false
}
258
+
/**
 * Test a piece of text against a parsed query.
 *
 * Terms and phrases are matched as case-insensitive substrings of `text`;
 * a term with an empty value matches any text. AND / OR / NOT nodes
 * combine the results of their children.
 *
 * @param ast - Root of the query tree
 * @param text - Content to test
 * @returns true if `text` satisfies the query
 */
export const evaluateQuery = (ast: QueryNode, text: string): boolean => {
  const haystack = text.toLowerCase()
  const contains = (needle: string): boolean =>
    haystack.includes(needle.toLowerCase())

  const matches = (node: QueryNode): boolean => {
    switch (node.type) {
      case 'and':
        return matches(node.left) && matches(node.right)
      case 'or':
        return matches(node.left) || matches(node.right)
      case 'not':
        return !matches(node.operand)
      case 'phrase':
        return contains(node.value)
      case 'term':
        // An empty term is the parser's placeholder and matches anything.
        return !node.value || contains(node.value)
    }
  }

  return matches(ast)
}
291
+
/**
 * Build a regex that matches any term or phrase of a parsed query, for
 * highlighting.
 *
 * Terms are matched as whole words and phrases as literal substrings; both
 * are case-insensitive. Empty terms and phrases are ignored.
 *
 * @param parsed - Result of parsing the query
 * @returns A global, case-insensitive RegExp; when there is nothing to
 *          highlight, a RegExp that never matches
 */
export const buildHighlightPattern = (parsed: ParsedQuery): RegExp => {
  // Escape special regex chars
  const escapeChars = (s: string) => s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')

  // `\b` only asserts a boundary next to an ASCII word character, so it is
  // added only on the edges of the term that are word characters. Wrapping
  // unconditionally makes terms such as `c++`, `.env` or `café` unmatchable
  // wherever a non-word character or the string edge sits next to that edge.
  const wholeWord = (term: string): string => {
    const lead = /^\w/.test(term) ? '\\b' : ''
    const trail = /\w$/.test(term) ? '\\b' : ''
    return `${lead}${escapeChars(term)}${trail}`
  }

  const patterns: string[] = []

  for (const term of parsed.terms) {
    if (term) {
      patterns.push(wholeWord(term))
    }
  }

  for (const phrase of parsed.phrases) {
    if (phrase) {
      patterns.push(escapeChars(phrase))
    }
  }

  if (patterns.length === 0) {
    return /.^/ // Match nothing
  }

  return new RegExp(patterns.join('|'), 'gi')
}
@@ -0,0 +1,280 @@
1
+ /**
2
+ * Tests for keyword search
3
+ */
4
+
5
+ import * as fs from 'node:fs/promises'
6
+ import * as path from 'node:path'
7
+ import { Effect } from 'effect'
8
+ import { afterAll, beforeAll, describe, expect, it } from 'vitest'
9
+ import { buildIndex } from '../index/indexer.js'
10
+ import {
11
+ formatContextForLLM,
12
+ getContext,
13
+ search,
14
+ searchContent,
15
+ } from './searcher.js'
16
+
17
// Absolute path of the fixture directory, resolved against the working directory
const TEST_DIR = path.resolve('tests', 'fixtures', 'search')
19
+
20
// Run an Effect and hand back the resulting Promise so tests can await it
function runEffect<A, E>(effect: Effect.Effect<A, E>) {
  return Effect.runPromise(effect)
}
23
+
24
+ describe('search', () => {
25
beforeAll(async () => {
  // Markdown fixtures, listed in the order they are written to disk
  const fixtures = [
    {
      name: 'doc1.md',
      content: `# Document One

## Introduction

This is the introduction section.

## Code Example

Here's some code:

\`\`\`typescript
const x = 1;
\`\`\`

## Summary

A brief summary.
`,
    },
    {
      name: 'doc2.md',
      content: `# Document Two

## Overview

An overview of the document.

## Data Table

| Column A | Column B |
|----------|----------|
| Value 1 | Value 2 |

## Tasks

- Task 1
- Task 2
- Task 3
`,
    },
    {
      // Word forms of "fail" and a deliberate typo for the fuzzy/stem tests
      name: 'stem-test.md',
      content: `# Failure Handling

When the application fails, it logs the failure message.
Failed operations are retried automatically.
Failing gracefully is important for user experience.

## Configuration

The configration (typo) file is located at config.json.
Set the configuration options carefully.
`,
    },
  ]

  await fs.mkdir(TEST_DIR, { recursive: true })
  for (const { name, content } of fixtures) {
    await fs.writeFile(path.join(TEST_DIR, name), content)
  }

  // Build the index over the fixture directory
  await runEffect(buildIndex(TEST_DIR, { force: true }))
})
93
+
94
afterAll(() =>
  // Delete the fixture directory and everything in it
  fs.rm(TEST_DIR, { recursive: true, force: true }),
)
98
+
99
describe('search()', () => {
  it('should return all sections without filters', async () => {
    const hits = await runEffect(search(TEST_DIR))
    expect(hits.length).toBeGreaterThan(0)
  })

  it('should filter by heading pattern', async () => {
    const hits = await runEffect(
      search(TEST_DIR, { heading: 'Introduction|Overview' }),
    )
    const headings = hits.map((hit) => hit.section.heading)
    // One heading per alternative of the pattern
    expect(hits).toHaveLength(2)
    expect(headings).toContain('Introduction')
    expect(headings).toContain('Overview')
  })

  it('should filter by path pattern', async () => {
    const hits = await runEffect(search(TEST_DIR, { pathPattern: 'doc1*' }))
    expect(hits.length).toBeGreaterThan(0)
    for (const { section } of hits) {
      expect(section.documentPath).toMatch(/doc1/)
    }
  })

  it('should filter by hasCode', async () => {
    const hits = await runEffect(search(TEST_DIR, { hasCode: true }))
    expect(hits.length).toBeGreaterThan(0)
    for (const { section } of hits) {
      expect(section.hasCode).toBe(true)
    }
  })

  it('should filter by hasTable', async () => {
    const hits = await runEffect(search(TEST_DIR, { hasTable: true }))
    expect(hits.length).toBeGreaterThan(0)
    for (const { section } of hits) {
      expect(section.hasTable).toBe(true)
    }
  })

  it('should filter by hasList', async () => {
    const hits = await runEffect(search(TEST_DIR, { hasList: true }))
    expect(hits.length).toBeGreaterThan(0)
    for (const { section } of hits) {
      expect(section.hasList).toBe(true)
    }
  })

  it('should respect limit', async () => {
    const hits = await runEffect(search(TEST_DIR, { limit: 2 }))
    expect(hits).toHaveLength(2)
  })
})
153
+
154
describe('getContext()', () => {
  // Every test in this group reads the same fixture document
  const doc1Path = path.join(TEST_DIR, 'doc1.md')

  it('should return document context', async () => {
    const ctx = await runEffect(getContext(TEST_DIR, doc1Path))
    expect(ctx.title).toBe('Document One')
    expect(ctx.sections.length).toBeGreaterThan(0)
  })

  it('should respect maxTokens', async () => {
    const unlimited = await runEffect(getContext(TEST_DIR, doc1Path))
    // Budget of half the document's tokens, but never below 10
    const budget = Math.max(10, Math.floor(unlimited.totalTokens / 2))
    const limited = await runEffect(
      getContext(TEST_DIR, doc1Path, { maxTokens: budget }),
    )
    expect(limited.includedTokens).toBeLessThanOrEqual(budget)
    // A reduction can only be expected when the document exceeds the budget
    if (unlimited.totalTokens > budget) {
      expect(limited.includedTokens).toBeLessThan(unlimited.totalTokens)
    }
  })
})
183
+
184
describe('formatContextForLLM()', () => {
  // Load the doc1.md context and render it with the formatter under test
  const formatDoc1 = async () => {
    const ctx = await runEffect(
      getContext(TEST_DIR, path.join(TEST_DIR, 'doc1.md')),
    )
    return formatContextForLLM(ctx)
  }

  it('should format context as readable text', async () => {
    const rendered = await formatDoc1()
    expect(rendered).toContain('# Document One')
    expect(rendered).toContain('Path: doc1.md')
    expect(rendered).toContain('tokens')
  })

  it('should include content metadata markers', async () => {
    const rendered = await formatDoc1()
    expect(rendered).toContain('[code]')
  })
})
203
+
204
describe('searchContent() with fuzzy/stem matching', () => {
  it('should match stemmed variations with --stem flag', async () => {
    // Search for "fail" should match "fails", "failed", "failing", "failure"
    const results = await runEffect(
      searchContent(TEST_DIR, {
        content: 'fail',
        stem: true,
        pathPattern: 'stem-test*',
      }),
    )
    expect(results.length).toBe(1)
    expect(results[0]?.section.heading).toBe('Failure Handling')
    // Should have multiple line matches for different word forms
    expect(results[0]?.matches?.length).toBeGreaterThan(1)
  })

  it('should match typos with --fuzzy flag', async () => {
    // The fixture's Configuration section contains both the misspelling
    // "configration" and the correct "configuration"
    const results = await runEffect(
      searchContent(TEST_DIR, {
        content: 'configration',
        fuzzy: true,
        pathPattern: 'stem-test*',
      }),
    )
    expect(results.length).toBe(1)
    expect(results[0]?.section.heading).toBe('Configuration')
    // Only "at least one matching line" is pinned here, not that both the
    // typo line and the correctly spelled line are reported
    expect(results[0]?.matches?.length).toBeGreaterThanOrEqual(1)
  })

  it('should respect fuzzyDistance option', async () => {
    const strictResults = await runEffect(
      searchContent(TEST_DIR, {
        content: 'fail',
        fuzzy: true,
        fuzzyDistance: 1,
        pathPattern: 'stem-test*',
      }),
    )
    // Collect the text of every matched line
    const matchedWords = strictResults
      .flatMap((r) => r.matches?.map((m) => m.line) ?? [])
      .join(' ')
      .toLowerCase()
    // Positive case: lines containing "fail" (or a 1-edit form such as
    // "fails") are reported.
    // NOTE(review): the fixture also contains "file" (two edits from "fail"),
    // but nothing here asserts that it is excluded at distance 1 - consider
    // adding a negative assertion once the expected output is confirmed.
    expect(matchedWords).toContain('fail')
  })

  it('should still match substrings without fuzzy/stem flags', async () => {
    const results = await runEffect(
      searchContent(TEST_DIR, {
        content: 'fail',
        pathPattern: 'stem-test*',
      }),
    )
    // Without fuzzy/stem, "fail" is still found as a substring of "fails",
    // "failure", "failing" and "failed", so the search is not empty
    expect(results.length).toBeGreaterThanOrEqual(1)
  })

  it('should combine fuzzy and stem matching', async () => {
    // Enabling both flags together must still yield results
    const results = await runEffect(
      searchContent(TEST_DIR, {
        content: 'fail',
        fuzzy: true,
        stem: true,
        pathPattern: 'stem-test*',
      }),
    )
    expect(results.length).toBeGreaterThanOrEqual(1)
  })
})
280
+ })