mdcontext 0.0.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (337) hide show
  1. package/.changeset/README.md +28 -0
  2. package/.changeset/config.json +11 -0
  3. package/.claude/settings.local.json +25 -0
  4. package/.github/workflows/ci.yml +83 -0
  5. package/.github/workflows/claude-code-review.yml +44 -0
  6. package/.github/workflows/claude.yml +85 -0
  7. package/.github/workflows/release.yml +113 -0
  8. package/.tldrignore +112 -0
  9. package/BACKLOG.md +338 -0
  10. package/CONTRIBUTING.md +186 -0
  11. package/NOTES/NOTES +44 -0
  12. package/README.md +434 -11
  13. package/biome.json +36 -0
  14. package/cspell.config.yaml +14 -0
  15. package/dist/chunk-23UPXDNL.js +3044 -0
  16. package/dist/chunk-2W7MO2DL.js +1366 -0
  17. package/dist/chunk-3NUAZGMA.js +1689 -0
  18. package/dist/chunk-7TOWB2XB.js +366 -0
  19. package/dist/chunk-7XOTOADQ.js +3065 -0
  20. package/dist/chunk-AH2PDM2K.js +3042 -0
  21. package/dist/chunk-BNXWSZ63.js +3742 -0
  22. package/dist/chunk-BTL5DJVU.js +3222 -0
  23. package/dist/chunk-HDHYG7E4.js +104 -0
  24. package/dist/chunk-HLR4KZBP.js +3234 -0
  25. package/dist/chunk-IP3FRFEB.js +1045 -0
  26. package/dist/chunk-KHU56VDO.js +3042 -0
  27. package/dist/chunk-KRYIFLQR.js +88 -0
  28. package/dist/chunk-LBSDNLEM.js +287 -0
  29. package/dist/chunk-MNTQ7HCP.js +2643 -0
  30. package/dist/chunk-MUJELQQ6.js +1387 -0
  31. package/dist/chunk-MXJGMSLV.js +2199 -0
  32. package/dist/chunk-N6QJGC3Z.js +2636 -0
  33. package/dist/chunk-OBELGBPM.js +1713 -0
  34. package/dist/chunk-OT7R5XTA.js +3192 -0
  35. package/dist/chunk-P7X4RA2T.js +106 -0
  36. package/dist/chunk-PIDUQNC2.js +3185 -0
  37. package/dist/chunk-POGCDIH4.js +3187 -0
  38. package/dist/chunk-PSIEOQGZ.js +3043 -0
  39. package/dist/chunk-PVRT3IHA.js +3238 -0
  40. package/dist/chunk-QNN4TT23.js +1430 -0
  41. package/dist/chunk-RE3R45RJ.js +3042 -0
  42. package/dist/chunk-S7E6TFX6.js +803 -0
  43. package/dist/chunk-SG6GLU4U.js +1378 -0
  44. package/dist/chunk-SJCDV2ST.js +274 -0
  45. package/dist/chunk-SYE5XLF3.js +104 -0
  46. package/dist/chunk-T5VLYBZD.js +103 -0
  47. package/dist/chunk-TOQB7VWU.js +3238 -0
  48. package/dist/chunk-VFNMZ4ZQ.js +3228 -0
  49. package/dist/chunk-VVTGZNBT.js +1629 -0
  50. package/dist/chunk-W7Q4RFEV.js +104 -0
  51. package/dist/chunk-XTYYVRLO.js +3190 -0
  52. package/dist/chunk-Y6MDYVJD.js +3063 -0
  53. package/dist/cli/main.d.ts +1 -0
  54. package/dist/cli/main.js +5458 -0
  55. package/dist/index.d.ts +653 -0
  56. package/dist/index.js +79 -0
  57. package/dist/mcp/server.d.ts +1 -0
  58. package/dist/mcp/server.js +472 -0
  59. package/dist/schema-BAWSG7KY.js +22 -0
  60. package/dist/schema-E3QUPL26.js +20 -0
  61. package/dist/schema-EHL7WUT6.js +20 -0
  62. package/docs/019-USAGE.md +625 -0
  63. package/docs/020-current-implementation.md +364 -0
  64. package/docs/021-DOGFOODING-FINDINGS.md +175 -0
  65. package/docs/BACKLOG.md +80 -0
  66. package/docs/CONFIG.md +1123 -0
  67. package/docs/DESIGN.md +439 -0
  68. package/docs/ERRORS.md +383 -0
  69. package/docs/PROJECT.md +88 -0
  70. package/docs/ROADMAP.md +407 -0
  71. package/docs/summarization.md +320 -0
  72. package/docs/test-links.md +9 -0
  73. package/justfile +40 -0
  74. package/package.json +74 -9
  75. package/pnpm-workspace.yaml +5 -0
  76. package/research/INDEX.md +315 -0
  77. package/research/code-review/README.md +90 -0
  78. package/research/code-review/cli-error-handling-review.md +979 -0
  79. package/research/code-review/code-review-validation-report.md +464 -0
  80. package/research/code-review/main-ts-review.md +1128 -0
  81. package/research/config-analysis/01-current-implementation.md +470 -0
  82. package/research/config-analysis/02-strategy-recommendation.md +428 -0
  83. package/research/config-analysis/03-task-candidates.md +715 -0
  84. package/research/config-analysis/033-research-configuration-management.md +828 -0
  85. package/research/config-analysis/034-research-effect-cli-config.md +1504 -0
  86. package/research/config-analysis/04-consolidated-task-candidates.md +277 -0
  87. package/research/config-docs/SUMMARY.md +357 -0
  88. package/research/config-docs/TEST-RESULTS.md +776 -0
  89. package/research/config-docs/TODO.md +542 -0
  90. package/research/config-docs/analysis.md +744 -0
  91. package/research/config-docs/fix-validation.md +502 -0
  92. package/research/config-docs/help-audit.md +264 -0
  93. package/research/config-docs/help-system-analysis.md +890 -0
  94. package/research/dogfood/consolidated-tool-evaluation.md +373 -0
  95. package/research/dogfood/strategy-a/a-synthesis.md +184 -0
  96. package/research/dogfood/strategy-a/a1-docs.md +226 -0
  97. package/research/dogfood/strategy-a/a2-amorphic.md +156 -0
  98. package/research/dogfood/strategy-a/a3-llm.md +164 -0
  99. package/research/dogfood/strategy-b/b-synthesis.md +228 -0
  100. package/research/dogfood/strategy-b/b1-architecture.md +207 -0
  101. package/research/dogfood/strategy-b/b2-gaps.md +258 -0
  102. package/research/dogfood/strategy-b/b3-workflows.md +250 -0
  103. package/research/dogfood/strategy-c/c-synthesis.md +451 -0
  104. package/research/dogfood/strategy-c/c1-explorer.md +192 -0
  105. package/research/dogfood/strategy-c/c2-diver-memory.md +145 -0
  106. package/research/dogfood/strategy-c/c3-diver-control.md +148 -0
  107. package/research/dogfood/strategy-c/c4-diver-failure.md +151 -0
  108. package/research/dogfood/strategy-c/c5-diver-execution.md +221 -0
  109. package/research/dogfood/strategy-c/c6-diver-org.md +221 -0
  110. package/research/effect-cli-error-handling.md +845 -0
  111. package/research/effect-errors-as-values.md +943 -0
  112. package/research/errors-task-analysis/00-consolidated-tasks.md +207 -0
  113. package/research/errors-task-analysis/cli-commands-analysis.md +909 -0
  114. package/research/errors-task-analysis/embeddings-analysis.md +709 -0
  115. package/research/errors-task-analysis/index-search-analysis.md +812 -0
  116. package/research/frontmatter/COMMENTS-ARE-SKIPPED.md +149 -0
  117. package/research/frontmatter/LLM-CODE-NAVIGATION.md +276 -0
  118. package/research/issue-review.md +603 -0
  119. package/research/llm-summarization/agent-cli-tools-2026.md +1082 -0
  120. package/research/llm-summarization/alternative-providers-2026.md +1428 -0
  121. package/research/llm-summarization/anthropic-2026.md +367 -0
  122. package/research/llm-summarization/claude-cli-integration.md +1706 -0
  123. package/research/llm-summarization/cli-integration-patterns.md +3155 -0
  124. package/research/llm-summarization/openai-2026.md +473 -0
  125. package/research/llm-summarization/openai-compatible-providers-2026.md +1022 -0
  126. package/research/llm-summarization/opencode-cli-integration.md +1552 -0
  127. package/research/llm-summarization/prompt-engineering-2026.md +1426 -0
  128. package/research/llm-summarization/prototype-results.md +56 -0
  129. package/research/llm-summarization/provider-switching-patterns-2026.md +2153 -0
  130. package/research/llm-summarization/typescript-llm-libraries-2026.md +2436 -0
  131. package/research/mdcontext-error-analysis.md +521 -0
  132. package/research/mdcontext-pudding/00-EXECUTIVE-SUMMARY.md +282 -0
  133. package/research/mdcontext-pudding/01-index-embed.md +956 -0
  134. package/research/mdcontext-pudding/02-search-COMMANDS.md +142 -0
  135. package/research/mdcontext-pudding/02-search-SUMMARY.md +146 -0
  136. package/research/mdcontext-pudding/02-search.md +970 -0
  137. package/research/mdcontext-pudding/03-context.md +779 -0
  138. package/research/mdcontext-pudding/04-navigation-and-analytics.md +803 -0
  139. package/research/mdcontext-pudding/04-tree.md +704 -0
  140. package/research/mdcontext-pudding/05-config.md +1038 -0
  141. package/research/mdcontext-pudding/06-links-summary.txt +87 -0
  142. package/research/mdcontext-pudding/06-links.md +679 -0
  143. package/research/mdcontext-pudding/07-stats.md +693 -0
  144. package/research/mdcontext-pudding/BUG-FIX-PLAN.md +388 -0
  145. package/research/mdcontext-pudding/P0-BUG-VALIDATION.md +167 -0
  146. package/research/mdcontext-pudding/README.md +168 -0
  147. package/research/mdcontext-pudding/TESTING-SUMMARY.md +128 -0
  148. package/research/npm_publish/011-npm-workflow-research-agent2.md +792 -0
  149. package/research/npm_publish/012-npm-workflow-research-agent1.md +530 -0
  150. package/research/npm_publish/013-npm-workflow-research-agent3.md +722 -0
  151. package/research/npm_publish/014-npm-workflow-synthesis.md +556 -0
  152. package/research/npm_publish/031-npm-workflow-task-analysis.md +134 -0
  153. package/research/research-quality-review.md +834 -0
  154. package/research/semantic-search/002-research-embedding-models.md +490 -0
  155. package/research/semantic-search/003-research-rag-alternatives.md +523 -0
  156. package/research/semantic-search/004-research-vector-search.md +841 -0
  157. package/research/semantic-search/032-research-semantic-search.md +427 -0
  158. package/research/semantic-search/embedding-text-analysis.md +156 -0
  159. package/research/semantic-search/multi-word-failure-reproduction.md +171 -0
  160. package/research/semantic-search/query-processing-analysis.md +207 -0
  161. package/research/semantic-search/root-cause-and-solution.md +114 -0
  162. package/research/semantic-search/threshold-validation-report.md +69 -0
  163. package/research/semantic-search/vector-search-analysis.md +63 -0
  164. package/research/task-management-2026/00-synthesis-recommendations.md +295 -0
  165. package/research/task-management-2026/01-ai-workflow-tools.md +416 -0
  166. package/research/task-management-2026/02-agent-framework-patterns.md +476 -0
  167. package/research/task-management-2026/03-lightweight-file-based.md +567 -0
  168. package/research/task-management-2026/04-established-tools-ai-features.md +541 -0
  169. package/research/task-management-2026/linear/01-core-features-workflow.md +771 -0
  170. package/research/task-management-2026/linear/02-api-integrations.md +930 -0
  171. package/research/task-management-2026/linear/03-ai-features.md +368 -0
  172. package/research/task-management-2026/linear/04-pricing-setup.md +205 -0
  173. package/research/task-management-2026/linear/05-usage-patterns-best-practices.md +605 -0
  174. package/research/test-path-issues.md +276 -0
  175. package/review/ALP-76/1-error-type-design.md +962 -0
  176. package/review/ALP-76/2-error-handling-patterns.md +906 -0
  177. package/review/ALP-76/3-error-presentation.md +624 -0
  178. package/review/ALP-76/4-test-coverage.md +625 -0
  179. package/review/ALP-76/5-migration-completeness.md +440 -0
  180. package/review/ALP-76/6-effect-best-practices.md +755 -0
  181. package/scripts/apply-branch-protection.sh +47 -0
  182. package/scripts/branch-protection-templates.json +79 -0
  183. package/scripts/prototype-summarization.ts +346 -0
  184. package/scripts/rebuild-hnswlib.js +58 -0
  185. package/scripts/setup-branch-protection.sh +64 -0
  186. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/active-provider.json +7 -0
  187. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.json +541 -0
  188. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.meta.json +5 -0
  189. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/config.json +8 -0
  190. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
  191. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
  192. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/documents.json +60 -0
  193. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/links.json +13 -0
  194. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/sections.json +1197 -0
  195. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/configuration-management.md +99 -0
  196. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/distributed-systems.md +92 -0
  197. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/error-handling.md +78 -0
  198. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/failure-automation.md +55 -0
  199. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/job-context.md +69 -0
  200. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/process-orchestration.md +99 -0
  201. package/src/cli/argv-preprocessor.test.ts +210 -0
  202. package/src/cli/argv-preprocessor.ts +202 -0
  203. package/src/cli/cli.test.ts +627 -0
  204. package/src/cli/commands/backlinks.ts +54 -0
  205. package/src/cli/commands/config-cmd.ts +642 -0
  206. package/src/cli/commands/context.ts +285 -0
  207. package/src/cli/commands/duplicates.ts +122 -0
  208. package/src/cli/commands/embeddings.ts +529 -0
  209. package/src/cli/commands/index-cmd.ts +480 -0
  210. package/src/cli/commands/index.ts +16 -0
  211. package/src/cli/commands/links.ts +52 -0
  212. package/src/cli/commands/search.ts +1281 -0
  213. package/src/cli/commands/stats.ts +149 -0
  214. package/src/cli/commands/tree.ts +128 -0
  215. package/src/cli/config-layer.ts +176 -0
  216. package/src/cli/error-handler.test.ts +235 -0
  217. package/src/cli/error-handler.ts +655 -0
  218. package/src/cli/flag-schemas.ts +341 -0
  219. package/src/cli/help.ts +588 -0
  220. package/src/cli/index.ts +9 -0
  221. package/src/cli/main.ts +435 -0
  222. package/src/cli/options.ts +41 -0
  223. package/src/cli/shared-error-handling.ts +199 -0
  224. package/src/cli/typo-suggester.test.ts +105 -0
  225. package/src/cli/typo-suggester.ts +130 -0
  226. package/src/cli/utils.ts +259 -0
  227. package/src/config/file-provider.test.ts +320 -0
  228. package/src/config/file-provider.ts +273 -0
  229. package/src/config/index.ts +72 -0
  230. package/src/config/integration.test.ts +667 -0
  231. package/src/config/precedence.test.ts +277 -0
  232. package/src/config/precedence.ts +451 -0
  233. package/src/config/schema.test.ts +414 -0
  234. package/src/config/schema.ts +603 -0
  235. package/src/config/service.test.ts +320 -0
  236. package/src/config/service.ts +243 -0
  237. package/src/config/testing.test.ts +264 -0
  238. package/src/config/testing.ts +110 -0
  239. package/src/core/index.ts +1 -0
  240. package/src/core/types.ts +113 -0
  241. package/src/duplicates/detector.test.ts +183 -0
  242. package/src/duplicates/detector.ts +414 -0
  243. package/src/duplicates/index.ts +18 -0
  244. package/src/embeddings/embedding-namespace.test.ts +300 -0
  245. package/src/embeddings/embedding-namespace.ts +947 -0
  246. package/src/embeddings/heading-boost.test.ts +222 -0
  247. package/src/embeddings/hnsw-build-options.test.ts +198 -0
  248. package/src/embeddings/hyde.test.ts +272 -0
  249. package/src/embeddings/hyde.ts +264 -0
  250. package/src/embeddings/index.ts +10 -0
  251. package/src/embeddings/openai-provider.ts +414 -0
  252. package/src/embeddings/pricing.json +22 -0
  253. package/src/embeddings/provider-constants.ts +204 -0
  254. package/src/embeddings/provider-errors.test.ts +967 -0
  255. package/src/embeddings/provider-errors.ts +565 -0
  256. package/src/embeddings/provider-factory.test.ts +240 -0
  257. package/src/embeddings/provider-factory.ts +225 -0
  258. package/src/embeddings/provider-integration.test.ts +788 -0
  259. package/src/embeddings/query-preprocessing.test.ts +187 -0
  260. package/src/embeddings/semantic-search-threshold.test.ts +508 -0
  261. package/src/embeddings/semantic-search.ts +1270 -0
  262. package/src/embeddings/types.ts +359 -0
  263. package/src/embeddings/vector-store.ts +708 -0
  264. package/src/embeddings/voyage-provider.ts +313 -0
  265. package/src/errors/errors.test.ts +845 -0
  266. package/src/errors/index.ts +533 -0
  267. package/src/index/ignore-patterns.test.ts +354 -0
  268. package/src/index/ignore-patterns.ts +305 -0
  269. package/src/index/index.ts +4 -0
  270. package/src/index/indexer.ts +684 -0
  271. package/src/index/storage.ts +260 -0
  272. package/src/index/types.ts +147 -0
  273. package/src/index/watcher.ts +189 -0
  274. package/src/index.ts +30 -0
  275. package/src/integration/search-keyword.test.ts +678 -0
  276. package/src/mcp/server.ts +612 -0
  277. package/src/parser/index.ts +1 -0
  278. package/src/parser/parser.test.ts +291 -0
  279. package/src/parser/parser.ts +394 -0
  280. package/src/parser/section-filter.test.ts +277 -0
  281. package/src/parser/section-filter.ts +392 -0
  282. package/src/search/__tests__/hybrid-search.test.ts +650 -0
  283. package/src/search/bm25-store.ts +366 -0
  284. package/src/search/cross-encoder.test.ts +253 -0
  285. package/src/search/cross-encoder.ts +406 -0
  286. package/src/search/fuzzy-search.test.ts +419 -0
  287. package/src/search/fuzzy-search.ts +273 -0
  288. package/src/search/hybrid-search.ts +448 -0
  289. package/src/search/path-matcher.test.ts +276 -0
  290. package/src/search/path-matcher.ts +33 -0
  291. package/src/search/query-parser.test.ts +260 -0
  292. package/src/search/query-parser.ts +319 -0
  293. package/src/search/searcher.test.ts +280 -0
  294. package/src/search/searcher.ts +724 -0
  295. package/src/search/wink-bm25.d.ts +30 -0
  296. package/src/summarization/cli-providers/claude.ts +202 -0
  297. package/src/summarization/cli-providers/detection.test.ts +273 -0
  298. package/src/summarization/cli-providers/detection.ts +118 -0
  299. package/src/summarization/cli-providers/index.ts +8 -0
  300. package/src/summarization/cost.test.ts +139 -0
  301. package/src/summarization/cost.ts +102 -0
  302. package/src/summarization/error-handler.test.ts +127 -0
  303. package/src/summarization/error-handler.ts +111 -0
  304. package/src/summarization/index.ts +102 -0
  305. package/src/summarization/pipeline.test.ts +498 -0
  306. package/src/summarization/pipeline.ts +231 -0
  307. package/src/summarization/prompts.test.ts +269 -0
  308. package/src/summarization/prompts.ts +133 -0
  309. package/src/summarization/provider-factory.test.ts +396 -0
  310. package/src/summarization/provider-factory.ts +178 -0
  311. package/src/summarization/types.ts +184 -0
  312. package/src/summarize/budget-bugs.test.ts +620 -0
  313. package/src/summarize/formatters.ts +419 -0
  314. package/src/summarize/index.ts +20 -0
  315. package/src/summarize/summarizer.test.ts +275 -0
  316. package/src/summarize/summarizer.ts +597 -0
  317. package/src/summarize/verify-bugs.test.ts +238 -0
  318. package/src/types/huggingface-transformers.d.ts +66 -0
  319. package/src/utils/index.ts +1 -0
  320. package/src/utils/tokens.test.ts +142 -0
  321. package/src/utils/tokens.ts +186 -0
  322. package/tests/fixtures/cli/.mdcontext/active-provider.json +7 -0
  323. package/tests/fixtures/cli/.mdcontext/config.json +8 -0
  324. package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
  325. package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
  326. package/tests/fixtures/cli/.mdcontext/indexes/documents.json +33 -0
  327. package/tests/fixtures/cli/.mdcontext/indexes/links.json +12 -0
  328. package/tests/fixtures/cli/.mdcontext/indexes/sections.json +247 -0
  329. package/tests/fixtures/cli/README.md +9 -0
  330. package/tests/fixtures/cli/api-reference.md +11 -0
  331. package/tests/fixtures/cli/getting-started.md +11 -0
  332. package/tests/integration/embed-index.test.ts +712 -0
  333. package/tests/integration/search-context.test.ts +469 -0
  334. package/tests/integration/search-semantic.test.ts +522 -0
  335. package/tsconfig.json +26 -0
  336. package/vitest.config.ts +16 -0
  337. package/vitest.setup.ts +12 -0
@@ -0,0 +1,419 @@
1
+ /**
2
+ * Tests for fuzzy-search utilities
3
+ */
4
+
5
+ import { describe, expect, it } from 'vitest'
6
+ import {
7
+ buildFuzzyHighlightPattern,
8
+ findFuzzyMatches,
9
+ findMatchesInLine,
10
+ getStems,
11
+ isFuzzyMatch,
12
+ levenshteinDistance,
13
+ matchesWithOptions,
14
+ stem,
15
+ stemText,
16
+ } from './fuzzy-search.js'
17
+
18
+ describe('fuzzy-search', () => {
19
+ // ============================================================================
20
+ // Stemming Tests
21
+ // ============================================================================
22
+
23
+ describe('stem', () => { // stem(): single-word stemming; expected values are stemmer outputs, which need not be dictionary words
24
+ it('stems words to their root form', () => {
25
+ expect(stem('running')).toBe('run')
26
+ expect(stem('runner')).toBe('runner') // expected to stay as-is rather than collapse to 'run'
27
+ expect(stem('runs')).toBe('run')
28
+ })
29
+
30
+ it('handles common word forms', () => {
31
+ expect(stem('failing')).toBe('fail')
32
+ expect(stem('failed')).toBe('fail')
33
+ expect(stem('failure')).toBe('failur') // truncated stem, distinct from 'fail'
34
+ expect(stem('fails')).toBe('fail')
35
+ })
36
+
37
+ it('handles programming terms', () => {
38
+ expect(stem('configuration')).toBe('configur')
39
+ expect(stem('configuring')).toBe('configur')
40
+ expect(stem('configured')).toBe('configur')
41
+ })
42
+
43
+ it('handles irregular words', () => { // NOTE(review): the inputs below are regular -ing/-s/-ed inflections of 'test', despite the title
44
+ expect(stem('testing')).toBe('test')
45
+ expect(stem('tests')).toBe('test')
46
+ expect(stem('tested')).toBe('test')
47
+ })
48
+
49
+ it('converts to lowercase', () => { // input case must not affect the resulting stem
50
+ expect(stem('Running')).toBe('run')
51
+ expect(stem('RUNNING')).toBe('run')
52
+ expect(stem('RuNnInG')).toBe('run')
53
+ })
54
+
55
+ it('handles edge cases', () => { // empty string and one-letter words are expected back unchanged
56
+ expect(stem('')).toBe('')
57
+ expect(stem('a')).toBe('a')
58
+ expect(stem('i')).toBe('i')
59
+ })
60
+ })
61
+
62
+ describe('stemText', () => { // stemText(): tokenises a string and returns the stems as an ordered array
63
+ it('splits text and stems each word', () => {
64
+ expect(stemText('running tests')).toEqual(['run', 'test'])
65
+ expect(stemText('the quick fox')).toEqual(['the', 'quick', 'fox']) // no stop-word removal: 'the' is kept
66
+ })
67
+
68
+ it('handles punctuation and special characters', () => {
69
+ expect(stemText('hello, world!')).toEqual(['hello', 'world'])
70
+ expect(stemText('foo-bar_baz')).toEqual(['foo', 'bar', 'baz']) // hyphen and underscore both separate words
71
+ })
72
+
73
+ it('filters out empty strings', () => {
74
+ expect(stemText(' multiple spaces ')).toEqual(['multipl', 'space']) // surrounding whitespace must not produce empty tokens
75
+ })
76
+
77
+ it('handles empty input', () => { // empty and whitespace-only input both yield an empty array
78
+ expect(stemText('')).toEqual([])
79
+ expect(stemText(' ')).toEqual([])
80
+ })
81
+ })
82
+
83
+ describe('getStems', () => { // getStems(): the stems of a text collected into a Set
84
+ it('returns unique stems as a Set', () => {
85
+ const stems = getStems('running runs runner')
86
+ expect(stems).toBeInstanceOf(Set)
87
+ expect(stems.has('run')).toBe(true) // 'running' and 'runs' share this stem
88
+ expect(stems.has('runner')).toBe(true) // 'runner' keeps its own stem
89
+ })
90
+
91
+ it('deduplicates stems', () => {
92
+ const stems = getStems('test testing tests tested')
93
+ expect(stems.size).toBe(1) // all four inputs are expected to reduce to 'test'
94
+ expect(stems.has('test')).toBe(true)
95
+ })
96
+
97
+ it('handles empty input', () => {
98
+ expect(getStems('')).toEqual(new Set())
99
+ })
100
+ })
101
+
102
+ // ============================================================================
103
+ // Levenshtein Distance Tests
104
+ // ============================================================================
105
+
106
+ describe('levenshteinDistance', () => { // levenshteinDistance(): edit distance counted in insertions, deletions and substitutions
107
+ it('returns 0 for identical strings', () => {
108
+ expect(levenshteinDistance('hello', 'hello')).toBe(0)
109
+ expect(levenshteinDistance('', '')).toBe(0)
110
+ expect(levenshteinDistance('a', 'a')).toBe(0)
111
+ })
112
+
113
+ it('calculates insertion distance', () => { // second argument is longer than the first
114
+ expect(levenshteinDistance('', 'abc')).toBe(3)
115
+ expect(levenshteinDistance('ab', 'abc')).toBe(1)
116
+ expect(levenshteinDistance('a', 'abc')).toBe(2)
117
+ })
118
+
119
+ it('calculates deletion distance', () => { // mirror image of the insertion cases
120
+ expect(levenshteinDistance('abc', '')).toBe(3)
121
+ expect(levenshteinDistance('abc', 'ab')).toBe(1)
122
+ expect(levenshteinDistance('abc', 'a')).toBe(2)
123
+ })
124
+
125
+ it('calculates substitution distance', () => {
126
+ expect(levenshteinDistance('abc', 'axc')).toBe(1)
127
+ expect(levenshteinDistance('abc', 'xyz')).toBe(3)
128
+ })
129
+
130
+ it('calculates mixed operations', () => { // classic textbook pairs
131
+ expect(levenshteinDistance('kitten', 'sitting')).toBe(3)
132
+ expect(levenshteinDistance('saturday', 'sunday')).toBe(3)
133
+ })
134
+
135
+ it('handles common typos', () => {
136
+ expect(levenshteinDistance('configuration', 'configration')).toBe(1) // missing 'u'
137
+ expect(levenshteinDistance('function', 'funciton')).toBe(2) // adjacent swap 'ti' -> 'it'; expected cost is 2, i.e. a swap is not a single edit
138
+ expect(levenshteinDistance('receive', 'recieve')).toBe(2) // 'ei' -> 'ie' swap, likewise expected to cost 2
139
+ })
140
+
141
+ it('is symmetric', () => { // swapping the arguments must not change the distance
142
+ expect(levenshteinDistance('abc', 'xyz')).toBe(
143
+ levenshteinDistance('xyz', 'abc'),
144
+ )
145
+ expect(levenshteinDistance('hello', 'world')).toBe(
146
+ levenshteinDistance('world', 'hello'),
147
+ )
148
+ })
149
+ })
150
+
151
+ // ============================================================================
152
+ // Fuzzy Matching Tests
153
+ // ============================================================================
154
+
155
+ describe('isFuzzyMatch', () => { // isFuzzyMatch(): boolean check with an optional third maxDistance argument
156
+ it('matches identical strings', () => {
157
+ expect(isFuzzyMatch('hello', 'hello')).toBe(true)
158
+ })
159
+
160
+ it('matches within default distance (2)', () => {
161
+ expect(isFuzzyMatch('hello', 'helo')).toBe(true) // 1 deletion
162
+ expect(isFuzzyMatch('hello', 'helloo')).toBe(true) // 1 insertion
163
+ expect(isFuzzyMatch('hello', 'hallo')).toBe(true) // 1 substitution
164
+ expect(isFuzzyMatch('hello', 'hallo!')).toBe(true) // 2 edits: substitution (e -> a) plus an inserted '!'
165
+ })
166
+
167
+ it('does not match beyond default distance', () => {
168
+ expect(isFuzzyMatch('hello', 'hi')).toBe(false)
169
+ expect(isFuzzyMatch('hello', 'goodbye')).toBe(false)
170
+ })
171
+
172
+ it('respects custom max distance', () => { // each pair below sits exactly on either side of the given threshold
173
+ expect(isFuzzyMatch('hello', 'helo', 1)).toBe(true)
174
+ expect(isFuzzyMatch('hello', 'heo', 1)).toBe(false) // 'heo' is 2 deletions away
175
+ expect(isFuzzyMatch('hello', 'heo', 2)).toBe(true)
176
+ expect(isFuzzyMatch('hello', 'h', 3)).toBe(false) // 'h' is 4 deletions away
177
+ expect(isFuzzyMatch('hello', 'h', 4)).toBe(true)
178
+ })
179
+
180
+ it('is case-insensitive', () => {
181
+ expect(isFuzzyMatch('Hello', 'hello')).toBe(true)
182
+ expect(isFuzzyMatch('HELLO', 'hello')).toBe(true)
183
+ expect(isFuzzyMatch('HeLLo', 'hello')).toBe(true)
184
+ })
185
+
186
+ it('handles length difference optimization', () => {
187
+ // Length difference (4) > maxDistance (2) must yield false; only the result is asserted, not how quickly it is reached
188
+ expect(isFuzzyMatch('ab', 'abcdef', 2)).toBe(false)
189
+ expect(isFuzzyMatch('abcdef', 'ab', 2)).toBe(false)
190
+ })
191
+
192
+ it('handles common programming typos', () => { // all within the default distance
193
+ expect(isFuzzyMatch('function', 'funciton')).toBe(true)
194
+ expect(isFuzzyMatch('configuration', 'configration')).toBe(true)
195
+ expect(isFuzzyMatch('database', 'databse')).toBe(true)
196
+ })
197
+ })
198
+
199
+ describe('findFuzzyMatches', () => { // findFuzzyMatches(): filters a candidate word list by fuzzy match against a query
200
+ const words = ['hello', 'world', 'help', 'held', 'hero', 'helm'] // shared candidate list; every entry except 'world' is 1 edit from 'helo'
201
+
202
+ it('finds matches within distance', () => {
203
+ const matches = findFuzzyMatches('helo', words)
204
+ expect(matches).toContain('hello')
205
+ expect(matches).toContain('help')
206
+ expect(matches).toContain('held')
207
+ expect(matches).toContain('hero')
208
+ })
209
+
210
+ it('respects max distance parameter', () => {
211
+ const matches = findFuzzyMatches('helo', words, 1)
212
+ expect(matches).toContain('hello')
213
+ expect(matches).toContain('help')
214
+ expect(matches).not.toContain('world') // NOTE(review): 'world' is at least 4 edits from 'helo', so this does not isolate the maxDistance argument
215
+ })
216
+
217
+ it('returns empty array for no matches', () => {
218
+ const matches = findFuzzyMatches('xyz', words, 1)
219
+ expect(matches).toEqual([])
220
+ })
221
+
222
+ it('handles empty word list', () => {
223
+ expect(findFuzzyMatches('hello', [])).toEqual([])
224
+ })
225
+
226
+ it('is case-insensitive', () => { // upper-case query against the lower-case list
227
+ const matches = findFuzzyMatches('HELO', words)
228
+ expect(matches).toContain('hello')
229
+ })
230
+ })
231
+
232
+ // ============================================================================
233
+ // Combined Matching Tests
234
+ // ============================================================================
235
+
236
+ describe('matchesWithOptions', () => { // matchesWithOptions(query, text, options?): exercised with no options, { stem }, { fuzzyDistance } and both
237
+ const text = 'The configuration failed during initialization' // shared haystack for every case in this describe
238
+
239
+ describe('exact matching (no options)', () => {
240
+ it('matches exact words', () => {
241
+ expect(matchesWithOptions('configuration', text)).toBe(true)
242
+ expect(matchesWithOptions('failed', text)).toBe(true)
243
+ })
244
+
245
+ it('is case-insensitive', () => {
246
+ expect(matchesWithOptions('CONFIGURATION', text)).toBe(true)
247
+ expect(matchesWithOptions('Failed', text)).toBe(true)
248
+ })
249
+
250
+ it('does not match partial words', () => { // without options, a prefix of a word in the text is not a match
251
+ expect(matchesWithOptions('config', text)).toBe(false)
252
+ expect(matchesWithOptions('fail', text)).toBe(false)
253
+ })
254
+
255
+ it('requires all query words to match', () => {
256
+ expect(matchesWithOptions('configuration failed', text)).toBe(true)
257
+ expect(matchesWithOptions('configuration success', text)).toBe(false) // 'success' is absent from the text
258
+ })
259
+
260
+ it('matches empty query', () => { // empty and whitespace-only queries are expected to match
261
+ expect(matchesWithOptions('', text)).toBe(true)
262
+ expect(matchesWithOptions(' ', text)).toBe(true)
263
+ })
264
+ })
265
+
266
+ describe('stemming', () => {
267
+ it('matches word variations via stemming', () => {
268
+ expect(matchesWithOptions('fail', text, { stem: true })).toBe(true) // matches 'failed' in the text
269
+ expect(matchesWithOptions('failing', text, { stem: true })).toBe(true)
270
+ expect(matchesWithOptions('configure', text, { stem: true })).toBe(true) // matches 'configuration' in the text
271
+ })
272
+
273
+ it('matches multiple stemmed words', () => {
274
+ expect(
275
+ matchesWithOptions('fail initialize', text, { stem: true }), // both words must match: 'failed' and 'initialization'
276
+ ).toBe(true)
277
+ })
278
+ })
279
+
280
+ describe('fuzzy matching', () => {
281
+ it('matches typos within distance', () => {
282
+ expect(
283
+ matchesWithOptions('configration', text, { fuzzyDistance: 2 }),
284
+ ).toBe(true)
285
+ expect(matchesWithOptions('faild', text, { fuzzyDistance: 1 })).toBe(
286
+ true,
287
+ )
288
+ })
289
+
290
+ it('does not match beyond distance', () => {
291
+ expect(
292
+ matchesWithOptions('configration', text, { fuzzyDistance: 0 }), // a distance of 0 is expected to reject the typo
293
+ ).toBe(false)
294
+ })
295
+ })
296
+
297
+ describe('combined stem and fuzzy', () => {
298
+ it('matches with both options enabled', () => {
299
+ expect(
300
+ matchesWithOptions('failing', text, { stem: true, fuzzyDistance: 2 }),
301
+ ).toBe(true)
302
+ expect(
303
+ matchesWithOptions('configration', text, {
304
+ stem: true,
305
+ fuzzyDistance: 2,
306
+ }),
307
+ ).toBe(true)
308
+ })
309
+ })
310
+ })
311
+
312
+ describe('findMatchesInLine', () => { // findMatchesInLine(queryWords, line, options?): returns the matched words as an array
313
+ const line = 'The configuration process failed during initialization' // shared input line
314
+
315
+ it('finds exact matches', () => {
316
+ const matches = findMatchesInLine(['configuration', 'failed'], line)
317
+ expect(matches).toContain('configuration')
318
+ expect(matches).toContain('failed')
319
+ })
320
+
321
+ it('returns unique matches (no duplicates)', () => {
322
+ const matches = findMatchesInLine(['the', 'the', 'the'], line)
323
+ expect(matches.filter((m) => m === 'the').length).toBe(1) // the line has 'The'; the match is expected once, in lower case
324
+ })
325
+
326
+ it('finds stemmed matches', () => {
327
+ const matches = findMatchesInLine(['fail', 'configure'], line, {
328
+ stem: true,
329
+ })
330
+ expect(matches).toContain('failed') // results are the words found in the line, not the query words
331
+ expect(matches).toContain('configuration')
332
+ })
333
+
334
+ it('finds fuzzy matches', () => {
335
+ const matches = findMatchesInLine(['configration'], line, {
336
+ fuzzyDistance: 2,
337
+ })
338
+ expect(matches).toContain('configuration') // again the line's spelling, not the misspelt query
339
+ })
340
+
341
+ it('handles empty query words', () => {
342
+ expect(findMatchesInLine([], line)).toEqual([])
343
+ })
344
+
345
+ it('handles empty line', () => {
346
+ expect(findMatchesInLine(['test'], '')).toEqual([])
347
+ })
348
+
349
+ it('is case-insensitive', () => { // upper-case query words still yield the lower-case matches
350
+ const matches = findMatchesInLine(['CONFIGURATION', 'FAILED'], line)
351
+ expect(matches).toContain('configuration')
352
+ expect(matches).toContain('failed')
353
+ })
354
+ })
355
+
356
+ // ============================================================================
357
+ // Highlight Pattern Tests
358
+ // ============================================================================
359
+
360
+ describe('buildFuzzyHighlightPattern', () => { // buildFuzzyHighlightPattern(query, options?): result is used via .test(), i.e. as a RegExp
361
+ it('builds pattern for exact matching with word boundaries', () => {
362
+ const pattern = buildFuzzyHighlightPattern('hello')
363
+ // Pattern matches the word inside a longer text (positive case only)
364
+ expect(pattern.test('say hello there')).toBe(true)
365
+ })
366
+
367
+ it('builds pattern for stemmed matching', () => {
368
+ const pattern = buildFuzzyHighlightPattern('fail', { stem: true })
369
+ // Stemmed pattern is meant to match words starting with the stem 'fail'; only the bare word is checked here
370
+ expect(pattern.test('it will fail')).toBe(true)
371
+ })
372
+
373
+ it('escapes regex special characters', () => {
374
+ const pattern = buildFuzzyHighlightPattern('foo.bar')
375
+ expect(pattern.test('use foo.bar here')).toBe(true)
376
+ expect(pattern.test('use fooXbar here')).toBe(false) // NOTE(review): same RegExp reused after a successful test(); with a g/y flag, lastIndex would make this pass regardless of escaping — TODO confirm flags
377
+ })
378
+
379
+ it('returns non-matching pattern for empty query', () => {
380
+ const pattern = buildFuzzyHighlightPattern('')
381
+ expect(pattern.test('anything')).toBe(false)
382
+ })
383
+
384
+ it('matches word boundaries', () => {
385
+ const pattern = buildFuzzyHighlightPattern('test')
386
+ expect(pattern.test('run the test now')).toBe(true) // positive case only; nothing asserts that 'test' inside a longer word is rejected
387
+ })
388
+ })
389
+
390
+ // ============================================================================
391
+ // Edge Cases
392
+ // ============================================================================
393
+
394
+ describe('edge cases', () => { // cross-cutting inputs: punctuation, digits, long strings, non-ASCII text
395
+ it('handles special characters in text', () => {
396
+ const text = 'function() { return true; }'
397
+ expect(matchesWithOptions('function', text)).toBe(true) // adjacent '(' must not prevent the word match
398
+ expect(matchesWithOptions('return', text)).toBe(true)
399
+ expect(matchesWithOptions('true', text)).toBe(true) // adjacent ';' likewise
400
+ })
401
+
402
+ it('handles numeric content', () => {
403
+ const text = 'version 1.2.3 released on 2024'
404
+ expect(matchesWithOptions('version', text)).toBe(true)
405
+ expect(matchesWithOptions('2024', text)).toBe(true) // an all-digit query is matched like a word
406
+ })
407
+
408
+ it('handles very long words', () => {
409
+ const longWord = 'a'.repeat(100)
410
+ expect(stem(longWord)).toBeDefined() // smoke check only: no specific stem is asserted
411
+ expect(levenshteinDistance(longWord, longWord)).toBe(0)
412
+ })
413
+
414
+ it('handles unicode text', () => {
415
+ const text = '日本語 configuration 中文'
416
+ expect(matchesWithOptions('configuration', text)).toBe(true) // checks the ASCII word between CJK text; CJK queries are not exercised
417
+ })
418
+ })
419
+ })
@@ -0,0 +1,273 @@
1
+ /**
2
+ * Fuzzy Search Utilities
3
+ *
4
+ * Provides stemming and fuzzy matching capabilities for search:
5
+ * - Porter stemmer for word normalization (fail -> fail, failure -> failur)
6
+ * - Levenshtein distance for typo tolerance
7
+ */
8
+
9
+ import { stemmer } from 'stemmer'
10
+
11
+ // ============================================================================
12
+ // Stemming
13
+ // ============================================================================
14
+
15
/**
 * Reduce a single word to its stem using the Porter stemmer.
 *
 * The word is lower-cased before it is handed to `stemmer`, so callers may
 * pass input in any case.
 *
 * @param word - The word to stem
 * @returns The stem of the lower-cased word
 */
export const stem = (word: string): string => stemmer(word.toLowerCase())
21
+
22
/**
 * Tokenize a text and stem every token.
 *
 * The text is lower-cased and split on runs of characters other than ASCII
 * letters and digits (`\W` plus the underscore). Empty tokens are dropped;
 * duplicates are kept and the original token order is preserved.
 *
 * @param text - The text to tokenize
 * @returns One stem per token, in order of appearance
 */
export const stemText = (text: string): string[] => {
  const stems: string[] = []
  for (const token of text.toLowerCase().split(/[\W_]+/)) {
    if (token.length > 0) {
      stems.push(stem(token))
    }
  }
  return stems
}
32
+
33
/**
 * Collect the distinct stems that occur in a text.
 *
 * @param text - The text to tokenize and stem
 * @returns A set holding each stem produced by `stemText` exactly once
 */
export const getStems = (text: string): Set<string> => {
  const uniqueStems = new Set<string>()
  for (const stemmed of stemText(text)) {
    uniqueStems.add(stemmed)
  }
  return uniqueStems
}
39
+
40
+ // ============================================================================
41
+ // Fuzzy Matching (Levenshtein Distance)
42
+ // ============================================================================
43
+
44
/**
 * Calculate the Levenshtein distance between two strings: the minimum number
 * of single-unit insertions, deletions and substitutions that turn `a` into
 * `b`.
 *
 * Strings are compared by UTF-16 code unit and the comparison is
 * case-sensitive. Only one row of the dynamic-programming table is kept, so a
 * call allocates a single array of `b.length + 1` numbers instead of a full
 * matrix.
 *
 * @param a - Source string
 * @param b - Target string
 * @returns The edit distance (0 when the strings are equal)
 */
export const levenshteinDistance = (a: string, b: string): number => {
  // Trivial cases: nothing to edit, or every unit must be inserted/deleted
  if (a === b) return 0
  if (a.length === 0) return b.length
  if (b.length === 0) return a.length

  // row[j] = distance between the prefix of `a` processed so far and the
  // first j units of `b`. It starts as the row for the empty prefix of `a`.
  const row = Array.from({ length: b.length + 1 }, (_, j) => j)

  for (let i = 1; i <= a.length; i++) {
    let diagonal = i - 1 // previous row, column 0
    let left = i // current row, column 0
    row[0] = i

    for (let j = 1; j <= b.length; j++) {
      // row[j] still holds the previous row's value until it is overwritten;
      // j is always within bounds, so the element is defined.
      const above = row[j]!
      const cost = a[i - 1] === b[j - 1] ? 0 : 1
      const cell = Math.min(
        above + 1, // deletion
        left + 1, // insertion
        diagonal + cost, // substitution
      )
      row[j] = cell
      diagonal = above
      left = cell
    }
  }

  // The last cell of the final row is the distance between the full strings
  return row[b.length]!
}
74
+
75
/**
 * Check whether two words are within a given edit distance of each other,
 * ignoring case.
 *
 * @param word1 - First word
 * @param word2 - Second word
 * @param maxDistance - Largest Levenshtein distance still counted as a match
 *   (defaults to 2)
 * @returns `true` when the lower-cased words are at most `maxDistance` edits
 *   apart
 */
export const isFuzzyMatch = (
  word1: string,
  word2: string,
  maxDistance: number = 2,
): boolean => {
  // Normalize case first: lower-casing can change a string's length (for
  // example 'İ' lower-cases to two UTF-16 code units), so the length shortcut
  // below must look at the same strings the distance is computed on.
  const first = word1.toLowerCase()
  const second = word2.toLowerCase()

  // Quick length check - the distance is at least the difference in length
  if (Math.abs(first.length - second.length) > maxDistance) {
    return false
  }

  return levenshteinDistance(first, second) <= maxDistance
}
92
+
93
/**
 * Select the words from a list that fuzzily match a query.
 *
 * The comparison is case-insensitive; the returned words are the original,
 * unmodified entries of `words`, in their original order.
 *
 * @param query - The word to look for
 * @param words - Candidate words
 * @param maxDistance - Largest edit distance still counted as a match
 *   (defaults to 2)
 * @returns The candidates within `maxDistance` edits of the query
 */
export const findFuzzyMatches = (
  query: string,
  words: readonly string[],
  maxDistance: number = 2,
): string[] => {
  const needle = query.toLowerCase()
  const matches: string[] = []

  for (const candidate of words) {
    if (isFuzzyMatch(needle, candidate.toLowerCase(), maxDistance)) {
      matches.push(candidate)
    }
  }

  return matches
}
106
+
107
+ // ============================================================================
108
+ // Combined Matching Options
109
+ // ============================================================================
110
+
111
/**
 * Switches that relax word matching beyond exact, case-insensitive equality.
 * Both are optional; when neither is set only exact word matches count.
 */
export interface MatchOptions {
  /**
   * Maximum edit distance for typo-tolerant matching. Fuzzy matching is only
   * applied when this is a number greater than zero.
   */
  readonly fuzzyDistance?: number | undefined
  /** When true, words that share the same stem are treated as matching */
  readonly stem?: boolean | undefined
}
117
+
118
/**
 * Check if query matches text with stemming and/or fuzzy matching.
 *
 * Both inputs are lower-cased and split into words on runs of characters other
 * than ASCII letters and digits. The text matches when EVERY query word
 * matches at least one text word, where a pair of words matches if they are
 * - identical, or
 * - share the same stem (only when `options.stem` is set), or
 * - are within `options.fuzzyDistance` edits (only when it is greater than 0).
 *
 * @param query - Search query; a query without any words matches everything
 * @param text - Text to search in
 * @param options - Stemming / fuzzy switches
 * @returns `true` when all query words are matched
 */
export const matchesWithOptions = (
  query: string,
  text: string,
  options: MatchOptions = {},
): boolean => {
  const { stem: useStemming, fuzzyDistance } = options

  const tokenize = (input: string): string[] =>
    input
      .toLowerCase()
      .split(/[\W_]+/)
      .filter((w) => w.length > 0)

  const queryWords = tokenize(query)
  if (queryWords.length === 0) {
    return true // Empty query matches everything
  }

  // Index the text once instead of rescanning (and re-stemming) every text
  // word for every query word.
  const textWords = new Set(tokenize(text))
  const uniqueTextWords = Array.from(textWords)
  const textStems = useStemming
    ? new Set(uniqueTextWords.map((word) => stem(word)))
    : undefined
  const maxDistance =
    fuzzyDistance !== undefined && fuzzyDistance > 0
      ? fuzzyDistance
      : undefined

  for (const queryWord of queryWords) {
    // Exact match (case-insensitive)
    if (textWords.has(queryWord)) {
      continue
    }

    // Stemming match
    if (textStems !== undefined && textStems.has(stem(queryWord))) {
      continue
    }

    // Fuzzy match
    if (
      maxDistance !== undefined &&
      uniqueTextWords.some((textWord) =>
        isFuzzyMatch(textWord, queryWord, maxDistance),
      )
    ) {
      continue
    }

    return false // All query words must match
  }

  return true
}
177
+
178
/**
 * Find the words of a line that match any of the query words, using the
 * stemming/fuzzy options.
 *
 * The line is lower-cased and split into words on runs of characters other
 * than ASCII letters and digits. A line word matches a query word when it is
 * identical to the lower-cased query word, shares its stem (with
 * `options.stem`), or is within `options.fuzzyDistance` edits (when greater
 * than 0).
 *
 * Uses a Set for O(1) duplicate checking, and stems each distinct line word
 * once instead of once per query word.
 *
 * @param queryWords - Words to look for
 * @param line - The line to scan
 * @param options - Stemming / fuzzy switches
 * @returns The distinct, lower-cased line words that matched, ordered by the
 *   query word that first matched them and then by first position in the line
 */
export const findMatchesInLine = (
  queryWords: readonly string[],
  line: string,
  options: MatchOptions = {},
): string[] => {
  const { stem: useStemming, fuzzyDistance } = options
  const maxDistance =
    fuzzyDistance !== undefined && fuzzyDistance > 0
      ? fuzzyDistance
      : undefined

  // Distinct line words in first-occurrence order. A repeated word can never
  // add a new match, so dropping repeats does not change the result.
  const lineWords = Array.from(
    new Set(
      line
        .toLowerCase()
        .split(/[\W_]+/)
        .filter((w) => w.length > 0),
    ),
  )

  // Stem every line word once up front instead of once per query word
  const lineStems = useStemming
    ? new Map<string, string>(
        lineWords.map((word): [string, string] => [word, stem(word)]),
      )
    : undefined

  const matchesSet = new Set<string>()

  for (const queryWord of queryWords) {
    const queryLower = queryWord.toLowerCase()
    // An empty stem never takes part in stem matching
    const queryStem = lineStems !== undefined ? stem(queryWord) : ''

    for (const lineWord of lineWords) {
      // Skip if already matched (O(1) lookup)
      if (matchesSet.has(lineWord)) {
        continue
      }

      const isMatch =
        // Exact match
        lineWord === queryLower ||
        // Stemming match
        (queryStem !== '' && lineStems?.get(lineWord) === queryStem) ||
        // Fuzzy match
        (maxDistance !== undefined &&
          isFuzzyMatch(lineWord, queryLower, maxDistance))

      if (isMatch) {
        matchesSet.add(lineWord)
      }
    }
  }

  return Array.from(matchesSet)
}
232
+
233
/**
 * Build a regex pattern that matches the query terms (or, with stemming,
 * their stemmed variations) as whole words. For highlighting purposes.
 *
 * The query is lower-cased and split into words on runs of characters other
 * than ASCII letters and digits. The returned pattern is case-insensitive and
 * global; because of the `g` flag, callers that reuse it with `test()` or
 * `exec()` must reset `lastIndex` between unrelated inputs.
 *
 * @param query - Search query
 * @param options - Only `stem` is consulted: when set, each word matches any
 *   word that starts with its stem, as well as the query word itself
 * @returns The highlight pattern, or a pattern that never matches when the
 *   query contains no words
 */
export const buildFuzzyHighlightPattern = (
  query: string,
  options: MatchOptions = {},
): RegExp => {
  const { stem: useStemming } = options

  // Escape special regex chars
  const escapeRegExp = (value: string): string =>
    value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')

  const queryWords = query
    .toLowerCase()
    .split(/[\W_]+/)
    .filter((w) => w.length > 0)

  if (queryWords.length === 0) {
    return /.^/ // Match nothing
  }

  // One alternative per distinct query word
  const patterns = new Set<string>()

  for (const word of queryWords) {
    const escaped = escapeRegExp(word)

    if (!useStemming) {
      // Exact word match
      patterns.add(`\\b${escaped}\\b`)
      continue
    }

    // Match words that share the same stem, approximated as the stem followed
    // by any suffix.
    const wordStem = stem(word)
    const escapedStem = escapeRegExp(wordStem)

    if (word.startsWith(wordStem)) {
      patterns.add(`\\b${escapedStem}\\w*\\b`)
    } else {
      // The stem is not always a prefix of the word (Porter turns a trailing
      // 'y' into 'i', e.g. happy -> happi), so also accept the query word
      // itself; otherwise the term the user typed would not be highlighted.
      patterns.add(`\\b(?:${escaped}|${escapedStem}\\w*)\\b`)
    }
  }

  return new RegExp(Array.from(patterns).join('|'), 'gi')
}