mdcontext 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (251) hide show
  1. package/.changeset/config.json +9 -9
  2. package/.claude/settings.local.json +25 -0
  3. package/.github/workflows/claude-code-review.yml +44 -0
  4. package/.github/workflows/claude.yml +85 -0
  5. package/CONTRIBUTING.md +186 -0
  6. package/NOTES/NOTES +44 -0
  7. package/README.md +206 -3
  8. package/biome.json +1 -1
  9. package/dist/chunk-23UPXDNL.js +3044 -0
  10. package/dist/chunk-2W7MO2DL.js +1366 -0
  11. package/dist/chunk-3NUAZGMA.js +1689 -0
  12. package/dist/chunk-7TOWB2XB.js +366 -0
  13. package/dist/chunk-7XOTOADQ.js +3065 -0
  14. package/dist/chunk-AH2PDM2K.js +3042 -0
  15. package/dist/chunk-BNXWSZ63.js +3742 -0
  16. package/dist/chunk-BTL5DJVU.js +3222 -0
  17. package/dist/chunk-HDHYG7E4.js +104 -0
  18. package/dist/chunk-HLR4KZBP.js +3234 -0
  19. package/dist/chunk-IP3FRFEB.js +1045 -0
  20. package/dist/chunk-KHU56VDO.js +3042 -0
  21. package/dist/chunk-KRYIFLQR.js +85 -89
  22. package/dist/chunk-LBSDNLEM.js +287 -0
  23. package/dist/chunk-MNTQ7HCP.js +2643 -0
  24. package/dist/chunk-MUJELQQ6.js +1387 -0
  25. package/dist/chunk-MXJGMSLV.js +2199 -0
  26. package/dist/chunk-N6QJGC3Z.js +2636 -0
  27. package/dist/chunk-OBELGBPM.js +1713 -0
  28. package/dist/chunk-OT7R5XTA.js +3192 -0
  29. package/dist/chunk-P7X4RA2T.js +106 -0
  30. package/dist/chunk-PIDUQNC2.js +3185 -0
  31. package/dist/chunk-POGCDIH4.js +3187 -0
  32. package/dist/chunk-PSIEOQGZ.js +3043 -0
  33. package/dist/chunk-PVRT3IHA.js +3238 -0
  34. package/dist/chunk-QNN4TT23.js +1430 -0
  35. package/dist/chunk-RE3R45RJ.js +3042 -0
  36. package/dist/chunk-S7E6TFX6.js +718 -657
  37. package/dist/chunk-SG6GLU4U.js +1378 -0
  38. package/dist/chunk-SJCDV2ST.js +274 -0
  39. package/dist/chunk-SYE5XLF3.js +104 -0
  40. package/dist/chunk-T5VLYBZD.js +103 -0
  41. package/dist/chunk-TOQB7VWU.js +3238 -0
  42. package/dist/chunk-VFNMZ4ZQ.js +3228 -0
  43. package/dist/chunk-VVTGZNBT.js +1533 -1423
  44. package/dist/chunk-W7Q4RFEV.js +104 -0
  45. package/dist/chunk-XTYYVRLO.js +3190 -0
  46. package/dist/chunk-Y6MDYVJD.js +3063 -0
  47. package/dist/cli/main.js +4072 -629
  48. package/dist/index.d.ts +420 -33
  49. package/dist/index.js +8 -15
  50. package/dist/mcp/server.js +103 -7
  51. package/dist/schema-BAWSG7KY.js +22 -0
  52. package/dist/schema-E3QUPL26.js +20 -0
  53. package/dist/schema-EHL7WUT6.js +20 -0
  54. package/docs/019-USAGE.md +44 -5
  55. package/docs/020-current-implementation.md +8 -8
  56. package/docs/021-DOGFOODING-FINDINGS.md +1 -1
  57. package/docs/CONFIG.md +1123 -0
  58. package/docs/ERRORS.md +383 -0
  59. package/docs/summarization.md +320 -0
  60. package/justfile +40 -0
  61. package/package.json +39 -33
  62. package/research/INDEX.md +315 -0
  63. package/research/code-review/README.md +90 -0
  64. package/research/code-review/cli-error-handling-review.md +979 -0
  65. package/research/code-review/code-review-validation-report.md +464 -0
  66. package/research/code-review/main-ts-review.md +1128 -0
  67. package/research/config-docs/SUMMARY.md +357 -0
  68. package/research/config-docs/TEST-RESULTS.md +776 -0
  69. package/research/config-docs/TODO.md +542 -0
  70. package/research/config-docs/analysis.md +744 -0
  71. package/research/config-docs/fix-validation.md +502 -0
  72. package/research/config-docs/help-audit.md +264 -0
  73. package/research/config-docs/help-system-analysis.md +890 -0
  74. package/research/frontmatter/COMMENTS-ARE-SKIPPED.md +149 -0
  75. package/research/frontmatter/LLM-CODE-NAVIGATION.md +276 -0
  76. package/research/issue-review.md +603 -0
  77. package/research/llm-summarization/agent-cli-tools-2026.md +1082 -0
  78. package/research/llm-summarization/alternative-providers-2026.md +1428 -0
  79. package/research/llm-summarization/anthropic-2026.md +367 -0
  80. package/research/llm-summarization/claude-cli-integration.md +1706 -0
  81. package/research/llm-summarization/cli-integration-patterns.md +3155 -0
  82. package/research/llm-summarization/openai-2026.md +473 -0
  83. package/research/llm-summarization/openai-compatible-providers-2026.md +1022 -0
  84. package/research/llm-summarization/opencode-cli-integration.md +1552 -0
  85. package/research/llm-summarization/prompt-engineering-2026.md +1426 -0
  86. package/research/llm-summarization/prototype-results.md +56 -0
  87. package/research/llm-summarization/provider-switching-patterns-2026.md +2153 -0
  88. package/research/llm-summarization/typescript-llm-libraries-2026.md +2436 -0
  89. package/research/mdcontext-pudding/00-EXECUTIVE-SUMMARY.md +282 -0
  90. package/research/mdcontext-pudding/01-index-embed.md +956 -0
  91. package/research/mdcontext-pudding/02-search-COMMANDS.md +142 -0
  92. package/research/mdcontext-pudding/02-search-SUMMARY.md +146 -0
  93. package/research/mdcontext-pudding/02-search.md +970 -0
  94. package/research/mdcontext-pudding/03-context.md +779 -0
  95. package/research/mdcontext-pudding/04-navigation-and-analytics.md +803 -0
  96. package/research/mdcontext-pudding/04-tree.md +704 -0
  97. package/research/mdcontext-pudding/05-config.md +1038 -0
  98. package/research/mdcontext-pudding/06-links-summary.txt +87 -0
  99. package/research/mdcontext-pudding/06-links.md +679 -0
  100. package/research/mdcontext-pudding/07-stats.md +693 -0
  101. package/research/mdcontext-pudding/BUG-FIX-PLAN.md +388 -0
  102. package/research/mdcontext-pudding/P0-BUG-VALIDATION.md +167 -0
  103. package/research/mdcontext-pudding/README.md +168 -0
  104. package/research/mdcontext-pudding/TESTING-SUMMARY.md +128 -0
  105. package/research/research-quality-review.md +834 -0
  106. package/research/semantic-search/embedding-text-analysis.md +156 -0
  107. package/research/semantic-search/multi-word-failure-reproduction.md +171 -0
  108. package/research/semantic-search/query-processing-analysis.md +207 -0
  109. package/research/semantic-search/root-cause-and-solution.md +114 -0
  110. package/research/semantic-search/threshold-validation-report.md +69 -0
  111. package/research/semantic-search/vector-search-analysis.md +63 -0
  112. package/research/test-path-issues.md +276 -0
  113. package/review/ALP-76/1-error-type-design.md +962 -0
  114. package/review/ALP-76/2-error-handling-patterns.md +906 -0
  115. package/review/ALP-76/3-error-presentation.md +624 -0
  116. package/review/ALP-76/4-test-coverage.md +625 -0
  117. package/review/ALP-76/5-migration-completeness.md +440 -0
  118. package/review/ALP-76/6-effect-best-practices.md +755 -0
  119. package/scripts/apply-branch-protection.sh +47 -0
  120. package/scripts/branch-protection-templates.json +79 -0
  121. package/scripts/prototype-summarization.ts +346 -0
  122. package/scripts/rebuild-hnswlib.js +32 -37
  123. package/scripts/setup-branch-protection.sh +64 -0
  124. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/active-provider.json +7 -0
  125. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.json +541 -0
  126. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.meta.json +5 -0
  127. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/config.json +8 -0
  128. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
  129. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
  130. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/documents.json +60 -0
  131. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/links.json +13 -0
  132. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/sections.json +1197 -0
  133. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/configuration-management.md +99 -0
  134. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/distributed-systems.md +92 -0
  135. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/error-handling.md +78 -0
  136. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/failure-automation.md +55 -0
  137. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/job-context.md +69 -0
  138. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/process-orchestration.md +99 -0
  139. package/src/cli/argv-preprocessor.test.ts +2 -2
  140. package/src/cli/cli.test.ts +230 -33
  141. package/src/cli/commands/config-cmd.ts +642 -0
  142. package/src/cli/commands/context.ts +97 -9
  143. package/src/cli/commands/duplicates.ts +122 -0
  144. package/src/cli/commands/embeddings.ts +529 -0
  145. package/src/cli/commands/index-cmd.ts +210 -30
  146. package/src/cli/commands/index.ts +3 -0
  147. package/src/cli/commands/search.ts +894 -64
  148. package/src/cli/commands/stats.ts +3 -0
  149. package/src/cli/commands/tree.ts +26 -5
  150. package/src/cli/config-layer.ts +176 -0
  151. package/src/cli/error-handler.test.ts +235 -0
  152. package/src/cli/error-handler.ts +655 -0
  153. package/src/cli/flag-schemas.ts +66 -0
  154. package/src/cli/help.ts +209 -7
  155. package/src/cli/main.ts +348 -58
  156. package/src/cli/options.ts +10 -0
  157. package/src/cli/shared-error-handling.ts +199 -0
  158. package/src/cli/utils.ts +150 -17
  159. package/src/config/file-provider.test.ts +320 -0
  160. package/src/config/file-provider.ts +273 -0
  161. package/src/config/index.ts +72 -0
  162. package/src/config/integration.test.ts +667 -0
  163. package/src/config/precedence.test.ts +277 -0
  164. package/src/config/precedence.ts +451 -0
  165. package/src/config/schema.test.ts +414 -0
  166. package/src/config/schema.ts +603 -0
  167. package/src/config/service.test.ts +320 -0
  168. package/src/config/service.ts +243 -0
  169. package/src/config/testing.test.ts +264 -0
  170. package/src/config/testing.ts +110 -0
  171. package/src/core/types.ts +6 -33
  172. package/src/duplicates/detector.test.ts +183 -0
  173. package/src/duplicates/detector.ts +414 -0
  174. package/src/duplicates/index.ts +18 -0
  175. package/src/embeddings/embedding-namespace.test.ts +300 -0
  176. package/src/embeddings/embedding-namespace.ts +947 -0
  177. package/src/embeddings/heading-boost.test.ts +222 -0
  178. package/src/embeddings/hnsw-build-options.test.ts +198 -0
  179. package/src/embeddings/hyde.test.ts +272 -0
  180. package/src/embeddings/hyde.ts +264 -0
  181. package/src/embeddings/index.ts +2 -0
  182. package/src/embeddings/openai-provider.ts +332 -83
  183. package/src/embeddings/pricing.json +22 -0
  184. package/src/embeddings/provider-constants.ts +204 -0
  185. package/src/embeddings/provider-errors.test.ts +967 -0
  186. package/src/embeddings/provider-errors.ts +565 -0
  187. package/src/embeddings/provider-factory.test.ts +240 -0
  188. package/src/embeddings/provider-factory.ts +225 -0
  189. package/src/embeddings/provider-integration.test.ts +788 -0
  190. package/src/embeddings/query-preprocessing.test.ts +187 -0
  191. package/src/embeddings/semantic-search-threshold.test.ts +508 -0
  192. package/src/embeddings/semantic-search.ts +780 -93
  193. package/src/embeddings/types.ts +293 -16
  194. package/src/embeddings/vector-store.ts +486 -77
  195. package/src/embeddings/voyage-provider.ts +313 -0
  196. package/src/errors/errors.test.ts +845 -0
  197. package/src/errors/index.ts +533 -0
  198. package/src/index/ignore-patterns.test.ts +354 -0
  199. package/src/index/ignore-patterns.ts +305 -0
  200. package/src/index/indexer.ts +286 -48
  201. package/src/index/storage.ts +94 -30
  202. package/src/index/types.ts +40 -2
  203. package/src/index/watcher.ts +67 -9
  204. package/src/index.ts +22 -0
  205. package/src/integration/search-keyword.test.ts +678 -0
  206. package/src/mcp/server.ts +135 -6
  207. package/src/parser/parser.ts +18 -19
  208. package/src/parser/section-filter.test.ts +277 -0
  209. package/src/parser/section-filter.ts +125 -3
  210. package/src/search/__tests__/hybrid-search.test.ts +650 -0
  211. package/src/search/bm25-store.ts +366 -0
  212. package/src/search/cross-encoder.test.ts +253 -0
  213. package/src/search/cross-encoder.ts +406 -0
  214. package/src/search/fuzzy-search.test.ts +419 -0
  215. package/src/search/fuzzy-search.ts +273 -0
  216. package/src/search/hybrid-search.ts +448 -0
  217. package/src/search/path-matcher.test.ts +276 -0
  218. package/src/search/path-matcher.ts +33 -0
  219. package/src/search/searcher.test.ts +99 -1
  220. package/src/search/searcher.ts +189 -67
  221. package/src/search/wink-bm25.d.ts +30 -0
  222. package/src/summarization/cli-providers/claude.ts +202 -0
  223. package/src/summarization/cli-providers/detection.test.ts +273 -0
  224. package/src/summarization/cli-providers/detection.ts +118 -0
  225. package/src/summarization/cli-providers/index.ts +8 -0
  226. package/src/summarization/cost.test.ts +139 -0
  227. package/src/summarization/cost.ts +102 -0
  228. package/src/summarization/error-handler.test.ts +127 -0
  229. package/src/summarization/error-handler.ts +111 -0
  230. package/src/summarization/index.ts +102 -0
  231. package/src/summarization/pipeline.test.ts +498 -0
  232. package/src/summarization/pipeline.ts +231 -0
  233. package/src/summarization/prompts.test.ts +269 -0
  234. package/src/summarization/prompts.ts +133 -0
  235. package/src/summarization/provider-factory.test.ts +396 -0
  236. package/src/summarization/provider-factory.ts +178 -0
  237. package/src/summarization/types.ts +184 -0
  238. package/src/summarize/summarizer.ts +104 -35
  239. package/src/types/huggingface-transformers.d.ts +66 -0
  240. package/tests/fixtures/cli/.mdcontext/active-provider.json +7 -0
  241. package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
  242. package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
  243. package/tests/fixtures/cli/.mdcontext/indexes/documents.json +4 -4
  244. package/tests/fixtures/cli/.mdcontext/indexes/sections.json +14 -0
  245. package/tests/integration/embed-index.test.ts +712 -0
  246. package/tests/integration/search-context.test.ts +469 -0
  247. package/tests/integration/search-semantic.test.ts +522 -0
  248. package/vitest.config.ts +1 -6
  249. package/AGENTS.md +0 -46
  250. package/tests/fixtures/cli/.mdcontext/vectors.bin +0 -0
  251. package/tests/fixtures/cli/.mdcontext/vectors.meta.json +0 -1264
@@ -0,0 +1,276 @@
1
+ /**
2
+ * Tests for path-matcher utilities
3
+ */
4
+
5
+ import { describe, expect, it } from 'vitest'
6
+ import { matchPath } from './path-matcher.js'
7
+
8
+ describe('path-matcher', () => {
9
+ describe('matchPath', () => {
10
+ describe('basic patterns', () => {
11
+ it('matches exact paths', () => {
12
+ expect(matchPath('docs/readme.md', 'docs/readme.md')).toBe(true)
13
+ expect(matchPath('src/index.ts', 'src/index.ts')).toBe(true)
14
+ })
15
+
16
+ it('does not match different paths', () => {
17
+ expect(matchPath('docs/readme.md', 'src/readme.md')).toBe(false)
18
+ expect(matchPath('docs/readme.md', 'docs/other.md')).toBe(false)
19
+ })
20
+
21
+ it('is case-insensitive', () => {
22
+ expect(matchPath('docs/README.md', 'docs/readme.md')).toBe(true)
23
+ expect(matchPath('DOCS/readme.md', 'docs/readme.md')).toBe(true)
24
+ expect(matchPath('docs/readme.MD', 'docs/readme.md')).toBe(true)
25
+ })
26
+ })
27
+
28
+ describe('asterisk wildcard (*)', () => {
29
+ it('matches any characters within filename', () => {
30
+ expect(matchPath('docs/readme.md', 'docs/*.md')).toBe(true)
31
+ expect(matchPath('docs/guide.md', 'docs/*.md')).toBe(true)
32
+ expect(matchPath('docs/api-reference.md', 'docs/*.md')).toBe(true)
33
+ })
34
+
35
+ it('matches empty string with asterisk', () => {
36
+ expect(matchPath('docs/.md', 'docs/*.md')).toBe(true)
37
+ })
38
+
39
+ it('matches patterns at start of path', () => {
40
+ expect(matchPath('src/index.ts', '*/index.ts')).toBe(true)
41
+ expect(matchPath('lib/index.ts', '*/index.ts')).toBe(true)
42
+ })
43
+
44
+ it('matches patterns in middle of path', () => {
45
+ expect(matchPath('src/utils/index.ts', 'src/*/index.ts')).toBe(true)
46
+ expect(matchPath('src/helpers/index.ts', 'src/*/index.ts')).toBe(true)
47
+ })
48
+
49
+ it('matches multiple wildcards', () => {
50
+ expect(matchPath('src/utils/test.ts', '*/*/*.ts')).toBe(true)
51
+ expect(matchPath('a/b/c.ts', '*/*/*.ts')).toBe(true)
52
+ })
53
+
54
+ it('single asterisk does NOT match directory separators', () => {
55
+ // Standard glob semantics: * matches within a segment only
56
+ expect(matchPath('file.md', '*.md')).toBe(true)
57
+ expect(matchPath('dir/file.md', '*.md')).toBe(false) // * doesn't match /
58
+ expect(matchPath('deeply/nested/path/file.md', '*')).toBe(false)
59
+ expect(matchPath('a/b/c.ts', '*.ts')).toBe(false)
60
+ })
61
+
62
+ it('does not match nested paths with single asterisk', () => {
63
+ expect(matchPath('docs/nested/api.md', 'docs/*.md')).toBe(false)
64
+ expect(matchPath('src/sub/file.ts', 'src/*.ts')).toBe(false)
65
+ })
66
+ })
67
+
68
+ describe('double asterisk wildcard (**)', () => {
69
+ it('matches across directory separators', () => {
70
+ expect(matchPath('deeply/nested/path/file.md', '**')).toBe(true)
71
+ expect(matchPath('a/b/c.ts', '**.ts')).toBe(true)
72
+ expect(matchPath('a/b/c.ts', '**/*.ts')).toBe(true)
73
+ })
74
+
75
+ it('matches nested paths recursively', () => {
76
+ expect(matchPath('docs/nested/api.md', 'docs/**/*.md')).toBe(true)
77
+ expect(matchPath('docs/deeply/nested/file.md', 'docs/**/*.md')).toBe(
78
+ true,
79
+ )
80
+ expect(matchPath('src/a/b/c/file.ts', 'src/**/*.ts')).toBe(true)
81
+ })
82
+
83
+ it('matches at beginning of pattern', () => {
84
+ expect(matchPath('any/path/to/file.md', '**/*.md')).toBe(true)
85
+ // Note: **/*.md requires at least one /; for root files use *.md or **.md
86
+ expect(matchPath('file.md', '**.md')).toBe(true)
87
+ expect(matchPath('file.md', '*.md')).toBe(true)
88
+ })
89
+ })
90
+
91
+ describe('question mark wildcard (?)', () => {
92
+ it('matches exactly one character', () => {
93
+ expect(matchPath('file1.md', 'file?.md')).toBe(true)
94
+ expect(matchPath('fileA.md', 'file?.md')).toBe(true)
95
+ expect(matchPath('file-.md', 'file?.md')).toBe(true)
96
+ })
97
+
98
+ it('does not match zero characters', () => {
99
+ expect(matchPath('file.md', 'file?.md')).toBe(false)
100
+ })
101
+
102
+ it('does not match multiple characters', () => {
103
+ expect(matchPath('file12.md', 'file?.md')).toBe(false)
104
+ expect(matchPath('fileABC.md', 'file?.md')).toBe(false)
105
+ })
106
+
107
+ it('matches multiple question marks', () => {
108
+ expect(matchPath('file12.md', 'file??.md')).toBe(true)
109
+ expect(matchPath('fileAB.md', 'file??.md')).toBe(true)
110
+ expect(matchPath('file1.md', 'file??.md')).toBe(false)
111
+ })
112
+
113
+ it('can be combined with asterisk', () => {
114
+ expect(matchPath('v1/readme.md', 'v?/*.md')).toBe(true)
115
+ expect(matchPath('v2/guide.md', 'v?/*.md')).toBe(true)
116
+ expect(matchPath('v10/readme.md', 'v?/*.md')).toBe(false)
117
+ })
118
+
119
+ it('does not match directory separators', () => {
120
+ expect(matchPath('a/b', 'a?b')).toBe(false) // ? should not match /
121
+ })
122
+ })
123
+
124
+ describe('dot handling', () => {
125
+ it('treats dot as literal character', () => {
126
+ expect(matchPath('file.md', 'file.md')).toBe(true)
127
+ expect(matchPath('fileXmd', 'file.md')).toBe(false)
128
+ })
129
+
130
+ it('escapes dots in patterns correctly', () => {
131
+ expect(matchPath('src.utils.index.ts', 'src.utils.index.ts')).toBe(true)
132
+ expect(matchPath('srcXutilsXindexXts', 'src.utils.index.ts')).toBe(
133
+ false,
134
+ )
135
+ })
136
+
137
+ it('matches file extensions correctly', () => {
138
+ expect(matchPath('readme.md', '*.md')).toBe(true)
139
+ expect(matchPath('readme.markdown', '*.md')).toBe(false)
140
+ expect(matchPath('readmeXmd', '*.md')).toBe(false)
141
+ })
142
+ })
143
+
144
+ describe('special regex characters', () => {
145
+ it('handles paths with special characters', () => {
146
+ // The path-matcher now escapes all regex special chars
147
+ expect(matchPath('file.test.md', 'file.test.md')).toBe(true)
148
+ })
149
+
150
+ it('handles patterns with multiple dots', () => {
151
+ expect(matchPath('package.config.json', '*.config.json')).toBe(true)
152
+ expect(matchPath('app.module.ts', '*.module.ts')).toBe(true)
153
+ })
154
+
155
+ it('treats parentheses as literal characters', () => {
156
+ expect(matchPath('file(1).md', 'file(1).md')).toBe(true)
157
+ expect(matchPath('file1.md', 'file(1).md')).toBe(false)
158
+ })
159
+
160
+ it('treats square brackets as literal characters', () => {
161
+ expect(matchPath('[ab].md', '[ab].md')).toBe(true)
162
+ expect(matchPath('a.md', '[ab].md')).toBe(false)
163
+ expect(matchPath('b.md', '[ab].md')).toBe(false)
164
+ })
165
+
166
+ it('treats plus as literal character', () => {
167
+ expect(matchPath('C++.md', 'C++.md')).toBe(true)
168
+ expect(matchPath('C.md', 'C++.md')).toBe(false)
169
+ })
170
+
171
+ it('treats caret as literal character', () => {
172
+ expect(matchPath('test^2.md', 'test^2.md')).toBe(true)
173
+ expect(matchPath('test2.md', 'test^2.md')).toBe(false)
174
+ })
175
+
176
+ it('treats dollar sign as literal character', () => {
177
+ expect(matchPath('price$100.md', 'price$100.md')).toBe(true)
178
+ expect(matchPath('price100.md', 'price$100.md')).toBe(false)
179
+ })
180
+
181
+ it('treats curly braces as literal characters', () => {
182
+ expect(matchPath('obj{}.md', 'obj{}.md')).toBe(true)
183
+ expect(matchPath('obj.md', 'obj{}.md')).toBe(false)
184
+ })
185
+
186
+ it('treats pipe as literal character', () => {
187
+ expect(matchPath('a|b.md', 'a|b.md')).toBe(true)
188
+ expect(matchPath('a.md', 'a|b.md')).toBe(false)
189
+ })
190
+
191
+ it('treats backslash as literal character', () => {
192
+ expect(matchPath('path\\file.md', 'path\\file.md')).toBe(true)
193
+ expect(matchPath('pathfile.md', 'path\\file.md')).toBe(false)
194
+ })
195
+ })
196
+
197
+ describe('edge cases', () => {
198
+ it('matches empty path with empty pattern', () => {
199
+ expect(matchPath('', '')).toBe(true)
200
+ })
201
+
202
+ it('does not match non-empty path with empty pattern', () => {
203
+ expect(matchPath('file.md', '')).toBe(false)
204
+ })
205
+
206
+ it('does not match empty path with non-empty pattern', () => {
207
+ expect(matchPath('', 'file.md')).toBe(false)
208
+ })
209
+
210
+ it('matches only asterisk pattern', () => {
211
+ expect(matchPath('anything', '*')).toBe(true)
212
+ expect(matchPath('', '*')).toBe(true)
213
+ expect(matchPath('a/b/c', '*')).toBe(false) // * doesn't match /
214
+ expect(matchPath('a/b/c', '**')).toBe(true) // ** matches everything
215
+ })
216
+
217
+ it('matches only question mark pattern', () => {
218
+ expect(matchPath('a', '?')).toBe(true)
219
+ expect(matchPath('ab', '?')).toBe(false)
220
+ expect(matchPath('', '?')).toBe(false)
221
+ })
222
+
223
+ it('handles very long paths', () => {
224
+ const longPath = `${'a/'.repeat(50)}file.md`
225
+ const longPattern = `${'a/'.repeat(50)}*.md`
226
+ expect(matchPath(longPath, longPattern)).toBe(true)
227
+ })
228
+
229
+ it('handles paths with spaces', () => {
230
+ expect(matchPath('my docs/readme.md', 'my docs/*.md')).toBe(true)
231
+ expect(matchPath('path with spaces/file.md', '*/file.md')).toBe(true)
232
+ // Nested requires **
233
+ expect(matchPath('a/path with spaces/file.md', '*/file.md')).toBe(false)
234
+ expect(matchPath('a/path with spaces/file.md', '**/file.md')).toBe(true)
235
+ })
236
+
237
+ it('handles unicode characters', () => {
238
+ expect(matchPath('docs/日本語.md', 'docs/*.md')).toBe(true)
239
+ expect(matchPath('文档/readme.md', '*/readme.md')).toBe(true)
240
+ expect(matchPath('a/文档/readme.md', '**/readme.md')).toBe(true)
241
+ })
242
+ })
243
+
244
+ describe('real-world patterns', () => {
245
+ it('matches markdown files in docs folder', () => {
246
+ expect(matchPath('docs/readme.md', 'docs/*.md')).toBe(true)
247
+ expect(matchPath('docs/api.md', 'docs/*.md')).toBe(true)
248
+ // * doesn't match /, use ** for nested paths
249
+ expect(matchPath('docs/nested/api.md', 'docs/*.md')).toBe(false)
250
+ expect(matchPath('docs/nested/api.md', 'docs/**/*.md')).toBe(true)
251
+ })
252
+
253
+ it('matches typescript files in src', () => {
254
+ expect(matchPath('src/index.ts', 'src/*.ts')).toBe(true)
255
+ expect(matchPath('src/utils.ts', 'src/*.ts')).toBe(true)
256
+ // Nested requires **
257
+ expect(matchPath('src/nested/index.ts', 'src/*.ts')).toBe(false)
258
+ expect(matchPath('src/nested/index.ts', 'src/**/*.ts')).toBe(true)
259
+ })
260
+
261
+ it('matches test files', () => {
262
+ expect(matchPath('test.spec.ts', '*.spec.ts')).toBe(true)
263
+ expect(matchPath('utils.test.ts', '*.test.ts')).toBe(true)
264
+ // Nested requires **
265
+ expect(matchPath('src/utils.test.ts', '*.test.ts')).toBe(false)
266
+ expect(matchPath('src/utils.test.ts', '**/*.test.ts')).toBe(true)
267
+ })
268
+
269
+ it('matches config files', () => {
270
+ expect(matchPath('tsconfig.json', '*.json')).toBe(true)
271
+ expect(matchPath('package.json', 'package.json')).toBe(true)
272
+ expect(matchPath('.eslintrc.json', '*.json')).toBe(true)
273
+ })
274
+ })
275
+ })
276
+ })
@@ -0,0 +1,33 @@
1
+ /**
2
+ * Path matching utilities for search filtering.
3
+ *
4
+ * Simple glob-like pattern matching for document paths.
5
+ */
6
+
7
/**
 * Match a file path against a glob-like pattern.
 *
 * Supports:
 * - `**` matches any characters including directory separators (recursive)
 * - `*` matches any characters except directory separators (single segment)
 * - `?` matches exactly one character (not directory separator)
 * - every other character is literal, including regex metacharacters
 *   such as `.`, `+`, `^`, `$`, `(`, `)`, `[`, `]`, `{`, `}`, `|` and `\`
 *
 * Matching is anchored (the whole path must match the whole pattern) and
 * case-insensitive.
 *
 * The pattern is translated to a regular expression in a single pass, so no
 * intermediate placeholder text is ever inserted into it. A pattern can
 * therefore contain any literal text without being mistaken for a wildcard.
 *
 * @param filePath - The file path to test
 * @param pattern - The glob pattern (e.g., "docs/*", "src/api/*.md", "src/** /*.ts")
 * @returns True if the path matches the pattern
 */
export const matchPath = (filePath: string, pattern: string): boolean => {
  // `**` is listed before the single-character wildcards so the alternation
  // consumes both asterisks together instead of treating them as two `*`.
  const regexPattern = pattern.replace(
    /\*\*|[*?]|[.+^${}()|[\]\\]/g,
    (token) => {
      switch (token) {
        case '**':
          return '.*' // ** matches anything, slashes included
        case '*':
          return '[^/]*' // Single * doesn't match slashes
        case '?':
          return '[^/]' // ? matches any single non-slash char
        default:
          return `\\${token}` // Regex metacharacter: match it literally
      }
    },
  )

  return new RegExp(`^${regexPattern}$`, 'i').test(filePath)
}
@@ -7,7 +7,12 @@ import * as path from 'node:path'
7
7
  import { Effect } from 'effect'
8
8
  import { afterAll, beforeAll, describe, expect, it } from 'vitest'
9
9
  import { buildIndex } from '../index/indexer.js'
10
- import { formatContextForLLM, getContext, search } from './searcher.js'
10
+ import {
11
+ formatContextForLLM,
12
+ getContext,
13
+ search,
14
+ searchContent,
15
+ } from './searcher.js'
11
16
 
12
17
  // Test fixture directory
13
18
  const TEST_DIR = path.join(process.cwd(), 'tests', 'fixtures', 'search')
@@ -66,6 +71,22 @@ An overview of the document.
66
71
  `,
67
72
  )
68
73
 
74
+ // Create test file for fuzzy/stem search
75
+ await fs.writeFile(
76
+ path.join(TEST_DIR, 'stem-test.md'),
77
+ `# Failure Handling
78
+
79
+ When the application fails, it logs the failure message.
80
+ Failed operations are retried automatically.
81
+ Failing gracefully is important for user experience.
82
+
83
+ ## Configuration
84
+
85
+ The configration (typo) file is located at config.json.
86
+ Set the configuration options carefully.
87
+ `,
88
+ )
89
+
69
90
  // Build index
70
91
  await runEffect(buildIndex(TEST_DIR, { force: true }))
71
92
  })
@@ -179,4 +200,81 @@ An overview of the document.
179
200
  expect(formatted).toContain('[code]')
180
201
  })
181
202
  })
203
+
204
+ describe('searchContent() with fuzzy/stem matching', () => {
205
+ it('should match stemmed variations with --stem flag', async () => {
206
+ // Search for "fail" should match "fails", "failed", "failing", "failure"
207
+ const results = await runEffect(
208
+ searchContent(TEST_DIR, {
209
+ content: 'fail',
210
+ stem: true,
211
+ pathPattern: 'stem-test*',
212
+ }),
213
+ )
214
+ expect(results.length).toBe(1)
215
+ expect(results[0]?.section.heading).toBe('Failure Handling')
216
+ // Should have multiple line matches for different word forms
217
+ expect(results[0]?.matches?.length).toBeGreaterThan(1)
218
+ })
219
+
220
+ it('should match typos with --fuzzy flag', async () => {
221
+ // Search for "configration" (typo) should match "configuration"
222
+ const results = await runEffect(
223
+ searchContent(TEST_DIR, {
224
+ content: 'configration',
225
+ fuzzy: true,
226
+ pathPattern: 'stem-test*',
227
+ }),
228
+ )
229
+ expect(results.length).toBe(1)
230
+ expect(results[0]?.section.heading).toBe('Configuration')
231
+ // Should match both the typo line and the correct spelling line
232
+ expect(results[0]?.matches?.length).toBeGreaterThanOrEqual(1)
233
+ })
234
+
235
+ it('should respect fuzzyDistance option', async () => {
236
+ // With distance 1, "fail" should NOT match "file" (distance 2)
237
+ const strictResults = await runEffect(
238
+ searchContent(TEST_DIR, {
239
+ content: 'fail',
240
+ fuzzy: true,
241
+ fuzzyDistance: 1,
242
+ pathPattern: 'stem-test*',
243
+ }),
244
+ )
245
+ // With distance 1, only exact or 1-edit matches
246
+ const matchedWords = strictResults
247
+ .flatMap((r) => r.matches?.map((m) => m.line) ?? [])
248
+ .join(' ')
249
+ .toLowerCase()
250
+ // "fail" with distance 1 matches "fails" but not "file"
251
+ expect(matchedWords).toContain('fail')
252
+ })
253
+
254
+ it('should not match without fuzzy/stem flags', async () => {
255
+ // Exact search matches "fail" as a substring, so "failure" and "fails" still match
256
+ const results = await runEffect(
257
+ searchContent(TEST_DIR, {
258
+ content: 'fail',
259
+ pathPattern: 'stem-test*',
260
+ }),
261
+ )
262
+ // With exact search, "fail" appears as substring in "fails", "failure", "failing", "failed"
263
+ // so it still matches, but checks the regex-based behavior
264
+ expect(results.length).toBeGreaterThanOrEqual(1)
265
+ })
266
+
267
+ it('should combine fuzzy and stem matching', async () => {
268
+ // Both flags together should provide broader matching
269
+ const results = await runEffect(
270
+ searchContent(TEST_DIR, {
271
+ content: 'fail',
272
+ fuzzy: true,
273
+ stem: true,
274
+ pathPattern: 'stem-test*',
275
+ }),
276
+ )
277
+ expect(results.length).toBeGreaterThanOrEqual(1)
278
+ })
279
+ })
182
280
  })