mdcontext 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.changeset/config.json +9 -9
- package/.claude/settings.local.json +25 -0
- package/.github/workflows/claude-code-review.yml +44 -0
- package/.github/workflows/claude.yml +85 -0
- package/CONTRIBUTING.md +186 -0
- package/NOTES/NOTES +44 -0
- package/README.md +206 -3
- package/biome.json +1 -1
- package/dist/chunk-23UPXDNL.js +3044 -0
- package/dist/chunk-2W7MO2DL.js +1366 -0
- package/dist/chunk-3NUAZGMA.js +1689 -0
- package/dist/chunk-7TOWB2XB.js +366 -0
- package/dist/chunk-7XOTOADQ.js +3065 -0
- package/dist/chunk-AH2PDM2K.js +3042 -0
- package/dist/chunk-BNXWSZ63.js +3742 -0
- package/dist/chunk-BTL5DJVU.js +3222 -0
- package/dist/chunk-HDHYG7E4.js +104 -0
- package/dist/chunk-HLR4KZBP.js +3234 -0
- package/dist/chunk-IP3FRFEB.js +1045 -0
- package/dist/chunk-KHU56VDO.js +3042 -0
- package/dist/chunk-KRYIFLQR.js +85 -89
- package/dist/chunk-LBSDNLEM.js +287 -0
- package/dist/chunk-MNTQ7HCP.js +2643 -0
- package/dist/chunk-MUJELQQ6.js +1387 -0
- package/dist/chunk-MXJGMSLV.js +2199 -0
- package/dist/chunk-N6QJGC3Z.js +2636 -0
- package/dist/chunk-OBELGBPM.js +1713 -0
- package/dist/chunk-OT7R5XTA.js +3192 -0
- package/dist/chunk-P7X4RA2T.js +106 -0
- package/dist/chunk-PIDUQNC2.js +3185 -0
- package/dist/chunk-POGCDIH4.js +3187 -0
- package/dist/chunk-PSIEOQGZ.js +3043 -0
- package/dist/chunk-PVRT3IHA.js +3238 -0
- package/dist/chunk-QNN4TT23.js +1430 -0
- package/dist/chunk-RE3R45RJ.js +3042 -0
- package/dist/chunk-S7E6TFX6.js +718 -657
- package/dist/chunk-SG6GLU4U.js +1378 -0
- package/dist/chunk-SJCDV2ST.js +274 -0
- package/dist/chunk-SYE5XLF3.js +104 -0
- package/dist/chunk-T5VLYBZD.js +103 -0
- package/dist/chunk-TOQB7VWU.js +3238 -0
- package/dist/chunk-VFNMZ4ZQ.js +3228 -0
- package/dist/chunk-VVTGZNBT.js +1533 -1423
- package/dist/chunk-W7Q4RFEV.js +104 -0
- package/dist/chunk-XTYYVRLO.js +3190 -0
- package/dist/chunk-Y6MDYVJD.js +3063 -0
- package/dist/cli/main.js +4072 -629
- package/dist/index.d.ts +420 -33
- package/dist/index.js +8 -15
- package/dist/mcp/server.js +103 -7
- package/dist/schema-BAWSG7KY.js +22 -0
- package/dist/schema-E3QUPL26.js +20 -0
- package/dist/schema-EHL7WUT6.js +20 -0
- package/docs/019-USAGE.md +44 -5
- package/docs/020-current-implementation.md +8 -8
- package/docs/021-DOGFOODING-FINDINGS.md +1 -1
- package/docs/CONFIG.md +1123 -0
- package/docs/ERRORS.md +383 -0
- package/docs/summarization.md +320 -0
- package/justfile +40 -0
- package/package.json +39 -33
- package/research/INDEX.md +315 -0
- package/research/code-review/README.md +90 -0
- package/research/code-review/cli-error-handling-review.md +979 -0
- package/research/code-review/code-review-validation-report.md +464 -0
- package/research/code-review/main-ts-review.md +1128 -0
- package/research/config-docs/SUMMARY.md +357 -0
- package/research/config-docs/TEST-RESULTS.md +776 -0
- package/research/config-docs/TODO.md +542 -0
- package/research/config-docs/analysis.md +744 -0
- package/research/config-docs/fix-validation.md +502 -0
- package/research/config-docs/help-audit.md +264 -0
- package/research/config-docs/help-system-analysis.md +890 -0
- package/research/frontmatter/COMMENTS-ARE-SKIPPED.md +149 -0
- package/research/frontmatter/LLM-CODE-NAVIGATION.md +276 -0
- package/research/issue-review.md +603 -0
- package/research/llm-summarization/agent-cli-tools-2026.md +1082 -0
- package/research/llm-summarization/alternative-providers-2026.md +1428 -0
- package/research/llm-summarization/anthropic-2026.md +367 -0
- package/research/llm-summarization/claude-cli-integration.md +1706 -0
- package/research/llm-summarization/cli-integration-patterns.md +3155 -0
- package/research/llm-summarization/openai-2026.md +473 -0
- package/research/llm-summarization/openai-compatible-providers-2026.md +1022 -0
- package/research/llm-summarization/opencode-cli-integration.md +1552 -0
- package/research/llm-summarization/prompt-engineering-2026.md +1426 -0
- package/research/llm-summarization/prototype-results.md +56 -0
- package/research/llm-summarization/provider-switching-patterns-2026.md +2153 -0
- package/research/llm-summarization/typescript-llm-libraries-2026.md +2436 -0
- package/research/mdcontext-pudding/00-EXECUTIVE-SUMMARY.md +282 -0
- package/research/mdcontext-pudding/01-index-embed.md +956 -0
- package/research/mdcontext-pudding/02-search-COMMANDS.md +142 -0
- package/research/mdcontext-pudding/02-search-SUMMARY.md +146 -0
- package/research/mdcontext-pudding/02-search.md +970 -0
- package/research/mdcontext-pudding/03-context.md +779 -0
- package/research/mdcontext-pudding/04-navigation-and-analytics.md +803 -0
- package/research/mdcontext-pudding/04-tree.md +704 -0
- package/research/mdcontext-pudding/05-config.md +1038 -0
- package/research/mdcontext-pudding/06-links-summary.txt +87 -0
- package/research/mdcontext-pudding/06-links.md +679 -0
- package/research/mdcontext-pudding/07-stats.md +693 -0
- package/research/mdcontext-pudding/BUG-FIX-PLAN.md +388 -0
- package/research/mdcontext-pudding/P0-BUG-VALIDATION.md +167 -0
- package/research/mdcontext-pudding/README.md +168 -0
- package/research/mdcontext-pudding/TESTING-SUMMARY.md +128 -0
- package/research/research-quality-review.md +834 -0
- package/research/semantic-search/embedding-text-analysis.md +156 -0
- package/research/semantic-search/multi-word-failure-reproduction.md +171 -0
- package/research/semantic-search/query-processing-analysis.md +207 -0
- package/research/semantic-search/root-cause-and-solution.md +114 -0
- package/research/semantic-search/threshold-validation-report.md +69 -0
- package/research/semantic-search/vector-search-analysis.md +63 -0
- package/research/test-path-issues.md +276 -0
- package/review/ALP-76/1-error-type-design.md +962 -0
- package/review/ALP-76/2-error-handling-patterns.md +906 -0
- package/review/ALP-76/3-error-presentation.md +624 -0
- package/review/ALP-76/4-test-coverage.md +625 -0
- package/review/ALP-76/5-migration-completeness.md +440 -0
- package/review/ALP-76/6-effect-best-practices.md +755 -0
- package/scripts/apply-branch-protection.sh +47 -0
- package/scripts/branch-protection-templates.json +79 -0
- package/scripts/prototype-summarization.ts +346 -0
- package/scripts/rebuild-hnswlib.js +32 -37
- package/scripts/setup-branch-protection.sh +64 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/active-provider.json +7 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.json +541 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.meta.json +5 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/config.json +8 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/documents.json +60 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/links.json +13 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/sections.json +1197 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/configuration-management.md +99 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/distributed-systems.md +92 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/error-handling.md +78 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/failure-automation.md +55 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/job-context.md +69 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/process-orchestration.md +99 -0
- package/src/cli/argv-preprocessor.test.ts +2 -2
- package/src/cli/cli.test.ts +230 -33
- package/src/cli/commands/config-cmd.ts +642 -0
- package/src/cli/commands/context.ts +97 -9
- package/src/cli/commands/duplicates.ts +122 -0
- package/src/cli/commands/embeddings.ts +529 -0
- package/src/cli/commands/index-cmd.ts +210 -30
- package/src/cli/commands/index.ts +3 -0
- package/src/cli/commands/search.ts +894 -64
- package/src/cli/commands/stats.ts +3 -0
- package/src/cli/commands/tree.ts +26 -5
- package/src/cli/config-layer.ts +176 -0
- package/src/cli/error-handler.test.ts +235 -0
- package/src/cli/error-handler.ts +655 -0
- package/src/cli/flag-schemas.ts +66 -0
- package/src/cli/help.ts +209 -7
- package/src/cli/main.ts +348 -58
- package/src/cli/options.ts +10 -0
- package/src/cli/shared-error-handling.ts +199 -0
- package/src/cli/utils.ts +150 -17
- package/src/config/file-provider.test.ts +320 -0
- package/src/config/file-provider.ts +273 -0
- package/src/config/index.ts +72 -0
- package/src/config/integration.test.ts +667 -0
- package/src/config/precedence.test.ts +277 -0
- package/src/config/precedence.ts +451 -0
- package/src/config/schema.test.ts +414 -0
- package/src/config/schema.ts +603 -0
- package/src/config/service.test.ts +320 -0
- package/src/config/service.ts +243 -0
- package/src/config/testing.test.ts +264 -0
- package/src/config/testing.ts +110 -0
- package/src/core/types.ts +6 -33
- package/src/duplicates/detector.test.ts +183 -0
- package/src/duplicates/detector.ts +414 -0
- package/src/duplicates/index.ts +18 -0
- package/src/embeddings/embedding-namespace.test.ts +300 -0
- package/src/embeddings/embedding-namespace.ts +947 -0
- package/src/embeddings/heading-boost.test.ts +222 -0
- package/src/embeddings/hnsw-build-options.test.ts +198 -0
- package/src/embeddings/hyde.test.ts +272 -0
- package/src/embeddings/hyde.ts +264 -0
- package/src/embeddings/index.ts +2 -0
- package/src/embeddings/openai-provider.ts +332 -83
- package/src/embeddings/pricing.json +22 -0
- package/src/embeddings/provider-constants.ts +204 -0
- package/src/embeddings/provider-errors.test.ts +967 -0
- package/src/embeddings/provider-errors.ts +565 -0
- package/src/embeddings/provider-factory.test.ts +240 -0
- package/src/embeddings/provider-factory.ts +225 -0
- package/src/embeddings/provider-integration.test.ts +788 -0
- package/src/embeddings/query-preprocessing.test.ts +187 -0
- package/src/embeddings/semantic-search-threshold.test.ts +508 -0
- package/src/embeddings/semantic-search.ts +780 -93
- package/src/embeddings/types.ts +293 -16
- package/src/embeddings/vector-store.ts +486 -77
- package/src/embeddings/voyage-provider.ts +313 -0
- package/src/errors/errors.test.ts +845 -0
- package/src/errors/index.ts +533 -0
- package/src/index/ignore-patterns.test.ts +354 -0
- package/src/index/ignore-patterns.ts +305 -0
- package/src/index/indexer.ts +286 -48
- package/src/index/storage.ts +94 -30
- package/src/index/types.ts +40 -2
- package/src/index/watcher.ts +67 -9
- package/src/index.ts +22 -0
- package/src/integration/search-keyword.test.ts +678 -0
- package/src/mcp/server.ts +135 -6
- package/src/parser/parser.ts +18 -19
- package/src/parser/section-filter.test.ts +277 -0
- package/src/parser/section-filter.ts +125 -3
- package/src/search/__tests__/hybrid-search.test.ts +650 -0
- package/src/search/bm25-store.ts +366 -0
- package/src/search/cross-encoder.test.ts +253 -0
- package/src/search/cross-encoder.ts +406 -0
- package/src/search/fuzzy-search.test.ts +419 -0
- package/src/search/fuzzy-search.ts +273 -0
- package/src/search/hybrid-search.ts +448 -0
- package/src/search/path-matcher.test.ts +276 -0
- package/src/search/path-matcher.ts +33 -0
- package/src/search/searcher.test.ts +99 -1
- package/src/search/searcher.ts +189 -67
- package/src/search/wink-bm25.d.ts +30 -0
- package/src/summarization/cli-providers/claude.ts +202 -0
- package/src/summarization/cli-providers/detection.test.ts +273 -0
- package/src/summarization/cli-providers/detection.ts +118 -0
- package/src/summarization/cli-providers/index.ts +8 -0
- package/src/summarization/cost.test.ts +139 -0
- package/src/summarization/cost.ts +102 -0
- package/src/summarization/error-handler.test.ts +127 -0
- package/src/summarization/error-handler.ts +111 -0
- package/src/summarization/index.ts +102 -0
- package/src/summarization/pipeline.test.ts +498 -0
- package/src/summarization/pipeline.ts +231 -0
- package/src/summarization/prompts.test.ts +269 -0
- package/src/summarization/prompts.ts +133 -0
- package/src/summarization/provider-factory.test.ts +396 -0
- package/src/summarization/provider-factory.ts +178 -0
- package/src/summarization/types.ts +184 -0
- package/src/summarize/summarizer.ts +104 -35
- package/src/types/huggingface-transformers.d.ts +66 -0
- package/tests/fixtures/cli/.mdcontext/active-provider.json +7 -0
- package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/indexes/documents.json +4 -4
- package/tests/fixtures/cli/.mdcontext/indexes/sections.json +14 -0
- package/tests/integration/embed-index.test.ts +712 -0
- package/tests/integration/search-context.test.ts +469 -0
- package/tests/integration/search-semantic.test.ts +522 -0
- package/vitest.config.ts +1 -6
- package/AGENTS.md +0 -46
- package/tests/fixtures/cli/.mdcontext/vectors.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/vectors.meta.json +0 -1264
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for path-matcher utilities
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { describe, expect, it } from 'vitest'
|
|
6
|
+
import { matchPath } from './path-matcher.js'
|
|
7
|
+
|
|
8
|
+
describe('path-matcher', () => {
|
|
9
|
+
describe('matchPath', () => {
|
|
10
|
+
describe('basic patterns', () => {
|
|
11
|
+
it('matches exact paths', () => {
|
|
12
|
+
expect(matchPath('docs/readme.md', 'docs/readme.md')).toBe(true)
|
|
13
|
+
expect(matchPath('src/index.ts', 'src/index.ts')).toBe(true)
|
|
14
|
+
})
|
|
15
|
+
|
|
16
|
+
it('does not match different paths', () => {
|
|
17
|
+
expect(matchPath('docs/readme.md', 'src/readme.md')).toBe(false)
|
|
18
|
+
expect(matchPath('docs/readme.md', 'docs/other.md')).toBe(false)
|
|
19
|
+
})
|
|
20
|
+
|
|
21
|
+
it('is case-insensitive', () => {
|
|
22
|
+
expect(matchPath('docs/README.md', 'docs/readme.md')).toBe(true)
|
|
23
|
+
expect(matchPath('DOCS/readme.md', 'docs/readme.md')).toBe(true)
|
|
24
|
+
expect(matchPath('docs/readme.MD', 'docs/readme.md')).toBe(true)
|
|
25
|
+
})
|
|
26
|
+
})
|
|
27
|
+
|
|
28
|
+
describe('asterisk wildcard (*)', () => {
|
|
29
|
+
it('matches any characters within filename', () => {
|
|
30
|
+
expect(matchPath('docs/readme.md', 'docs/*.md')).toBe(true)
|
|
31
|
+
expect(matchPath('docs/guide.md', 'docs/*.md')).toBe(true)
|
|
32
|
+
expect(matchPath('docs/api-reference.md', 'docs/*.md')).toBe(true)
|
|
33
|
+
})
|
|
34
|
+
|
|
35
|
+
it('matches empty string with asterisk', () => {
|
|
36
|
+
expect(matchPath('docs/.md', 'docs/*.md')).toBe(true)
|
|
37
|
+
})
|
|
38
|
+
|
|
39
|
+
it('matches patterns at start of path', () => {
|
|
40
|
+
expect(matchPath('src/index.ts', '*/index.ts')).toBe(true)
|
|
41
|
+
expect(matchPath('lib/index.ts', '*/index.ts')).toBe(true)
|
|
42
|
+
})
|
|
43
|
+
|
|
44
|
+
it('matches patterns in middle of path', () => {
|
|
45
|
+
expect(matchPath('src/utils/index.ts', 'src/*/index.ts')).toBe(true)
|
|
46
|
+
expect(matchPath('src/helpers/index.ts', 'src/*/index.ts')).toBe(true)
|
|
47
|
+
})
|
|
48
|
+
|
|
49
|
+
it('matches multiple wildcards', () => {
|
|
50
|
+
expect(matchPath('src/utils/test.ts', '*/*/*.ts')).toBe(true)
|
|
51
|
+
expect(matchPath('a/b/c.ts', '*/*/*.ts')).toBe(true)
|
|
52
|
+
})
|
|
53
|
+
|
|
54
|
+
it('single asterisk does NOT match directory separators', () => {
|
|
55
|
+
// Standard glob semantics: * matches within a segment only
|
|
56
|
+
expect(matchPath('file.md', '*.md')).toBe(true)
|
|
57
|
+
expect(matchPath('dir/file.md', '*.md')).toBe(false) // * doesn't match /
|
|
58
|
+
expect(matchPath('deeply/nested/path/file.md', '*')).toBe(false)
|
|
59
|
+
expect(matchPath('a/b/c.ts', '*.ts')).toBe(false)
|
|
60
|
+
})
|
|
61
|
+
|
|
62
|
+
it('does not match nested paths with single asterisk', () => {
|
|
63
|
+
expect(matchPath('docs/nested/api.md', 'docs/*.md')).toBe(false)
|
|
64
|
+
expect(matchPath('src/sub/file.ts', 'src/*.ts')).toBe(false)
|
|
65
|
+
})
|
|
66
|
+
})
|
|
67
|
+
|
|
68
|
+
describe('double asterisk wildcard (**)', () => {
|
|
69
|
+
it('matches across directory separators', () => {
|
|
70
|
+
expect(matchPath('deeply/nested/path/file.md', '**')).toBe(true)
|
|
71
|
+
expect(matchPath('a/b/c.ts', '**.ts')).toBe(true)
|
|
72
|
+
expect(matchPath('a/b/c.ts', '**/*.ts')).toBe(true)
|
|
73
|
+
})
|
|
74
|
+
|
|
75
|
+
it('matches nested paths recursively', () => {
|
|
76
|
+
expect(matchPath('docs/nested/api.md', 'docs/**/*.md')).toBe(true)
|
|
77
|
+
expect(matchPath('docs/deeply/nested/file.md', 'docs/**/*.md')).toBe(
|
|
78
|
+
true,
|
|
79
|
+
)
|
|
80
|
+
expect(matchPath('src/a/b/c/file.ts', 'src/**/*.ts')).toBe(true)
|
|
81
|
+
})
|
|
82
|
+
|
|
83
|
+
it('matches at beginning of pattern', () => {
|
|
84
|
+
expect(matchPath('any/path/to/file.md', '**/*.md')).toBe(true)
|
|
85
|
+
// Note: **/*.md requires at least one /; for root files use *.md or **.md
|
|
86
|
+
expect(matchPath('file.md', '**.md')).toBe(true)
|
|
87
|
+
expect(matchPath('file.md', '*.md')).toBe(true)
|
|
88
|
+
})
|
|
89
|
+
})
|
|
90
|
+
|
|
91
|
+
describe('question mark wildcard (?)', () => {
|
|
92
|
+
it('matches exactly one character', () => {
|
|
93
|
+
expect(matchPath('file1.md', 'file?.md')).toBe(true)
|
|
94
|
+
expect(matchPath('fileA.md', 'file?.md')).toBe(true)
|
|
95
|
+
expect(matchPath('file-.md', 'file?.md')).toBe(true)
|
|
96
|
+
})
|
|
97
|
+
|
|
98
|
+
it('does not match zero characters', () => {
|
|
99
|
+
expect(matchPath('file.md', 'file?.md')).toBe(false)
|
|
100
|
+
})
|
|
101
|
+
|
|
102
|
+
it('does not match multiple characters', () => {
|
|
103
|
+
expect(matchPath('file12.md', 'file?.md')).toBe(false)
|
|
104
|
+
expect(matchPath('fileABC.md', 'file?.md')).toBe(false)
|
|
105
|
+
})
|
|
106
|
+
|
|
107
|
+
it('matches multiple question marks', () => {
|
|
108
|
+
expect(matchPath('file12.md', 'file??.md')).toBe(true)
|
|
109
|
+
expect(matchPath('fileAB.md', 'file??.md')).toBe(true)
|
|
110
|
+
expect(matchPath('file1.md', 'file??.md')).toBe(false)
|
|
111
|
+
})
|
|
112
|
+
|
|
113
|
+
it('can be combined with asterisk', () => {
|
|
114
|
+
expect(matchPath('v1/readme.md', 'v?/*.md')).toBe(true)
|
|
115
|
+
expect(matchPath('v2/guide.md', 'v?/*.md')).toBe(true)
|
|
116
|
+
expect(matchPath('v10/readme.md', 'v?/*.md')).toBe(false)
|
|
117
|
+
})
|
|
118
|
+
|
|
119
|
+
it('does not match directory separators', () => {
|
|
120
|
+
expect(matchPath('a/b', 'a?b')).toBe(false) // ? should not match /
|
|
121
|
+
})
|
|
122
|
+
})
|
|
123
|
+
|
|
124
|
+
describe('dot handling', () => {
|
|
125
|
+
it('treats dot as literal character', () => {
|
|
126
|
+
expect(matchPath('file.md', 'file.md')).toBe(true)
|
|
127
|
+
expect(matchPath('fileXmd', 'file.md')).toBe(false)
|
|
128
|
+
})
|
|
129
|
+
|
|
130
|
+
it('escapes dots in patterns correctly', () => {
|
|
131
|
+
expect(matchPath('src.utils.index.ts', 'src.utils.index.ts')).toBe(true)
|
|
132
|
+
expect(matchPath('srcXutilsXindexXts', 'src.utils.index.ts')).toBe(
|
|
133
|
+
false,
|
|
134
|
+
)
|
|
135
|
+
})
|
|
136
|
+
|
|
137
|
+
it('matches file extensions correctly', () => {
|
|
138
|
+
expect(matchPath('readme.md', '*.md')).toBe(true)
|
|
139
|
+
expect(matchPath('readme.markdown', '*.md')).toBe(false)
|
|
140
|
+
expect(matchPath('readmeXmd', '*.md')).toBe(false)
|
|
141
|
+
})
|
|
142
|
+
})
|
|
143
|
+
|
|
144
|
+
describe('special regex characters', () => {
|
|
145
|
+
it('handles paths with special characters', () => {
|
|
146
|
+
// The path-matcher now escapes all regex special chars
|
|
147
|
+
expect(matchPath('file.test.md', 'file.test.md')).toBe(true)
|
|
148
|
+
})
|
|
149
|
+
|
|
150
|
+
it('handles patterns with multiple dots', () => {
|
|
151
|
+
expect(matchPath('package.config.json', '*.config.json')).toBe(true)
|
|
152
|
+
expect(matchPath('app.module.ts', '*.module.ts')).toBe(true)
|
|
153
|
+
})
|
|
154
|
+
|
|
155
|
+
it('treats parentheses as literal characters', () => {
|
|
156
|
+
expect(matchPath('file(1).md', 'file(1).md')).toBe(true)
|
|
157
|
+
expect(matchPath('file1.md', 'file(1).md')).toBe(false)
|
|
158
|
+
})
|
|
159
|
+
|
|
160
|
+
it('treats square brackets as literal characters', () => {
|
|
161
|
+
expect(matchPath('[ab].md', '[ab].md')).toBe(true)
|
|
162
|
+
expect(matchPath('a.md', '[ab].md')).toBe(false)
|
|
163
|
+
expect(matchPath('b.md', '[ab].md')).toBe(false)
|
|
164
|
+
})
|
|
165
|
+
|
|
166
|
+
it('treats plus as literal character', () => {
|
|
167
|
+
expect(matchPath('C++.md', 'C++.md')).toBe(true)
|
|
168
|
+
expect(matchPath('C.md', 'C++.md')).toBe(false)
|
|
169
|
+
})
|
|
170
|
+
|
|
171
|
+
it('treats caret as literal character', () => {
|
|
172
|
+
expect(matchPath('test^2.md', 'test^2.md')).toBe(true)
|
|
173
|
+
expect(matchPath('test2.md', 'test^2.md')).toBe(false)
|
|
174
|
+
})
|
|
175
|
+
|
|
176
|
+
it('treats dollar sign as literal character', () => {
|
|
177
|
+
expect(matchPath('price$100.md', 'price$100.md')).toBe(true)
|
|
178
|
+
expect(matchPath('price100.md', 'price$100.md')).toBe(false)
|
|
179
|
+
})
|
|
180
|
+
|
|
181
|
+
it('treats curly braces as literal characters', () => {
|
|
182
|
+
expect(matchPath('obj{}.md', 'obj{}.md')).toBe(true)
|
|
183
|
+
expect(matchPath('obj.md', 'obj{}.md')).toBe(false)
|
|
184
|
+
})
|
|
185
|
+
|
|
186
|
+
it('treats pipe as literal character', () => {
|
|
187
|
+
expect(matchPath('a|b.md', 'a|b.md')).toBe(true)
|
|
188
|
+
expect(matchPath('a.md', 'a|b.md')).toBe(false)
|
|
189
|
+
})
|
|
190
|
+
|
|
191
|
+
it('treats backslash as literal character', () => {
|
|
192
|
+
expect(matchPath('path\\file.md', 'path\\file.md')).toBe(true)
|
|
193
|
+
expect(matchPath('pathfile.md', 'path\\file.md')).toBe(false)
|
|
194
|
+
})
|
|
195
|
+
})
|
|
196
|
+
|
|
197
|
+
describe('edge cases', () => {
|
|
198
|
+
it('matches empty path with empty pattern', () => {
|
|
199
|
+
expect(matchPath('', '')).toBe(true)
|
|
200
|
+
})
|
|
201
|
+
|
|
202
|
+
it('does not match non-empty path with empty pattern', () => {
|
|
203
|
+
expect(matchPath('file.md', '')).toBe(false)
|
|
204
|
+
})
|
|
205
|
+
|
|
206
|
+
it('does not match empty path with non-empty pattern', () => {
|
|
207
|
+
expect(matchPath('', 'file.md')).toBe(false)
|
|
208
|
+
})
|
|
209
|
+
|
|
210
|
+
it('matches only asterisk pattern', () => {
|
|
211
|
+
expect(matchPath('anything', '*')).toBe(true)
|
|
212
|
+
expect(matchPath('', '*')).toBe(true)
|
|
213
|
+
expect(matchPath('a/b/c', '*')).toBe(false) // * doesn't match /
|
|
214
|
+
expect(matchPath('a/b/c', '**')).toBe(true) // ** matches everything
|
|
215
|
+
})
|
|
216
|
+
|
|
217
|
+
it('matches only question mark pattern', () => {
|
|
218
|
+
expect(matchPath('a', '?')).toBe(true)
|
|
219
|
+
expect(matchPath('ab', '?')).toBe(false)
|
|
220
|
+
expect(matchPath('', '?')).toBe(false)
|
|
221
|
+
})
|
|
222
|
+
|
|
223
|
+
it('handles very long paths', () => {
|
|
224
|
+
const longPath = `${'a/'.repeat(50)}file.md`
|
|
225
|
+
const longPattern = `${'a/'.repeat(50)}*.md`
|
|
226
|
+
expect(matchPath(longPath, longPattern)).toBe(true)
|
|
227
|
+
})
|
|
228
|
+
|
|
229
|
+
it('handles paths with spaces', () => {
|
|
230
|
+
expect(matchPath('my docs/readme.md', 'my docs/*.md')).toBe(true)
|
|
231
|
+
expect(matchPath('path with spaces/file.md', '*/file.md')).toBe(true)
|
|
232
|
+
// Nested requires **
|
|
233
|
+
expect(matchPath('a/path with spaces/file.md', '*/file.md')).toBe(false)
|
|
234
|
+
expect(matchPath('a/path with spaces/file.md', '**/file.md')).toBe(true)
|
|
235
|
+
})
|
|
236
|
+
|
|
237
|
+
it('handles unicode characters', () => {
|
|
238
|
+
expect(matchPath('docs/日本語.md', 'docs/*.md')).toBe(true)
|
|
239
|
+
expect(matchPath('文档/readme.md', '*/readme.md')).toBe(true)
|
|
240
|
+
expect(matchPath('a/文档/readme.md', '**/readme.md')).toBe(true)
|
|
241
|
+
})
|
|
242
|
+
})
|
|
243
|
+
|
|
244
|
+
describe('real-world patterns', () => {
|
|
245
|
+
it('matches markdown files in docs folder', () => {
|
|
246
|
+
expect(matchPath('docs/readme.md', 'docs/*.md')).toBe(true)
|
|
247
|
+
expect(matchPath('docs/api.md', 'docs/*.md')).toBe(true)
|
|
248
|
+
// * doesn't match /, use ** for nested paths
|
|
249
|
+
expect(matchPath('docs/nested/api.md', 'docs/*.md')).toBe(false)
|
|
250
|
+
expect(matchPath('docs/nested/api.md', 'docs/**/*.md')).toBe(true)
|
|
251
|
+
})
|
|
252
|
+
|
|
253
|
+
it('matches typescript files in src', () => {
|
|
254
|
+
expect(matchPath('src/index.ts', 'src/*.ts')).toBe(true)
|
|
255
|
+
expect(matchPath('src/utils.ts', 'src/*.ts')).toBe(true)
|
|
256
|
+
// Nested requires **
|
|
257
|
+
expect(matchPath('src/nested/index.ts', 'src/*.ts')).toBe(false)
|
|
258
|
+
expect(matchPath('src/nested/index.ts', 'src/**/*.ts')).toBe(true)
|
|
259
|
+
})
|
|
260
|
+
|
|
261
|
+
it('matches test files', () => {
|
|
262
|
+
expect(matchPath('test.spec.ts', '*.spec.ts')).toBe(true)
|
|
263
|
+
expect(matchPath('utils.test.ts', '*.test.ts')).toBe(true)
|
|
264
|
+
// Nested requires **
|
|
265
|
+
expect(matchPath('src/utils.test.ts', '*.test.ts')).toBe(false)
|
|
266
|
+
expect(matchPath('src/utils.test.ts', '**/*.test.ts')).toBe(true)
|
|
267
|
+
})
|
|
268
|
+
|
|
269
|
+
it('matches config files', () => {
|
|
270
|
+
expect(matchPath('tsconfig.json', '*.json')).toBe(true)
|
|
271
|
+
expect(matchPath('package.json', 'package.json')).toBe(true)
|
|
272
|
+
expect(matchPath('.eslintrc.json', '*.json')).toBe(true)
|
|
273
|
+
})
|
|
274
|
+
})
|
|
275
|
+
})
|
|
276
|
+
})
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Path matching utilities for search filtering.
|
|
3
|
+
*
|
|
4
|
+
* Simple glob-like pattern matching for document paths.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/**
 * Match a file path against a glob-like pattern.
 *
 * Supports:
 * - `**` matches any characters including directory separators (recursive)
 * - `*` matches any characters except directory separators (single segment)
 * - `?` matches exactly one character (not directory separator)
 * - every other character is matched literally, including regex
 *   metacharacters such as `.`, `+`, `^`, `$`, `(`, `)`, `[`, `]`, `{`, `}`,
 *   `|` and `\`
 *
 * Matching is case-insensitive and anchored at both ends, so the whole path
 * has to match the whole pattern.
 *
 * @param filePath - The file path to test
 * @param pattern - The glob pattern (e.g., "docs/*", "src/api/*.md", "src/** /*.ts";
 *   the space in the last example is only there so this comment is not closed early)
 * @returns True if the path matches the pattern
 */
export const matchPath = (filePath: string, pattern: string): boolean => {
  // Translate the glob in ONE pass. The alternation tries `**` before `*`, so
  // a double star is never split into two single stars, and no intermediate
  // placeholder string is needed (a placeholder could collide with literal
  // text that happens to appear in the pattern).
  const regexSource = pattern.replace(
    /\*\*|[*?]|[.+^${}()|[\]\\]/g,
    (token) => {
      switch (token) {
        case '**':
          return '.*' // recursive: may cross directory separators
        case '*':
          return '[^/]*' // stays within a single path segment
        case '?':
          return '[^/]' // exactly one non-separator character
        default:
          return `\\${token}` // regex metacharacter: match it literally
      }
    },
  )

  return new RegExp(`^${regexSource}$`, 'i').test(filePath)
}
|
|
@@ -7,7 +7,12 @@ import * as path from 'node:path'
|
|
|
7
7
|
import { Effect } from 'effect'
|
|
8
8
|
import { afterAll, beforeAll, describe, expect, it } from 'vitest'
|
|
9
9
|
import { buildIndex } from '../index/indexer.js'
|
|
10
|
-
import {
|
|
10
|
+
import {
|
|
11
|
+
formatContextForLLM,
|
|
12
|
+
getContext,
|
|
13
|
+
search,
|
|
14
|
+
searchContent,
|
|
15
|
+
} from './searcher.js'
|
|
11
16
|
|
|
12
17
|
// Test fixture directory
|
|
13
18
|
const TEST_DIR = path.join(process.cwd(), 'tests', 'fixtures', 'search')
|
|
@@ -66,6 +71,22 @@ An overview of the document.
|
|
|
66
71
|
`,
|
|
67
72
|
)
|
|
68
73
|
|
|
74
|
+
// Create test file for fuzzy/stem search
|
|
75
|
+
await fs.writeFile(
|
|
76
|
+
path.join(TEST_DIR, 'stem-test.md'),
|
|
77
|
+
`# Failure Handling
|
|
78
|
+
|
|
79
|
+
When the application fails, it logs the failure message.
|
|
80
|
+
Failed operations are retried automatically.
|
|
81
|
+
Failing gracefully is important for user experience.
|
|
82
|
+
|
|
83
|
+
## Configuration
|
|
84
|
+
|
|
85
|
+
The configration (typo) file is located at config.json.
|
|
86
|
+
Set the configuration options carefully.
|
|
87
|
+
`,
|
|
88
|
+
)
|
|
89
|
+
|
|
69
90
|
// Build index
|
|
70
91
|
await runEffect(buildIndex(TEST_DIR, { force: true }))
|
|
71
92
|
})
|
|
@@ -179,4 +200,81 @@ An overview of the document.
|
|
|
179
200
|
expect(formatted).toContain('[code]')
|
|
180
201
|
})
|
|
181
202
|
})
|
|
203
|
+
|
|
204
|
+
describe('searchContent() with fuzzy/stem matching', () => {
|
|
205
|
+
it('should match stemmed variations with --stem flag', async () => {
|
|
206
|
+
// Search for "fail" should match "fails", "failed", "failing", "failure"
|
|
207
|
+
const results = await runEffect(
|
|
208
|
+
searchContent(TEST_DIR, {
|
|
209
|
+
content: 'fail',
|
|
210
|
+
stem: true,
|
|
211
|
+
pathPattern: 'stem-test*',
|
|
212
|
+
}),
|
|
213
|
+
)
|
|
214
|
+
expect(results.length).toBe(1)
|
|
215
|
+
expect(results[0]?.section.heading).toBe('Failure Handling')
|
|
216
|
+
// Should have multiple line matches for different word forms
|
|
217
|
+
expect(results[0]?.matches?.length).toBeGreaterThan(1)
|
|
218
|
+
})
|
|
219
|
+
|
|
220
|
+
it('should match typos with --fuzzy flag', async () => {
|
|
221
|
+
// Search for "configration" (typo) should match "configuration"
|
|
222
|
+
const results = await runEffect(
|
|
223
|
+
searchContent(TEST_DIR, {
|
|
224
|
+
content: 'configration',
|
|
225
|
+
fuzzy: true,
|
|
226
|
+
pathPattern: 'stem-test*',
|
|
227
|
+
}),
|
|
228
|
+
)
|
|
229
|
+
expect(results.length).toBe(1)
|
|
230
|
+
expect(results[0]?.section.heading).toBe('Configuration')
|
|
231
|
+
// Ideally both the typo line and the correctly spelled line match; the assertion below only requires at least one
|
|
232
|
+
expect(results[0]?.matches?.length).toBeGreaterThanOrEqual(1)
|
|
233
|
+
})
|
|
234
|
+
|
|
235
|
+
it('should respect fuzzyDistance option', async () => {
|
|
236
|
+
// With distance 1, "fail" should NOT match "file" (distance 2)
|
|
237
|
+
const strictResults = await runEffect(
|
|
238
|
+
searchContent(TEST_DIR, {
|
|
239
|
+
content: 'fail',
|
|
240
|
+
fuzzy: true,
|
|
241
|
+
fuzzyDistance: 1,
|
|
242
|
+
pathPattern: 'stem-test*',
|
|
243
|
+
}),
|
|
244
|
+
)
|
|
245
|
+
// With distance 1, only exact or 1-edit matches
|
|
246
|
+
const matchedWords = strictResults
|
|
247
|
+
.flatMap((r) => r.matches?.map((m) => m.line) ?? [])
|
|
248
|
+
.join(' ')
|
|
249
|
+
.toLowerCase()
|
|
250
|
+
// With distance 1 the matched lines must still contain a "fail" form (e.g. "fails"); this check alone does not prove "file" is excluded
|
|
251
|
+
expect(matchedWords).toContain('fail')
|
|
252
|
+
})
|
|
253
|
+
|
|
254
|
+
it('should not match without fuzzy/stem flags', async () => {
|
|
255
|
+
// Without fuzzy/stem flags the search is a plain substring match: "fail" still hits "fails", "failure", etc., but no stemming or typo expansion is applied
|
|
256
|
+
const results = await runEffect(
|
|
257
|
+
searchContent(TEST_DIR, {
|
|
258
|
+
content: 'fail',
|
|
259
|
+
pathPattern: 'stem-test*',
|
|
260
|
+
}),
|
|
261
|
+
)
|
|
262
|
+
// With exact search, "fail" appears as substring in "fails", "failure", "failing", "failed"
|
|
263
|
+
// so it still matches, but checks the regex-based behavior
|
|
264
|
+
expect(results.length).toBeGreaterThanOrEqual(1)
|
|
265
|
+
})
|
|
266
|
+
|
|
267
|
+
it('should combine fuzzy and stem matching', async () => {
|
|
268
|
+
// Both flags together should provide broader matching
|
|
269
|
+
const results = await runEffect(
|
|
270
|
+
searchContent(TEST_DIR, {
|
|
271
|
+
content: 'fail',
|
|
272
|
+
fuzzy: true,
|
|
273
|
+
stem: true,
|
|
274
|
+
pathPattern: 'stem-test*',
|
|
275
|
+
}),
|
|
276
|
+
)
|
|
277
|
+
expect(results.length).toBeGreaterThanOrEqual(1)
|
|
278
|
+
})
|
|
279
|
+
})
|
|
182
280
|
})
|