mdcontext 0.0.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.changeset/README.md +28 -0
- package/.changeset/config.json +11 -0
- package/.claude/settings.local.json +25 -0
- package/.github/workflows/ci.yml +83 -0
- package/.github/workflows/claude-code-review.yml +44 -0
- package/.github/workflows/claude.yml +85 -0
- package/.github/workflows/release.yml +113 -0
- package/.tldrignore +112 -0
- package/BACKLOG.md +338 -0
- package/CONTRIBUTING.md +186 -0
- package/NOTES/NOTES +44 -0
- package/README.md +434 -11
- package/biome.json +36 -0
- package/cspell.config.yaml +14 -0
- package/dist/chunk-23UPXDNL.js +3044 -0
- package/dist/chunk-2W7MO2DL.js +1366 -0
- package/dist/chunk-3NUAZGMA.js +1689 -0
- package/dist/chunk-7TOWB2XB.js +366 -0
- package/dist/chunk-7XOTOADQ.js +3065 -0
- package/dist/chunk-AH2PDM2K.js +3042 -0
- package/dist/chunk-BNXWSZ63.js +3742 -0
- package/dist/chunk-BTL5DJVU.js +3222 -0
- package/dist/chunk-HDHYG7E4.js +104 -0
- package/dist/chunk-HLR4KZBP.js +3234 -0
- package/dist/chunk-IP3FRFEB.js +1045 -0
- package/dist/chunk-KHU56VDO.js +3042 -0
- package/dist/chunk-KRYIFLQR.js +88 -0
- package/dist/chunk-LBSDNLEM.js +287 -0
- package/dist/chunk-MNTQ7HCP.js +2643 -0
- package/dist/chunk-MUJELQQ6.js +1387 -0
- package/dist/chunk-MXJGMSLV.js +2199 -0
- package/dist/chunk-N6QJGC3Z.js +2636 -0
- package/dist/chunk-OBELGBPM.js +1713 -0
- package/dist/chunk-OT7R5XTA.js +3192 -0
- package/dist/chunk-P7X4RA2T.js +106 -0
- package/dist/chunk-PIDUQNC2.js +3185 -0
- package/dist/chunk-POGCDIH4.js +3187 -0
- package/dist/chunk-PSIEOQGZ.js +3043 -0
- package/dist/chunk-PVRT3IHA.js +3238 -0
- package/dist/chunk-QNN4TT23.js +1430 -0
- package/dist/chunk-RE3R45RJ.js +3042 -0
- package/dist/chunk-S7E6TFX6.js +803 -0
- package/dist/chunk-SG6GLU4U.js +1378 -0
- package/dist/chunk-SJCDV2ST.js +274 -0
- package/dist/chunk-SYE5XLF3.js +104 -0
- package/dist/chunk-T5VLYBZD.js +103 -0
- package/dist/chunk-TOQB7VWU.js +3238 -0
- package/dist/chunk-VFNMZ4ZQ.js +3228 -0
- package/dist/chunk-VVTGZNBT.js +1629 -0
- package/dist/chunk-W7Q4RFEV.js +104 -0
- package/dist/chunk-XTYYVRLO.js +3190 -0
- package/dist/chunk-Y6MDYVJD.js +3063 -0
- package/dist/cli/main.d.ts +1 -0
- package/dist/cli/main.js +5458 -0
- package/dist/index.d.ts +653 -0
- package/dist/index.js +79 -0
- package/dist/mcp/server.d.ts +1 -0
- package/dist/mcp/server.js +472 -0
- package/dist/schema-BAWSG7KY.js +22 -0
- package/dist/schema-E3QUPL26.js +20 -0
- package/dist/schema-EHL7WUT6.js +20 -0
- package/docs/019-USAGE.md +625 -0
- package/docs/020-current-implementation.md +364 -0
- package/docs/021-DOGFOODING-FINDINGS.md +175 -0
- package/docs/BACKLOG.md +80 -0
- package/docs/CONFIG.md +1123 -0
- package/docs/DESIGN.md +439 -0
- package/docs/ERRORS.md +383 -0
- package/docs/PROJECT.md +88 -0
- package/docs/ROADMAP.md +407 -0
- package/docs/summarization.md +320 -0
- package/docs/test-links.md +9 -0
- package/justfile +40 -0
- package/package.json +74 -9
- package/pnpm-workspace.yaml +5 -0
- package/research/INDEX.md +315 -0
- package/research/code-review/README.md +90 -0
- package/research/code-review/cli-error-handling-review.md +979 -0
- package/research/code-review/code-review-validation-report.md +464 -0
- package/research/code-review/main-ts-review.md +1128 -0
- package/research/config-analysis/01-current-implementation.md +470 -0
- package/research/config-analysis/02-strategy-recommendation.md +428 -0
- package/research/config-analysis/03-task-candidates.md +715 -0
- package/research/config-analysis/033-research-configuration-management.md +828 -0
- package/research/config-analysis/034-research-effect-cli-config.md +1504 -0
- package/research/config-analysis/04-consolidated-task-candidates.md +277 -0
- package/research/config-docs/SUMMARY.md +357 -0
- package/research/config-docs/TEST-RESULTS.md +776 -0
- package/research/config-docs/TODO.md +542 -0
- package/research/config-docs/analysis.md +744 -0
- package/research/config-docs/fix-validation.md +502 -0
- package/research/config-docs/help-audit.md +264 -0
- package/research/config-docs/help-system-analysis.md +890 -0
- package/research/dogfood/consolidated-tool-evaluation.md +373 -0
- package/research/dogfood/strategy-a/a-synthesis.md +184 -0
- package/research/dogfood/strategy-a/a1-docs.md +226 -0
- package/research/dogfood/strategy-a/a2-amorphic.md +156 -0
- package/research/dogfood/strategy-a/a3-llm.md +164 -0
- package/research/dogfood/strategy-b/b-synthesis.md +228 -0
- package/research/dogfood/strategy-b/b1-architecture.md +207 -0
- package/research/dogfood/strategy-b/b2-gaps.md +258 -0
- package/research/dogfood/strategy-b/b3-workflows.md +250 -0
- package/research/dogfood/strategy-c/c-synthesis.md +451 -0
- package/research/dogfood/strategy-c/c1-explorer.md +192 -0
- package/research/dogfood/strategy-c/c2-diver-memory.md +145 -0
- package/research/dogfood/strategy-c/c3-diver-control.md +148 -0
- package/research/dogfood/strategy-c/c4-diver-failure.md +151 -0
- package/research/dogfood/strategy-c/c5-diver-execution.md +221 -0
- package/research/dogfood/strategy-c/c6-diver-org.md +221 -0
- package/research/effect-cli-error-handling.md +845 -0
- package/research/effect-errors-as-values.md +943 -0
- package/research/errors-task-analysis/00-consolidated-tasks.md +207 -0
- package/research/errors-task-analysis/cli-commands-analysis.md +909 -0
- package/research/errors-task-analysis/embeddings-analysis.md +709 -0
- package/research/errors-task-analysis/index-search-analysis.md +812 -0
- package/research/frontmatter/COMMENTS-ARE-SKIPPED.md +149 -0
- package/research/frontmatter/LLM-CODE-NAVIGATION.md +276 -0
- package/research/issue-review.md +603 -0
- package/research/llm-summarization/agent-cli-tools-2026.md +1082 -0
- package/research/llm-summarization/alternative-providers-2026.md +1428 -0
- package/research/llm-summarization/anthropic-2026.md +367 -0
- package/research/llm-summarization/claude-cli-integration.md +1706 -0
- package/research/llm-summarization/cli-integration-patterns.md +3155 -0
- package/research/llm-summarization/openai-2026.md +473 -0
- package/research/llm-summarization/openai-compatible-providers-2026.md +1022 -0
- package/research/llm-summarization/opencode-cli-integration.md +1552 -0
- package/research/llm-summarization/prompt-engineering-2026.md +1426 -0
- package/research/llm-summarization/prototype-results.md +56 -0
- package/research/llm-summarization/provider-switching-patterns-2026.md +2153 -0
- package/research/llm-summarization/typescript-llm-libraries-2026.md +2436 -0
- package/research/mdcontext-error-analysis.md +521 -0
- package/research/mdcontext-pudding/00-EXECUTIVE-SUMMARY.md +282 -0
- package/research/mdcontext-pudding/01-index-embed.md +956 -0
- package/research/mdcontext-pudding/02-search-COMMANDS.md +142 -0
- package/research/mdcontext-pudding/02-search-SUMMARY.md +146 -0
- package/research/mdcontext-pudding/02-search.md +970 -0
- package/research/mdcontext-pudding/03-context.md +779 -0
- package/research/mdcontext-pudding/04-navigation-and-analytics.md +803 -0
- package/research/mdcontext-pudding/04-tree.md +704 -0
- package/research/mdcontext-pudding/05-config.md +1038 -0
- package/research/mdcontext-pudding/06-links-summary.txt +87 -0
- package/research/mdcontext-pudding/06-links.md +679 -0
- package/research/mdcontext-pudding/07-stats.md +693 -0
- package/research/mdcontext-pudding/BUG-FIX-PLAN.md +388 -0
- package/research/mdcontext-pudding/P0-BUG-VALIDATION.md +167 -0
- package/research/mdcontext-pudding/README.md +168 -0
- package/research/mdcontext-pudding/TESTING-SUMMARY.md +128 -0
- package/research/npm_publish/011-npm-workflow-research-agent2.md +792 -0
- package/research/npm_publish/012-npm-workflow-research-agent1.md +530 -0
- package/research/npm_publish/013-npm-workflow-research-agent3.md +722 -0
- package/research/npm_publish/014-npm-workflow-synthesis.md +556 -0
- package/research/npm_publish/031-npm-workflow-task-analysis.md +134 -0
- package/research/research-quality-review.md +834 -0
- package/research/semantic-search/002-research-embedding-models.md +490 -0
- package/research/semantic-search/003-research-rag-alternatives.md +523 -0
- package/research/semantic-search/004-research-vector-search.md +841 -0
- package/research/semantic-search/032-research-semantic-search.md +427 -0
- package/research/semantic-search/embedding-text-analysis.md +156 -0
- package/research/semantic-search/multi-word-failure-reproduction.md +171 -0
- package/research/semantic-search/query-processing-analysis.md +207 -0
- package/research/semantic-search/root-cause-and-solution.md +114 -0
- package/research/semantic-search/threshold-validation-report.md +69 -0
- package/research/semantic-search/vector-search-analysis.md +63 -0
- package/research/task-management-2026/00-synthesis-recommendations.md +295 -0
- package/research/task-management-2026/01-ai-workflow-tools.md +416 -0
- package/research/task-management-2026/02-agent-framework-patterns.md +476 -0
- package/research/task-management-2026/03-lightweight-file-based.md +567 -0
- package/research/task-management-2026/04-established-tools-ai-features.md +541 -0
- package/research/task-management-2026/linear/01-core-features-workflow.md +771 -0
- package/research/task-management-2026/linear/02-api-integrations.md +930 -0
- package/research/task-management-2026/linear/03-ai-features.md +368 -0
- package/research/task-management-2026/linear/04-pricing-setup.md +205 -0
- package/research/task-management-2026/linear/05-usage-patterns-best-practices.md +605 -0
- package/research/test-path-issues.md +276 -0
- package/review/ALP-76/1-error-type-design.md +962 -0
- package/review/ALP-76/2-error-handling-patterns.md +906 -0
- package/review/ALP-76/3-error-presentation.md +624 -0
- package/review/ALP-76/4-test-coverage.md +625 -0
- package/review/ALP-76/5-migration-completeness.md +440 -0
- package/review/ALP-76/6-effect-best-practices.md +755 -0
- package/scripts/apply-branch-protection.sh +47 -0
- package/scripts/branch-protection-templates.json +79 -0
- package/scripts/prototype-summarization.ts +346 -0
- package/scripts/rebuild-hnswlib.js +58 -0
- package/scripts/setup-branch-protection.sh +64 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/active-provider.json +7 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.json +541 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.meta.json +5 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/config.json +8 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/documents.json +60 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/links.json +13 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/sections.json +1197 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/configuration-management.md +99 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/distributed-systems.md +92 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/error-handling.md +78 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/failure-automation.md +55 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/job-context.md +69 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/process-orchestration.md +99 -0
- package/src/cli/argv-preprocessor.test.ts +210 -0
- package/src/cli/argv-preprocessor.ts +202 -0
- package/src/cli/cli.test.ts +627 -0
- package/src/cli/commands/backlinks.ts +54 -0
- package/src/cli/commands/config-cmd.ts +642 -0
- package/src/cli/commands/context.ts +285 -0
- package/src/cli/commands/duplicates.ts +122 -0
- package/src/cli/commands/embeddings.ts +529 -0
- package/src/cli/commands/index-cmd.ts +480 -0
- package/src/cli/commands/index.ts +16 -0
- package/src/cli/commands/links.ts +52 -0
- package/src/cli/commands/search.ts +1281 -0
- package/src/cli/commands/stats.ts +149 -0
- package/src/cli/commands/tree.ts +128 -0
- package/src/cli/config-layer.ts +176 -0
- package/src/cli/error-handler.test.ts +235 -0
- package/src/cli/error-handler.ts +655 -0
- package/src/cli/flag-schemas.ts +341 -0
- package/src/cli/help.ts +588 -0
- package/src/cli/index.ts +9 -0
- package/src/cli/main.ts +435 -0
- package/src/cli/options.ts +41 -0
- package/src/cli/shared-error-handling.ts +199 -0
- package/src/cli/typo-suggester.test.ts +105 -0
- package/src/cli/typo-suggester.ts +130 -0
- package/src/cli/utils.ts +259 -0
- package/src/config/file-provider.test.ts +320 -0
- package/src/config/file-provider.ts +273 -0
- package/src/config/index.ts +72 -0
- package/src/config/integration.test.ts +667 -0
- package/src/config/precedence.test.ts +277 -0
- package/src/config/precedence.ts +451 -0
- package/src/config/schema.test.ts +414 -0
- package/src/config/schema.ts +603 -0
- package/src/config/service.test.ts +320 -0
- package/src/config/service.ts +243 -0
- package/src/config/testing.test.ts +264 -0
- package/src/config/testing.ts +110 -0
- package/src/core/index.ts +1 -0
- package/src/core/types.ts +113 -0
- package/src/duplicates/detector.test.ts +183 -0
- package/src/duplicates/detector.ts +414 -0
- package/src/duplicates/index.ts +18 -0
- package/src/embeddings/embedding-namespace.test.ts +300 -0
- package/src/embeddings/embedding-namespace.ts +947 -0
- package/src/embeddings/heading-boost.test.ts +222 -0
- package/src/embeddings/hnsw-build-options.test.ts +198 -0
- package/src/embeddings/hyde.test.ts +272 -0
- package/src/embeddings/hyde.ts +264 -0
- package/src/embeddings/index.ts +10 -0
- package/src/embeddings/openai-provider.ts +414 -0
- package/src/embeddings/pricing.json +22 -0
- package/src/embeddings/provider-constants.ts +204 -0
- package/src/embeddings/provider-errors.test.ts +967 -0
- package/src/embeddings/provider-errors.ts +565 -0
- package/src/embeddings/provider-factory.test.ts +240 -0
- package/src/embeddings/provider-factory.ts +225 -0
- package/src/embeddings/provider-integration.test.ts +788 -0
- package/src/embeddings/query-preprocessing.test.ts +187 -0
- package/src/embeddings/semantic-search-threshold.test.ts +508 -0
- package/src/embeddings/semantic-search.ts +1270 -0
- package/src/embeddings/types.ts +359 -0
- package/src/embeddings/vector-store.ts +708 -0
- package/src/embeddings/voyage-provider.ts +313 -0
- package/src/errors/errors.test.ts +845 -0
- package/src/errors/index.ts +533 -0
- package/src/index/ignore-patterns.test.ts +354 -0
- package/src/index/ignore-patterns.ts +305 -0
- package/src/index/index.ts +4 -0
- package/src/index/indexer.ts +684 -0
- package/src/index/storage.ts +260 -0
- package/src/index/types.ts +147 -0
- package/src/index/watcher.ts +189 -0
- package/src/index.ts +30 -0
- package/src/integration/search-keyword.test.ts +678 -0
- package/src/mcp/server.ts +612 -0
- package/src/parser/index.ts +1 -0
- package/src/parser/parser.test.ts +291 -0
- package/src/parser/parser.ts +394 -0
- package/src/parser/section-filter.test.ts +277 -0
- package/src/parser/section-filter.ts +392 -0
- package/src/search/__tests__/hybrid-search.test.ts +650 -0
- package/src/search/bm25-store.ts +366 -0
- package/src/search/cross-encoder.test.ts +253 -0
- package/src/search/cross-encoder.ts +406 -0
- package/src/search/fuzzy-search.test.ts +419 -0
- package/src/search/fuzzy-search.ts +273 -0
- package/src/search/hybrid-search.ts +448 -0
- package/src/search/path-matcher.test.ts +276 -0
- package/src/search/path-matcher.ts +33 -0
- package/src/search/query-parser.test.ts +260 -0
- package/src/search/query-parser.ts +319 -0
- package/src/search/searcher.test.ts +280 -0
- package/src/search/searcher.ts +724 -0
- package/src/search/wink-bm25.d.ts +30 -0
- package/src/summarization/cli-providers/claude.ts +202 -0
- package/src/summarization/cli-providers/detection.test.ts +273 -0
- package/src/summarization/cli-providers/detection.ts +118 -0
- package/src/summarization/cli-providers/index.ts +8 -0
- package/src/summarization/cost.test.ts +139 -0
- package/src/summarization/cost.ts +102 -0
- package/src/summarization/error-handler.test.ts +127 -0
- package/src/summarization/error-handler.ts +111 -0
- package/src/summarization/index.ts +102 -0
- package/src/summarization/pipeline.test.ts +498 -0
- package/src/summarization/pipeline.ts +231 -0
- package/src/summarization/prompts.test.ts +269 -0
- package/src/summarization/prompts.ts +133 -0
- package/src/summarization/provider-factory.test.ts +396 -0
- package/src/summarization/provider-factory.ts +178 -0
- package/src/summarization/types.ts +184 -0
- package/src/summarize/budget-bugs.test.ts +620 -0
- package/src/summarize/formatters.ts +419 -0
- package/src/summarize/index.ts +20 -0
- package/src/summarize/summarizer.test.ts +275 -0
- package/src/summarize/summarizer.ts +597 -0
- package/src/summarize/verify-bugs.test.ts +238 -0
- package/src/types/huggingface-transformers.d.ts +66 -0
- package/src/utils/index.ts +1 -0
- package/src/utils/tokens.test.ts +142 -0
- package/src/utils/tokens.ts +186 -0
- package/tests/fixtures/cli/.mdcontext/active-provider.json +7 -0
- package/tests/fixtures/cli/.mdcontext/config.json +8 -0
- package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/indexes/documents.json +33 -0
- package/tests/fixtures/cli/.mdcontext/indexes/links.json +12 -0
- package/tests/fixtures/cli/.mdcontext/indexes/sections.json +247 -0
- package/tests/fixtures/cli/README.md +9 -0
- package/tests/fixtures/cli/api-reference.md +11 -0
- package/tests/fixtures/cli/getting-started.md +11 -0
- package/tests/integration/embed-index.test.ts +712 -0
- package/tests/integration/search-context.test.ts +469 -0
- package/tests/integration/search-semantic.test.ts +522 -0
- package/tsconfig.json +26 -0
- package/vitest.config.ts +16 -0
- package/vitest.setup.ts +12 -0
|
@@ -0,0 +1,419 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for fuzzy-search utilities
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { describe, expect, it } from 'vitest'
|
|
6
|
+
import {
|
|
7
|
+
buildFuzzyHighlightPattern,
|
|
8
|
+
findFuzzyMatches,
|
|
9
|
+
findMatchesInLine,
|
|
10
|
+
getStems,
|
|
11
|
+
isFuzzyMatch,
|
|
12
|
+
levenshteinDistance,
|
|
13
|
+
matchesWithOptions,
|
|
14
|
+
stem,
|
|
15
|
+
stemText,
|
|
16
|
+
} from './fuzzy-search.js'
|
|
17
|
+
|
|
18
|
+
describe('fuzzy-search', () => {
|
|
19
|
+
// ============================================================================
|
|
20
|
+
// Stemming Tests
|
|
21
|
+
// ============================================================================
|
|
22
|
+
|
|
23
|
+
describe('stem', () => {
|
|
24
|
+
it('stems words to their root form', () => {
|
|
25
|
+
expect(stem('running')).toBe('run')
|
|
26
|
+
expect(stem('runner')).toBe('runner')
|
|
27
|
+
expect(stem('runs')).toBe('run')
|
|
28
|
+
})
|
|
29
|
+
|
|
30
|
+
it('handles common word forms', () => {
|
|
31
|
+
expect(stem('failing')).toBe('fail')
|
|
32
|
+
expect(stem('failed')).toBe('fail')
|
|
33
|
+
expect(stem('failure')).toBe('failur')
|
|
34
|
+
expect(stem('fails')).toBe('fail')
|
|
35
|
+
})
|
|
36
|
+
|
|
37
|
+
it('handles programming terms', () => {
|
|
38
|
+
expect(stem('configuration')).toBe('configur')
|
|
39
|
+
expect(stem('configuring')).toBe('configur')
|
|
40
|
+
expect(stem('configured')).toBe('configur')
|
|
41
|
+
})
|
|
42
|
+
|
|
43
|
+
it('handles irregular words', () => {
|
|
44
|
+
expect(stem('testing')).toBe('test')
|
|
45
|
+
expect(stem('tests')).toBe('test')
|
|
46
|
+
expect(stem('tested')).toBe('test')
|
|
47
|
+
})
|
|
48
|
+
|
|
49
|
+
it('converts to lowercase', () => {
|
|
50
|
+
expect(stem('Running')).toBe('run')
|
|
51
|
+
expect(stem('RUNNING')).toBe('run')
|
|
52
|
+
expect(stem('RuNnInG')).toBe('run')
|
|
53
|
+
})
|
|
54
|
+
|
|
55
|
+
it('handles edge cases', () => {
|
|
56
|
+
expect(stem('')).toBe('')
|
|
57
|
+
expect(stem('a')).toBe('a')
|
|
58
|
+
expect(stem('i')).toBe('i')
|
|
59
|
+
})
|
|
60
|
+
})
|
|
61
|
+
|
|
62
|
+
describe('stemText', () => {
|
|
63
|
+
it('splits text and stems each word', () => {
|
|
64
|
+
expect(stemText('running tests')).toEqual(['run', 'test'])
|
|
65
|
+
expect(stemText('the quick fox')).toEqual(['the', 'quick', 'fox'])
|
|
66
|
+
})
|
|
67
|
+
|
|
68
|
+
it('handles punctuation and special characters', () => {
|
|
69
|
+
expect(stemText('hello, world!')).toEqual(['hello', 'world'])
|
|
70
|
+
expect(stemText('foo-bar_baz')).toEqual(['foo', 'bar', 'baz'])
|
|
71
|
+
})
|
|
72
|
+
|
|
73
|
+
it('filters out empty strings', () => {
|
|
74
|
+
expect(stemText(' multiple spaces ')).toEqual(['multipl', 'space'])
|
|
75
|
+
})
|
|
76
|
+
|
|
77
|
+
it('handles empty input', () => {
|
|
78
|
+
expect(stemText('')).toEqual([])
|
|
79
|
+
expect(stemText(' ')).toEqual([])
|
|
80
|
+
})
|
|
81
|
+
})
|
|
82
|
+
|
|
83
|
+
describe('getStems', () => {
|
|
84
|
+
it('returns unique stems as a Set', () => {
|
|
85
|
+
const stems = getStems('running runs runner')
|
|
86
|
+
expect(stems).toBeInstanceOf(Set)
|
|
87
|
+
expect(stems.has('run')).toBe(true)
|
|
88
|
+
expect(stems.has('runner')).toBe(true)
|
|
89
|
+
})
|
|
90
|
+
|
|
91
|
+
it('deduplicates stems', () => {
|
|
92
|
+
const stems = getStems('test testing tests tested')
|
|
93
|
+
expect(stems.size).toBe(1)
|
|
94
|
+
expect(stems.has('test')).toBe(true)
|
|
95
|
+
})
|
|
96
|
+
|
|
97
|
+
it('handles empty input', () => {
|
|
98
|
+
expect(getStems('')).toEqual(new Set())
|
|
99
|
+
})
|
|
100
|
+
})
|
|
101
|
+
|
|
102
|
+
// ============================================================================
|
|
103
|
+
// Levenshtein Distance Tests
|
|
104
|
+
// ============================================================================
|
|
105
|
+
|
|
106
|
+
describe('levenshteinDistance', () => {
|
|
107
|
+
it('returns 0 for identical strings', () => {
|
|
108
|
+
expect(levenshteinDistance('hello', 'hello')).toBe(0)
|
|
109
|
+
expect(levenshteinDistance('', '')).toBe(0)
|
|
110
|
+
expect(levenshteinDistance('a', 'a')).toBe(0)
|
|
111
|
+
})
|
|
112
|
+
|
|
113
|
+
it('calculates insertion distance', () => {
|
|
114
|
+
expect(levenshteinDistance('', 'abc')).toBe(3)
|
|
115
|
+
expect(levenshteinDistance('ab', 'abc')).toBe(1)
|
|
116
|
+
expect(levenshteinDistance('a', 'abc')).toBe(2)
|
|
117
|
+
})
|
|
118
|
+
|
|
119
|
+
it('calculates deletion distance', () => {
|
|
120
|
+
expect(levenshteinDistance('abc', '')).toBe(3)
|
|
121
|
+
expect(levenshteinDistance('abc', 'ab')).toBe(1)
|
|
122
|
+
expect(levenshteinDistance('abc', 'a')).toBe(2)
|
|
123
|
+
})
|
|
124
|
+
|
|
125
|
+
it('calculates substitution distance', () => {
|
|
126
|
+
expect(levenshteinDistance('abc', 'axc')).toBe(1)
|
|
127
|
+
expect(levenshteinDistance('abc', 'xyz')).toBe(3)
|
|
128
|
+
})
|
|
129
|
+
|
|
130
|
+
it('calculates mixed operations', () => {
|
|
131
|
+
expect(levenshteinDistance('kitten', 'sitting')).toBe(3)
|
|
132
|
+
expect(levenshteinDistance('saturday', 'sunday')).toBe(3)
|
|
133
|
+
})
|
|
134
|
+
|
|
135
|
+
it('handles common typos', () => {
|
|
136
|
+
expect(levenshteinDistance('configuration', 'configration')).toBe(1) // missing 'u'
|
|
137
|
+
expect(levenshteinDistance('function', 'funciton')).toBe(2) // transposition
|
|
138
|
+
expect(levenshteinDistance('receive', 'recieve')).toBe(2) // ie/ei swap
|
|
139
|
+
})
|
|
140
|
+
|
|
141
|
+
it('is symmetric', () => {
|
|
142
|
+
expect(levenshteinDistance('abc', 'xyz')).toBe(
|
|
143
|
+
levenshteinDistance('xyz', 'abc'),
|
|
144
|
+
)
|
|
145
|
+
expect(levenshteinDistance('hello', 'world')).toBe(
|
|
146
|
+
levenshteinDistance('world', 'hello'),
|
|
147
|
+
)
|
|
148
|
+
})
|
|
149
|
+
})
|
|
150
|
+
|
|
151
|
+
// ============================================================================
|
|
152
|
+
// Fuzzy Matching Tests
|
|
153
|
+
// ============================================================================
|
|
154
|
+
|
|
155
|
+
describe('isFuzzyMatch', () => {
|
|
156
|
+
it('matches identical strings', () => {
|
|
157
|
+
expect(isFuzzyMatch('hello', 'hello')).toBe(true)
|
|
158
|
+
})
|
|
159
|
+
|
|
160
|
+
it('matches within default distance (2)', () => {
|
|
161
|
+
expect(isFuzzyMatch('hello', 'helo')).toBe(true) // 1 deletion
|
|
162
|
+
expect(isFuzzyMatch('hello', 'helloo')).toBe(true) // 1 insertion
|
|
163
|
+
expect(isFuzzyMatch('hello', 'hallo')).toBe(true) // 1 substitution
|
|
164
|
+
expect(isFuzzyMatch('hello', 'hallo!')).toBe(true) // 2 edits
|
|
165
|
+
})
|
|
166
|
+
|
|
167
|
+
it('does not match beyond default distance', () => {
|
|
168
|
+
expect(isFuzzyMatch('hello', 'hi')).toBe(false)
|
|
169
|
+
expect(isFuzzyMatch('hello', 'goodbye')).toBe(false)
|
|
170
|
+
})
|
|
171
|
+
|
|
172
|
+
it('respects custom max distance', () => {
|
|
173
|
+
expect(isFuzzyMatch('hello', 'helo', 1)).toBe(true)
|
|
174
|
+
expect(isFuzzyMatch('hello', 'heo', 1)).toBe(false)
|
|
175
|
+
expect(isFuzzyMatch('hello', 'heo', 2)).toBe(true)
|
|
176
|
+
expect(isFuzzyMatch('hello', 'h', 3)).toBe(false)
|
|
177
|
+
expect(isFuzzyMatch('hello', 'h', 4)).toBe(true)
|
|
178
|
+
})
|
|
179
|
+
|
|
180
|
+
it('is case-insensitive', () => {
|
|
181
|
+
expect(isFuzzyMatch('Hello', 'hello')).toBe(true)
|
|
182
|
+
expect(isFuzzyMatch('HELLO', 'hello')).toBe(true)
|
|
183
|
+
expect(isFuzzyMatch('HeLLo', 'hello')).toBe(true)
|
|
184
|
+
})
|
|
185
|
+
|
|
186
|
+
it('handles length difference optimization', () => {
|
|
187
|
+
// Length difference > maxDistance should return false quickly
|
|
188
|
+
expect(isFuzzyMatch('ab', 'abcdef', 2)).toBe(false)
|
|
189
|
+
expect(isFuzzyMatch('abcdef', 'ab', 2)).toBe(false)
|
|
190
|
+
})
|
|
191
|
+
|
|
192
|
+
it('handles common programming typos', () => {
|
|
193
|
+
expect(isFuzzyMatch('function', 'funciton')).toBe(true)
|
|
194
|
+
expect(isFuzzyMatch('configuration', 'configration')).toBe(true)
|
|
195
|
+
expect(isFuzzyMatch('database', 'databse')).toBe(true)
|
|
196
|
+
})
|
|
197
|
+
})
|
|
198
|
+
|
|
199
|
+
describe('findFuzzyMatches', () => {
|
|
200
|
+
const words = ['hello', 'world', 'help', 'held', 'hero', 'helm']
|
|
201
|
+
|
|
202
|
+
it('finds matches within distance', () => {
|
|
203
|
+
const matches = findFuzzyMatches('helo', words)
|
|
204
|
+
expect(matches).toContain('hello')
|
|
205
|
+
expect(matches).toContain('help')
|
|
206
|
+
expect(matches).toContain('held')
|
|
207
|
+
expect(matches).toContain('hero')
|
|
208
|
+
})
|
|
209
|
+
|
|
210
|
+
it('respects max distance parameter', () => {
|
|
211
|
+
const matches = findFuzzyMatches('helo', words, 1)
|
|
212
|
+
expect(matches).toContain('hello')
|
|
213
|
+
expect(matches).toContain('help')
|
|
214
|
+
expect(matches).not.toContain('world')
|
|
215
|
+
})
|
|
216
|
+
|
|
217
|
+
it('returns empty array for no matches', () => {
|
|
218
|
+
const matches = findFuzzyMatches('xyz', words, 1)
|
|
219
|
+
expect(matches).toEqual([])
|
|
220
|
+
})
|
|
221
|
+
|
|
222
|
+
it('handles empty word list', () => {
|
|
223
|
+
expect(findFuzzyMatches('hello', [])).toEqual([])
|
|
224
|
+
})
|
|
225
|
+
|
|
226
|
+
it('is case-insensitive', () => {
|
|
227
|
+
const matches = findFuzzyMatches('HELO', words)
|
|
228
|
+
expect(matches).toContain('hello')
|
|
229
|
+
})
|
|
230
|
+
})
|
|
231
|
+
|
|
232
|
+
// ============================================================================
|
|
233
|
+
// Combined Matching Tests
|
|
234
|
+
// ============================================================================
|
|
235
|
+
|
|
236
|
+
describe('matchesWithOptions', () => {
|
|
237
|
+
const text = 'The configuration failed during initialization'
|
|
238
|
+
|
|
239
|
+
describe('exact matching (no options)', () => {
|
|
240
|
+
it('matches exact words', () => {
|
|
241
|
+
expect(matchesWithOptions('configuration', text)).toBe(true)
|
|
242
|
+
expect(matchesWithOptions('failed', text)).toBe(true)
|
|
243
|
+
})
|
|
244
|
+
|
|
245
|
+
it('is case-insensitive', () => {
|
|
246
|
+
expect(matchesWithOptions('CONFIGURATION', text)).toBe(true)
|
|
247
|
+
expect(matchesWithOptions('Failed', text)).toBe(true)
|
|
248
|
+
})
|
|
249
|
+
|
|
250
|
+
it('does not match partial words', () => {
|
|
251
|
+
expect(matchesWithOptions('config', text)).toBe(false)
|
|
252
|
+
expect(matchesWithOptions('fail', text)).toBe(false)
|
|
253
|
+
})
|
|
254
|
+
|
|
255
|
+
it('requires all query words to match', () => {
|
|
256
|
+
expect(matchesWithOptions('configuration failed', text)).toBe(true)
|
|
257
|
+
expect(matchesWithOptions('configuration success', text)).toBe(false)
|
|
258
|
+
})
|
|
259
|
+
|
|
260
|
+
it('matches empty query', () => {
|
|
261
|
+
expect(matchesWithOptions('', text)).toBe(true)
|
|
262
|
+
expect(matchesWithOptions(' ', text)).toBe(true)
|
|
263
|
+
})
|
|
264
|
+
})
|
|
265
|
+
|
|
266
|
+
describe('stemming', () => {
|
|
267
|
+
it('matches word variations via stemming', () => {
|
|
268
|
+
expect(matchesWithOptions('fail', text, { stem: true })).toBe(true)
|
|
269
|
+
expect(matchesWithOptions('failing', text, { stem: true })).toBe(true)
|
|
270
|
+
expect(matchesWithOptions('configure', text, { stem: true })).toBe(true)
|
|
271
|
+
})
|
|
272
|
+
|
|
273
|
+
it('matches multiple stemmed words', () => {
|
|
274
|
+
expect(
|
|
275
|
+
matchesWithOptions('fail initialize', text, { stem: true }),
|
|
276
|
+
).toBe(true)
|
|
277
|
+
})
|
|
278
|
+
})
|
|
279
|
+
|
|
280
|
+
describe('fuzzy matching', () => {
|
|
281
|
+
it('matches typos within distance', () => {
|
|
282
|
+
expect(
|
|
283
|
+
matchesWithOptions('configration', text, { fuzzyDistance: 2 }),
|
|
284
|
+
).toBe(true)
|
|
285
|
+
expect(matchesWithOptions('faild', text, { fuzzyDistance: 1 })).toBe(
|
|
286
|
+
true,
|
|
287
|
+
)
|
|
288
|
+
})
|
|
289
|
+
|
|
290
|
+
it('does not match beyond distance', () => {
|
|
291
|
+
expect(
|
|
292
|
+
matchesWithOptions('configration', text, { fuzzyDistance: 0 }),
|
|
293
|
+
).toBe(false)
|
|
294
|
+
})
|
|
295
|
+
})
|
|
296
|
+
|
|
297
|
+
describe('combined stem and fuzzy', () => {
|
|
298
|
+
it('matches with both options enabled', () => {
|
|
299
|
+
expect(
|
|
300
|
+
matchesWithOptions('failing', text, { stem: true, fuzzyDistance: 2 }),
|
|
301
|
+
).toBe(true)
|
|
302
|
+
expect(
|
|
303
|
+
matchesWithOptions('configration', text, {
|
|
304
|
+
stem: true,
|
|
305
|
+
fuzzyDistance: 2,
|
|
306
|
+
}),
|
|
307
|
+
).toBe(true)
|
|
308
|
+
})
|
|
309
|
+
})
|
|
310
|
+
})
|
|
311
|
+
|
|
312
|
+
describe('findMatchesInLine', () => {
  // Single fixture line shared by every case in this group.
  const line = 'The configuration process failed during initialization'

  it('finds exact matches', () => {
    const found = findMatchesInLine(['configuration', 'failed'], line)

    expect(found).toContain('configuration')
    expect(found).toContain('failed')
  })

  it('returns unique matches (no duplicates)', () => {
    // The same query word repeated must not produce repeated results.
    const found = findMatchesInLine(['the', 'the', 'the'], line)
    const occurrences = found.filter((m) => m === 'the')

    expect(occurrences.length).toBe(1)
  })

  it('finds stemmed matches', () => {
    const stemOnly = { stem: true }
    const found = findMatchesInLine(['fail', 'configure'], line, stemOnly)

    // Results are the words as they appear in the line, not the query words.
    expect(found).toContain('failed')
    expect(found).toContain('configuration')
  })

  it('finds fuzzy matches', () => {
    const fuzzyOnly = { fuzzyDistance: 2 }
    const found = findMatchesInLine(['configration'], line, fuzzyOnly)

    expect(found).toContain('configuration')
  })

  it('handles empty query words', () => {
    const found = findMatchesInLine([], line)

    expect(found).toEqual([])
  })

  it('handles empty line', () => {
    const found = findMatchesInLine(['test'], '')

    expect(found).toEqual([])
  })

  it('is case-insensitive', () => {
    const found = findMatchesInLine(['CONFIGURATION', 'FAILED'], line)

    // Matches are reported in lower case.
    expect(found).toContain('configuration')
    expect(found).toContain('failed')
  })
})
|
|
355
|
+
|
|
356
|
+
// ============================================================================
|
|
357
|
+
// Highlight Pattern Tests
|
|
358
|
+
// ============================================================================
|
|
359
|
+
|
|
360
|
+
describe('buildFuzzyHighlightPattern', () => {
  // The builder returns a RegExp created with the `g` flag, and `test()` on a
  // global RegExp resumes from `lastIndex`. Resetting it before every probe
  // keeps each assertion independent of the ones that ran before it.
  const hits = (pattern: RegExp, sample: string): boolean => {
    pattern.lastIndex = 0
    return pattern.test(sample)
  }

  it('builds pattern for exact matching with word boundaries', () => {
    const pattern = buildFuzzyHighlightPattern('hello')

    // Pattern matches within text
    expect(hits(pattern, 'say hello there')).toBe(true)
  })

  it('builds pattern for stemmed matching', () => {
    const pattern = buildFuzzyHighlightPattern('fail', { stem: true })

    // The query word itself is matched...
    expect(hits(pattern, 'it will fail')).toBe(true)
    // ...and so are longer words that begin with the stem 'fail'
    expect(hits(pattern, 'the failure was logged')).toBe(true)
    expect(hits(pattern, 'three failed attempts')).toBe(true)
    // The stem must start at a word boundary
    expect(hits(pattern, 'an unfailing supply')).toBe(false)
  })

  it('escapes regex special characters', () => {
    const pattern = buildFuzzyHighlightPattern('foo.bar')

    expect(hits(pattern, 'use foo.bar here')).toBe(true)
    expect(hits(pattern, 'use fooXbar here')).toBe(false)
  })

  it('returns non-matching pattern for empty query', () => {
    const pattern = buildFuzzyHighlightPattern('')

    expect(hits(pattern, 'anything')).toBe(false)
    expect(hits(pattern, '')).toBe(false)
  })

  it('matches word boundaries', () => {
    const pattern = buildFuzzyHighlightPattern('test')

    expect(hits(pattern, 'run the test now')).toBe(true)
    // The query embedded inside longer words must not count as a match
    expect(hits(pattern, 'testing contest')).toBe(false)
  })
})
|
|
389
|
+
|
|
390
|
+
// ============================================================================
|
|
391
|
+
// Edge Cases
|
|
392
|
+
// ============================================================================
|
|
393
|
+
|
|
394
|
+
describe('edge cases', () => {
  it('handles special characters in text', () => {
    // Punctuation around the words must not prevent them from being found.
    const text = 'function() { return true; }'

    for (const keyword of ['function', 'return', 'true']) {
      expect(matchesWithOptions(keyword, text)).toBe(true)
    }
  })

  it('handles numeric content', () => {
    const text = 'version 1.2.3 released on 2024'

    for (const term of ['version', '2024']) {
      expect(matchesWithOptions(term, text)).toBe(true)
    }
  })

  it('handles very long words', () => {
    const longWord = 'a'.repeat(100)

    const stemmed = stem(longWord)
    expect(stemmed).toBeDefined()

    const selfDistance = levenshteinDistance(longWord, longWord)
    expect(selfDistance).toBe(0)
  })

  it('handles unicode text', () => {
    // An ASCII word surrounded by non-ASCII text is still found.
    const text = '日本語 configuration 中文'
    const matched = matchesWithOptions('configuration', text)

    expect(matched).toBe(true)
  })
})
|
|
419
|
+
})
|
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Fuzzy Search Utilities
|
|
3
|
+
*
|
|
4
|
+
* Provides stemming and fuzzy matching capabilities for search:
|
|
5
|
+
* - Porter stemmer for word normalization (fail -> fail, failure -> failur)
|
|
6
|
+
* - Levenshtein distance for typo tolerance
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { stemmer } from 'stemmer'
|
|
10
|
+
|
|
11
|
+
// ============================================================================
|
|
12
|
+
// Stemming
|
|
13
|
+
// ============================================================================
|
|
14
|
+
|
|
15
|
+
/**
 * Reduce a single word to its stem.
 *
 * The word is lower-cased first and then passed to `stemmer`.
 *
 * @param word - Word to normalize
 * @returns The value `stemmer` produces for the lower-cased word
 */
export const stem = (word: string): string => stemmer(word.toLowerCase())
|
|
21
|
+
|
|
22
|
+
/**
 * Tokenize a text and stem every token.
 *
 * The text is lower-cased and split on runs of non-word characters and
 * underscores; empty tokens are discarded. Duplicates are kept and the
 * output follows the order of the tokens in the text.
 *
 * @param text - Text to tokenize
 * @returns One stem per non-empty token
 */
export const stemText = (text: string): string[] => {
  const stems: string[] = []

  for (const token of text.toLowerCase().split(/[\W_]+/)) {
    if (token.length > 0) {
      stems.push(stem(token))
    }
  }

  return stems
}
|
|
32
|
+
|
|
33
|
+
/**
 * Collect the distinct stems that occur in a text.
 *
 * @param text - Text to tokenize and stem (via `stemText`)
 * @returns A set holding each stem once
 */
export const getStems = (text: string): Set<string> => {
  const unique = new Set<string>()

  for (const wordStem of stemText(text)) {
    unique.add(wordStem)
  }

  return unique
}
|
|
39
|
+
|
|
40
|
+
// ============================================================================
|
|
41
|
+
// Fuzzy Matching (Levenshtein Distance)
|
|
42
|
+
// ============================================================================
|
|
43
|
+
|
|
44
|
+
/**
 * Calculate the Levenshtein (edit) distance between two strings.
 *
 * The distance is the minimum number of single-unit insertions, deletions and
 * substitutions needed to turn `a` into `b`. Comparison is case-sensitive and
 * works on UTF-16 code units.
 *
 * Only two rows of the dynamic-programming table are kept alive, sized by the
 * shorter input, so memory use is proportional to the shorter string rather
 * than to the product of both lengths.
 *
 * @param a - First string
 * @param b - Second string
 * @returns The edit distance (0 when the strings are identical)
 */
export const levenshteinDistance = (a: string, b: string): number => {
  // Trivial cases need no table at all
  if (a === b) return 0
  if (a.length === 0) return b.length
  if (b.length === 0) return a.length

  // The distance is symmetric, so put the shorter string on the column axis
  const longer = a.length >= b.length ? a : b
  const shorter = a.length >= b.length ? b : a
  const width = shorter.length

  // `previous` starts as row 0: turning '' into the first j units costs j
  let previous: number[] = Array.from({ length: width + 1 }, (_, j) => j)
  let current: number[] = new Array<number>(width + 1).fill(0)

  for (let i = 1; i <= longer.length; i++) {
    const unit = longer.charCodeAt(i - 1)

    // First column: turning the first i units into '' costs i
    current[0] = i

    for (let j = 1; j <= width; j++) {
      const cost = unit === shorter.charCodeAt(j - 1) ? 0 : 1
      current[j] = Math.min(
        previous[j]! + 1, // deletion
        current[j - 1]! + 1, // insertion
        previous[j - 1]! + cost, // substitution
      )
    }

    // Reuse the two buffers instead of allocating a new row per iteration
    const finished = current
    current = previous
    previous = finished
  }

  return previous[width]!
}
|
|
74
|
+
|
|
75
|
+
/**
 * Check whether two words are within a given edit distance of each other.
 *
 * The comparison is case-insensitive. Both words are lower-cased once, and
 * the lower-cased forms are used for the length pre-check as well as for the
 * distance itself, because lower-casing can change a string's length.
 *
 * @param word1 - First word
 * @param word2 - Second word
 * @param maxDistance - Largest edit distance still counted as a match (default 2)
 * @returns `true` when the Levenshtein distance is at most `maxDistance`
 */
export const isFuzzyMatch = (
  word1: string,
  word2: string,
  maxDistance: number = 2,
): boolean => {
  const left = word1.toLowerCase()
  const right = word2.toLowerCase()

  // Quick rejection: the distance is never smaller than the length difference
  if (Math.abs(left.length - right.length) > maxDistance) {
    return false
  }

  return levenshteinDistance(left, right) <= maxDistance
}
|
|
92
|
+
|
|
93
|
+
/**
 * Select the words from a list that fuzzy-match a query.
 *
 * Matching is delegated to `isFuzzyMatch` on lower-cased inputs; the returned
 * entries are the original elements of `words`, in their original order.
 *
 * @param query - Word to look for
 * @param words - Candidate words
 * @param maxDistance - Largest edit distance still counted as a match (default 2)
 * @returns The candidates accepted by `isFuzzyMatch`
 */
export const findFuzzyMatches = (
  query: string,
  words: readonly string[],
  maxDistance: number = 2,
): string[] => {
  const needle = query.toLowerCase()
  const accepted: string[] = []

  for (const candidate of words) {
    if (isFuzzyMatch(needle, candidate.toLowerCase(), maxDistance)) {
      accepted.push(candidate)
    }
  }

  return accepted
}
|
|
106
|
+
|
|
107
|
+
// ============================================================================
|
|
108
|
+
// Combined Matching Options
|
|
109
|
+
// ============================================================================
|
|
110
|
+
|
|
111
|
+
/**
 * Switches that relax word comparison beyond exact (case-insensitive) equality.
 * Both fields are optional and may be combined.
 */
export interface MatchOptions {
  /** When truthy, two words also match if their stems are equal */
  readonly stem?: boolean | undefined
  /**
   * Largest edit distance accepted as a fuzzy match.
   * Fuzzy matching is only attempted when this is a number greater than 0.
   */
  readonly fuzzyDistance?: number | undefined
}
|
|
117
|
+
|
|
118
|
+
/**
 * Check whether every word of a query is found in a text.
 *
 * Query and text are lower-cased and split on runs of non-word characters and
 * underscores. A query word counts as found when some text word
 * - equals it, or
 * - shares its stem (only when `options.stem` is truthy), or
 * - is within `options.fuzzyDistance` edits of it (only when that value is
 *   greater than 0).
 *
 * A query that yields no words (for example an empty string) matches any text.
 *
 * @param query - Search query; all of its words must be found
 * @param text - Text to search in
 * @param options - Stemming / fuzzy switches (default: exact matching only)
 * @returns `true` when every query word is found, `false` otherwise
 */
export const matchesWithOptions = (
  query: string,
  text: string,
  options: MatchOptions = {},
): boolean => {
  const { stem: useStemming, fuzzyDistance } = options

  const tokenize = (input: string): string[] =>
    input
      .toLowerCase()
      .split(/[\W_]+/)
      .filter((w) => w.length > 0)

  const queryWords = tokenize(query)

  if (queryWords.length === 0) {
    return true // Empty query matches everything
  }

  // Distinct text words: gives O(1) exact lookups and avoids re-checking
  // repeated words in the stem and fuzzy passes
  const textWords = new Set(tokenize(text))

  // Stems of the text words are built at most once, and only if a query word
  // actually needs the stemming pass
  let cachedTextStems: Set<string> | undefined
  const textStems = (): Set<string> => {
    if (cachedTextStems === undefined) {
      const built = new Set<string>()
      for (const textWord of textWords) {
        built.add(stem(textWord))
      }
      cachedTextStems = built
    }
    return cachedTextStems
  }

  // All query words must match
  return queryWords.every((queryWord) => {
    // Exact match (case-insensitive, both sides are already lower-cased)
    if (textWords.has(queryWord)) {
      return true
    }

    // Stemming match
    if (useStemming && textStems().has(stem(queryWord))) {
      return true
    }

    // Fuzzy match
    if (fuzzyDistance !== undefined && fuzzyDistance > 0) {
      for (const textWord of textWords) {
        if (isFuzzyMatch(textWord, queryWord, fuzzyDistance)) {
          return true
        }
      }
    }

    return false
  })
}
|
|
177
|
+
|
|
178
|
+
/**
 * Find the words of a line that match any of the given query words.
 *
 * The line is lower-cased and split on runs of non-word characters and
 * underscores. A line word is reported when it equals a query word
 * (case-insensitively), shares its stem (only when `options.stem` is truthy),
 * or is within `options.fuzzyDistance` edits of it (only when that value is
 * greater than 0).
 *
 * @param queryWords - Query words, used as given (they are not split further)
 * @param line - Line of text to scan
 * @param options - Stemming / fuzzy switches (default: exact matching only)
 * @returns The matching line words in lower case, each at most once
 */
export const findMatchesInLine = (
  queryWords: readonly string[],
  line: string,
  options: MatchOptions = {},
): string[] => {
  const { stem: useStemming, fuzzyDistance } = options
  const matchesSet = new Set<string>()

  // Distinct line words in first-seen order; a repeated word can only ever
  // contribute one result, so there is no point in scanning it again
  const lineWords = Array.from(
    new Set(
      line
        .toLowerCase()
        .split(/[\W_]+/)
        .filter((w) => w.length > 0),
    ),
  )

  // Each line word is stemmed at most once, however many query words there are
  const stemCache = new Map<string, string>()
  const stemOfLineWord = (lineWord: string): string => {
    let cached = stemCache.get(lineWord)
    if (cached === undefined) {
      cached = stem(lineWord)
      stemCache.set(lineWord, cached)
    }
    return cached
  }

  for (const queryWord of queryWords) {
    const queryLower = queryWord.toLowerCase()
    const queryStem = useStemming ? stem(queryWord) : null

    for (const lineWord of lineWords) {
      // Skip if already matched (O(1) lookup)
      if (matchesSet.has(lineWord)) {
        continue
      }

      const exact = lineWord === queryLower

      // A null or empty query stem never produces a stemming match
      const stemmed =
        !exact && queryStem ? stemOfLineWord(lineWord) === queryStem : false

      const fuzzy =
        !exact &&
        !stemmed &&
        fuzzyDistance !== undefined &&
        fuzzyDistance > 0 &&
        isFuzzyMatch(lineWord, queryLower, fuzzyDistance)

      if (exact || stemmed || fuzzy) {
        matchesSet.add(lineWord)
      }
    }
  }

  return Array.from(matchesSet)
}
|
|
232
|
+
|
|
233
|
+
/**
 * Build a regex that finds the query's words in a text, for highlighting.
 *
 * The query is lower-cased and split on runs of non-word characters and
 * underscores. Without stemming each word is matched as a whole word. With
 * `options.stem` each word matches either itself as a whole word or any word
 * that begins with its stem. The literal word is always included because a
 * stem is not necessarily a prefix of the word it came from (for example a
 * trailing `y` may be rewritten), and the query word itself should still be
 * highlighted in that case.
 *
 * `options.fuzzyDistance` is not used here.
 *
 * The returned regex has the `g` and `i` flags. Because it is global, `test()`
 * and `exec()` on it advance `lastIndex`; reset it or build a fresh pattern
 * when probing several strings.
 *
 * @param query - Search query
 * @param options - Only `stem` is consulted
 * @returns A global, case-insensitive regex; a never-matching regex when the
 *   query contains no words
 */
export const buildFuzzyHighlightPattern = (
  query: string,
  options: MatchOptions = {},
): RegExp => {
  const { stem: useStemming } = options

  // Escape special regex chars so a word is always matched literally
  const escapeForRegex = (value: string): string =>
    value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')

  const queryWords = query
    .toLowerCase()
    .split(/[\W_]+/)
    .filter((w) => w.length > 0)

  if (queryWords.length === 0) {
    return /.^/ // Match nothing
  }

  // Build one alternative per query word
  const patterns = queryWords.map((word) => {
    const escaped = escapeForRegex(word)

    if (!useStemming) {
      // Exact word match
      return `\\b${escaped}\\b`
    }

    // The word itself, or the stem as a prefix followed by any word characters
    const escapedStem = escapeForRegex(stem(word))
    return `\\b(?:${escaped}|${escapedStem}\\w*)\\b`
  })

  return new RegExp(patterns.join('|'), 'gi')
}
|