mdcontext 0.0.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.changeset/README.md +28 -0
- package/.changeset/config.json +11 -0
- package/.claude/settings.local.json +25 -0
- package/.github/workflows/ci.yml +83 -0
- package/.github/workflows/claude-code-review.yml +44 -0
- package/.github/workflows/claude.yml +85 -0
- package/.github/workflows/release.yml +113 -0
- package/.tldrignore +112 -0
- package/BACKLOG.md +338 -0
- package/CONTRIBUTING.md +186 -0
- package/NOTES/NOTES +44 -0
- package/README.md +434 -11
- package/biome.json +36 -0
- package/cspell.config.yaml +14 -0
- package/dist/chunk-23UPXDNL.js +3044 -0
- package/dist/chunk-2W7MO2DL.js +1366 -0
- package/dist/chunk-3NUAZGMA.js +1689 -0
- package/dist/chunk-7TOWB2XB.js +366 -0
- package/dist/chunk-7XOTOADQ.js +3065 -0
- package/dist/chunk-AH2PDM2K.js +3042 -0
- package/dist/chunk-BNXWSZ63.js +3742 -0
- package/dist/chunk-BTL5DJVU.js +3222 -0
- package/dist/chunk-HDHYG7E4.js +104 -0
- package/dist/chunk-HLR4KZBP.js +3234 -0
- package/dist/chunk-IP3FRFEB.js +1045 -0
- package/dist/chunk-KHU56VDO.js +3042 -0
- package/dist/chunk-KRYIFLQR.js +88 -0
- package/dist/chunk-LBSDNLEM.js +287 -0
- package/dist/chunk-MNTQ7HCP.js +2643 -0
- package/dist/chunk-MUJELQQ6.js +1387 -0
- package/dist/chunk-MXJGMSLV.js +2199 -0
- package/dist/chunk-N6QJGC3Z.js +2636 -0
- package/dist/chunk-OBELGBPM.js +1713 -0
- package/dist/chunk-OT7R5XTA.js +3192 -0
- package/dist/chunk-P7X4RA2T.js +106 -0
- package/dist/chunk-PIDUQNC2.js +3185 -0
- package/dist/chunk-POGCDIH4.js +3187 -0
- package/dist/chunk-PSIEOQGZ.js +3043 -0
- package/dist/chunk-PVRT3IHA.js +3238 -0
- package/dist/chunk-QNN4TT23.js +1430 -0
- package/dist/chunk-RE3R45RJ.js +3042 -0
- package/dist/chunk-S7E6TFX6.js +803 -0
- package/dist/chunk-SG6GLU4U.js +1378 -0
- package/dist/chunk-SJCDV2ST.js +274 -0
- package/dist/chunk-SYE5XLF3.js +104 -0
- package/dist/chunk-T5VLYBZD.js +103 -0
- package/dist/chunk-TOQB7VWU.js +3238 -0
- package/dist/chunk-VFNMZ4ZQ.js +3228 -0
- package/dist/chunk-VVTGZNBT.js +1629 -0
- package/dist/chunk-W7Q4RFEV.js +104 -0
- package/dist/chunk-XTYYVRLO.js +3190 -0
- package/dist/chunk-Y6MDYVJD.js +3063 -0
- package/dist/cli/main.d.ts +1 -0
- package/dist/cli/main.js +5458 -0
- package/dist/index.d.ts +653 -0
- package/dist/index.js +79 -0
- package/dist/mcp/server.d.ts +1 -0
- package/dist/mcp/server.js +472 -0
- package/dist/schema-BAWSG7KY.js +22 -0
- package/dist/schema-E3QUPL26.js +20 -0
- package/dist/schema-EHL7WUT6.js +20 -0
- package/docs/019-USAGE.md +625 -0
- package/docs/020-current-implementation.md +364 -0
- package/docs/021-DOGFOODING-FINDINGS.md +175 -0
- package/docs/BACKLOG.md +80 -0
- package/docs/CONFIG.md +1123 -0
- package/docs/DESIGN.md +439 -0
- package/docs/ERRORS.md +383 -0
- package/docs/PROJECT.md +88 -0
- package/docs/ROADMAP.md +407 -0
- package/docs/summarization.md +320 -0
- package/docs/test-links.md +9 -0
- package/justfile +40 -0
- package/package.json +74 -9
- package/pnpm-workspace.yaml +5 -0
- package/research/INDEX.md +315 -0
- package/research/code-review/README.md +90 -0
- package/research/code-review/cli-error-handling-review.md +979 -0
- package/research/code-review/code-review-validation-report.md +464 -0
- package/research/code-review/main-ts-review.md +1128 -0
- package/research/config-analysis/01-current-implementation.md +470 -0
- package/research/config-analysis/02-strategy-recommendation.md +428 -0
- package/research/config-analysis/03-task-candidates.md +715 -0
- package/research/config-analysis/033-research-configuration-management.md +828 -0
- package/research/config-analysis/034-research-effect-cli-config.md +1504 -0
- package/research/config-analysis/04-consolidated-task-candidates.md +277 -0
- package/research/config-docs/SUMMARY.md +357 -0
- package/research/config-docs/TEST-RESULTS.md +776 -0
- package/research/config-docs/TODO.md +542 -0
- package/research/config-docs/analysis.md +744 -0
- package/research/config-docs/fix-validation.md +502 -0
- package/research/config-docs/help-audit.md +264 -0
- package/research/config-docs/help-system-analysis.md +890 -0
- package/research/dogfood/consolidated-tool-evaluation.md +373 -0
- package/research/dogfood/strategy-a/a-synthesis.md +184 -0
- package/research/dogfood/strategy-a/a1-docs.md +226 -0
- package/research/dogfood/strategy-a/a2-amorphic.md +156 -0
- package/research/dogfood/strategy-a/a3-llm.md +164 -0
- package/research/dogfood/strategy-b/b-synthesis.md +228 -0
- package/research/dogfood/strategy-b/b1-architecture.md +207 -0
- package/research/dogfood/strategy-b/b2-gaps.md +258 -0
- package/research/dogfood/strategy-b/b3-workflows.md +250 -0
- package/research/dogfood/strategy-c/c-synthesis.md +451 -0
- package/research/dogfood/strategy-c/c1-explorer.md +192 -0
- package/research/dogfood/strategy-c/c2-diver-memory.md +145 -0
- package/research/dogfood/strategy-c/c3-diver-control.md +148 -0
- package/research/dogfood/strategy-c/c4-diver-failure.md +151 -0
- package/research/dogfood/strategy-c/c5-diver-execution.md +221 -0
- package/research/dogfood/strategy-c/c6-diver-org.md +221 -0
- package/research/effect-cli-error-handling.md +845 -0
- package/research/effect-errors-as-values.md +943 -0
- package/research/errors-task-analysis/00-consolidated-tasks.md +207 -0
- package/research/errors-task-analysis/cli-commands-analysis.md +909 -0
- package/research/errors-task-analysis/embeddings-analysis.md +709 -0
- package/research/errors-task-analysis/index-search-analysis.md +812 -0
- package/research/frontmatter/COMMENTS-ARE-SKIPPED.md +149 -0
- package/research/frontmatter/LLM-CODE-NAVIGATION.md +276 -0
- package/research/issue-review.md +603 -0
- package/research/llm-summarization/agent-cli-tools-2026.md +1082 -0
- package/research/llm-summarization/alternative-providers-2026.md +1428 -0
- package/research/llm-summarization/anthropic-2026.md +367 -0
- package/research/llm-summarization/claude-cli-integration.md +1706 -0
- package/research/llm-summarization/cli-integration-patterns.md +3155 -0
- package/research/llm-summarization/openai-2026.md +473 -0
- package/research/llm-summarization/openai-compatible-providers-2026.md +1022 -0
- package/research/llm-summarization/opencode-cli-integration.md +1552 -0
- package/research/llm-summarization/prompt-engineering-2026.md +1426 -0
- package/research/llm-summarization/prototype-results.md +56 -0
- package/research/llm-summarization/provider-switching-patterns-2026.md +2153 -0
- package/research/llm-summarization/typescript-llm-libraries-2026.md +2436 -0
- package/research/mdcontext-error-analysis.md +521 -0
- package/research/mdcontext-pudding/00-EXECUTIVE-SUMMARY.md +282 -0
- package/research/mdcontext-pudding/01-index-embed.md +956 -0
- package/research/mdcontext-pudding/02-search-COMMANDS.md +142 -0
- package/research/mdcontext-pudding/02-search-SUMMARY.md +146 -0
- package/research/mdcontext-pudding/02-search.md +970 -0
- package/research/mdcontext-pudding/03-context.md +779 -0
- package/research/mdcontext-pudding/04-navigation-and-analytics.md +803 -0
- package/research/mdcontext-pudding/04-tree.md +704 -0
- package/research/mdcontext-pudding/05-config.md +1038 -0
- package/research/mdcontext-pudding/06-links-summary.txt +87 -0
- package/research/mdcontext-pudding/06-links.md +679 -0
- package/research/mdcontext-pudding/07-stats.md +693 -0
- package/research/mdcontext-pudding/BUG-FIX-PLAN.md +388 -0
- package/research/mdcontext-pudding/P0-BUG-VALIDATION.md +167 -0
- package/research/mdcontext-pudding/README.md +168 -0
- package/research/mdcontext-pudding/TESTING-SUMMARY.md +128 -0
- package/research/npm_publish/011-npm-workflow-research-agent2.md +792 -0
- package/research/npm_publish/012-npm-workflow-research-agent1.md +530 -0
- package/research/npm_publish/013-npm-workflow-research-agent3.md +722 -0
- package/research/npm_publish/014-npm-workflow-synthesis.md +556 -0
- package/research/npm_publish/031-npm-workflow-task-analysis.md +134 -0
- package/research/research-quality-review.md +834 -0
- package/research/semantic-search/002-research-embedding-models.md +490 -0
- package/research/semantic-search/003-research-rag-alternatives.md +523 -0
- package/research/semantic-search/004-research-vector-search.md +841 -0
- package/research/semantic-search/032-research-semantic-search.md +427 -0
- package/research/semantic-search/embedding-text-analysis.md +156 -0
- package/research/semantic-search/multi-word-failure-reproduction.md +171 -0
- package/research/semantic-search/query-processing-analysis.md +207 -0
- package/research/semantic-search/root-cause-and-solution.md +114 -0
- package/research/semantic-search/threshold-validation-report.md +69 -0
- package/research/semantic-search/vector-search-analysis.md +63 -0
- package/research/task-management-2026/00-synthesis-recommendations.md +295 -0
- package/research/task-management-2026/01-ai-workflow-tools.md +416 -0
- package/research/task-management-2026/02-agent-framework-patterns.md +476 -0
- package/research/task-management-2026/03-lightweight-file-based.md +567 -0
- package/research/task-management-2026/04-established-tools-ai-features.md +541 -0
- package/research/task-management-2026/linear/01-core-features-workflow.md +771 -0
- package/research/task-management-2026/linear/02-api-integrations.md +930 -0
- package/research/task-management-2026/linear/03-ai-features.md +368 -0
- package/research/task-management-2026/linear/04-pricing-setup.md +205 -0
- package/research/task-management-2026/linear/05-usage-patterns-best-practices.md +605 -0
- package/research/test-path-issues.md +276 -0
- package/review/ALP-76/1-error-type-design.md +962 -0
- package/review/ALP-76/2-error-handling-patterns.md +906 -0
- package/review/ALP-76/3-error-presentation.md +624 -0
- package/review/ALP-76/4-test-coverage.md +625 -0
- package/review/ALP-76/5-migration-completeness.md +440 -0
- package/review/ALP-76/6-effect-best-practices.md +755 -0
- package/scripts/apply-branch-protection.sh +47 -0
- package/scripts/branch-protection-templates.json +79 -0
- package/scripts/prototype-summarization.ts +346 -0
- package/scripts/rebuild-hnswlib.js +58 -0
- package/scripts/setup-branch-protection.sh +64 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/active-provider.json +7 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.json +541 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.meta.json +5 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/config.json +8 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/documents.json +60 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/links.json +13 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/sections.json +1197 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/configuration-management.md +99 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/distributed-systems.md +92 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/error-handling.md +78 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/failure-automation.md +55 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/job-context.md +69 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/process-orchestration.md +99 -0
- package/src/cli/argv-preprocessor.test.ts +210 -0
- package/src/cli/argv-preprocessor.ts +202 -0
- package/src/cli/cli.test.ts +627 -0
- package/src/cli/commands/backlinks.ts +54 -0
- package/src/cli/commands/config-cmd.ts +642 -0
- package/src/cli/commands/context.ts +285 -0
- package/src/cli/commands/duplicates.ts +122 -0
- package/src/cli/commands/embeddings.ts +529 -0
- package/src/cli/commands/index-cmd.ts +480 -0
- package/src/cli/commands/index.ts +16 -0
- package/src/cli/commands/links.ts +52 -0
- package/src/cli/commands/search.ts +1281 -0
- package/src/cli/commands/stats.ts +149 -0
- package/src/cli/commands/tree.ts +128 -0
- package/src/cli/config-layer.ts +176 -0
- package/src/cli/error-handler.test.ts +235 -0
- package/src/cli/error-handler.ts +655 -0
- package/src/cli/flag-schemas.ts +341 -0
- package/src/cli/help.ts +588 -0
- package/src/cli/index.ts +9 -0
- package/src/cli/main.ts +435 -0
- package/src/cli/options.ts +41 -0
- package/src/cli/shared-error-handling.ts +199 -0
- package/src/cli/typo-suggester.test.ts +105 -0
- package/src/cli/typo-suggester.ts +130 -0
- package/src/cli/utils.ts +259 -0
- package/src/config/file-provider.test.ts +320 -0
- package/src/config/file-provider.ts +273 -0
- package/src/config/index.ts +72 -0
- package/src/config/integration.test.ts +667 -0
- package/src/config/precedence.test.ts +277 -0
- package/src/config/precedence.ts +451 -0
- package/src/config/schema.test.ts +414 -0
- package/src/config/schema.ts +603 -0
- package/src/config/service.test.ts +320 -0
- package/src/config/service.ts +243 -0
- package/src/config/testing.test.ts +264 -0
- package/src/config/testing.ts +110 -0
- package/src/core/index.ts +1 -0
- package/src/core/types.ts +113 -0
- package/src/duplicates/detector.test.ts +183 -0
- package/src/duplicates/detector.ts +414 -0
- package/src/duplicates/index.ts +18 -0
- package/src/embeddings/embedding-namespace.test.ts +300 -0
- package/src/embeddings/embedding-namespace.ts +947 -0
- package/src/embeddings/heading-boost.test.ts +222 -0
- package/src/embeddings/hnsw-build-options.test.ts +198 -0
- package/src/embeddings/hyde.test.ts +272 -0
- package/src/embeddings/hyde.ts +264 -0
- package/src/embeddings/index.ts +10 -0
- package/src/embeddings/openai-provider.ts +414 -0
- package/src/embeddings/pricing.json +22 -0
- package/src/embeddings/provider-constants.ts +204 -0
- package/src/embeddings/provider-errors.test.ts +967 -0
- package/src/embeddings/provider-errors.ts +565 -0
- package/src/embeddings/provider-factory.test.ts +240 -0
- package/src/embeddings/provider-factory.ts +225 -0
- package/src/embeddings/provider-integration.test.ts +788 -0
- package/src/embeddings/query-preprocessing.test.ts +187 -0
- package/src/embeddings/semantic-search-threshold.test.ts +508 -0
- package/src/embeddings/semantic-search.ts +1270 -0
- package/src/embeddings/types.ts +359 -0
- package/src/embeddings/vector-store.ts +708 -0
- package/src/embeddings/voyage-provider.ts +313 -0
- package/src/errors/errors.test.ts +845 -0
- package/src/errors/index.ts +533 -0
- package/src/index/ignore-patterns.test.ts +354 -0
- package/src/index/ignore-patterns.ts +305 -0
- package/src/index/index.ts +4 -0
- package/src/index/indexer.ts +684 -0
- package/src/index/storage.ts +260 -0
- package/src/index/types.ts +147 -0
- package/src/index/watcher.ts +189 -0
- package/src/index.ts +30 -0
- package/src/integration/search-keyword.test.ts +678 -0
- package/src/mcp/server.ts +612 -0
- package/src/parser/index.ts +1 -0
- package/src/parser/parser.test.ts +291 -0
- package/src/parser/parser.ts +394 -0
- package/src/parser/section-filter.test.ts +277 -0
- package/src/parser/section-filter.ts +392 -0
- package/src/search/__tests__/hybrid-search.test.ts +650 -0
- package/src/search/bm25-store.ts +366 -0
- package/src/search/cross-encoder.test.ts +253 -0
- package/src/search/cross-encoder.ts +406 -0
- package/src/search/fuzzy-search.test.ts +419 -0
- package/src/search/fuzzy-search.ts +273 -0
- package/src/search/hybrid-search.ts +448 -0
- package/src/search/path-matcher.test.ts +276 -0
- package/src/search/path-matcher.ts +33 -0
- package/src/search/query-parser.test.ts +260 -0
- package/src/search/query-parser.ts +319 -0
- package/src/search/searcher.test.ts +280 -0
- package/src/search/searcher.ts +724 -0
- package/src/search/wink-bm25.d.ts +30 -0
- package/src/summarization/cli-providers/claude.ts +202 -0
- package/src/summarization/cli-providers/detection.test.ts +273 -0
- package/src/summarization/cli-providers/detection.ts +118 -0
- package/src/summarization/cli-providers/index.ts +8 -0
- package/src/summarization/cost.test.ts +139 -0
- package/src/summarization/cost.ts +102 -0
- package/src/summarization/error-handler.test.ts +127 -0
- package/src/summarization/error-handler.ts +111 -0
- package/src/summarization/index.ts +102 -0
- package/src/summarization/pipeline.test.ts +498 -0
- package/src/summarization/pipeline.ts +231 -0
- package/src/summarization/prompts.test.ts +269 -0
- package/src/summarization/prompts.ts +133 -0
- package/src/summarization/provider-factory.test.ts +396 -0
- package/src/summarization/provider-factory.ts +178 -0
- package/src/summarization/types.ts +184 -0
- package/src/summarize/budget-bugs.test.ts +620 -0
- package/src/summarize/formatters.ts +419 -0
- package/src/summarize/index.ts +20 -0
- package/src/summarize/summarizer.test.ts +275 -0
- package/src/summarize/summarizer.ts +597 -0
- package/src/summarize/verify-bugs.test.ts +238 -0
- package/src/types/huggingface-transformers.d.ts +66 -0
- package/src/utils/index.ts +1 -0
- package/src/utils/tokens.test.ts +142 -0
- package/src/utils/tokens.ts +186 -0
- package/tests/fixtures/cli/.mdcontext/active-provider.json +7 -0
- package/tests/fixtures/cli/.mdcontext/config.json +8 -0
- package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/indexes/documents.json +33 -0
- package/tests/fixtures/cli/.mdcontext/indexes/links.json +12 -0
- package/tests/fixtures/cli/.mdcontext/indexes/sections.json +247 -0
- package/tests/fixtures/cli/README.md +9 -0
- package/tests/fixtures/cli/api-reference.md +11 -0
- package/tests/fixtures/cli/getting-started.md +11 -0
- package/tests/integration/embed-index.test.ts +712 -0
- package/tests/integration/search-context.test.ts +469 -0
- package/tests/integration/search-semantic.test.ts +522 -0
- package/tsconfig.json +26 -0
- package/vitest.config.ts +16 -0
- package/vitest.setup.ts +12 -0
|
@@ -0,0 +1,359 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embedding types for mdcontext
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
// ============================================================================
|
|
6
|
+
// Embedding Provider
|
|
7
|
+
// ============================================================================
|
|
8
|
+
|
|
9
|
+
/**
 * Progress snapshot handed to an `onBatchProgress` callback.
 *
 * All counters are plain numbers; whether `batchIndex` is 0- or 1-based is
 * decided by the provider that emits the report (not visible in this file).
 */
export interface BatchProgress {
  /** Index of the batch this report refers to. */
  readonly batchIndex: number
  /** Number of batches in the whole embed call. */
  readonly totalBatches: number
  /** Count of texts handled so far. */
  readonly processedTexts: number
  /** Count of texts submitted in the whole embed call. */
  readonly totalTexts: number
}
|
|
15
|
+
|
|
16
|
+
/**
 * Optional settings accepted by `EmbeddingProvider.embed`.
 */
export interface EmbedOptions {
  /**
   * Callback that receives a `BatchProgress` report.
   * May be omitted or explicitly set to `undefined`.
   */
  readonly onBatchProgress?: ((progress: BatchProgress) => void) | undefined
}
|
|
19
|
+
|
|
20
|
+
/**
 * Minimal contract every embedding backend implements.
 */
export interface EmbeddingProvider {
  /** Provider identifier. */
  readonly name: string
  /** Length of each embedding vector the provider produces. */
  readonly dimensions: number
  /**
   * Embed a list of texts.
   *
   * @param texts - Texts to embed
   * @param options - Optional settings such as a progress callback
   * @returns Promise resolving to the embeddings plus token/cost accounting
   */
  embed(texts: string[], options?: EmbedOptions): Promise<EmbeddingResult>
}
|
|
25
|
+
|
|
26
|
+
/**
 * An `EmbeddingProvider` that additionally exposes details about the service
 * behind it. Concrete providers such as OpenAIProvider carry these fields.
 */
export interface EmbeddingProviderWithMetadata extends EmbeddingProvider {
  /** Model identifier used by the provider. */
  readonly model: string
  /** Base URL of the service, or `undefined` when none is set. */
  readonly baseURL: string | undefined
}
|
|
34
|
+
|
|
35
|
+
/**
 * Runtime type guard: does this provider carry the extended metadata fields?
 *
 * Prefer this over casting when you need `model` / `baseURL`. Only `model` is
 * inspected: the guard passes when the property exists and holds a string.
 *
 * @param provider - Provider to inspect
 * @returns `true` when `provider.model` is present and is a string
 */
export const hasProviderMetadata = (
  provider: EmbeddingProvider,
): provider is EmbeddingProviderWithMetadata => {
  // Guard clause: without a `model` property there is nothing to inspect.
  if (!('model' in provider)) return false

  const { model } = provider as EmbeddingProviderWithMetadata
  return typeof model === 'string'
}
|
|
47
|
+
|
|
48
|
+
/**
 * Outcome of one `embed` call.
 */
export interface EmbeddingResult {
  /** The embedding vectors, each a list of numbers. */
  readonly embeddings: readonly number[][]
  /** Tokens consumed by the call. */
  readonly tokensUsed: number
  /** Cost of the call (currency/unit is defined by the provider). */
  readonly cost: number
}
|
|
53
|
+
|
|
54
|
+
// ============================================================================
|
|
55
|
+
// Vector Index
|
|
56
|
+
// ============================================================================
|
|
57
|
+
|
|
58
|
+
/**
 * One stored vector together with the section it was computed from.
 */
export interface VectorEntry {
  /** Identifier of this entry. */
  readonly id: string
  /** Identifier of the section the embedding belongs to. */
  readonly sectionId: string
  /** Path of the document containing the section. */
  readonly documentPath: string
  /** Heading text of the section. */
  readonly heading: string
  /** The embedding vector itself. */
  readonly embedding: readonly number[]
}
|
|
65
|
+
|
|
66
|
+
/**
 * Metadata and contents of a persisted vector index.
 */
export interface VectorIndex {
  /** Format version number of the index. */
  readonly version: number
  /** Name of the provider that produced the vectors. */
  readonly provider: string
  /** Model used by that provider, when recorded. */
  readonly providerModel?: string | undefined
  /** Base URL used by that provider, when recorded. */
  readonly providerBaseURL?: string | undefined
  /** Length of every vector in the index. */
  readonly dimensions: number
  /** Stored entries, looked up by string key. */
  readonly entries: Record<string, VectorEntry>
  /** Accumulated embedding cost. */
  readonly totalCost: number
  /** Accumulated token usage. */
  readonly totalTokens: number
  /** Creation timestamp, kept as a string. */
  readonly createdAt: string
  /** Last-update timestamp, kept as a string. */
  readonly updatedAt: string
  /**
   * HNSW build parameters, persisted so they can be validated when the index
   * is loaded. They influence index quality and build time, so changing them
   * means the index has to be rebuilt.
   */
  readonly hnswParams?: HnswIndexParams | undefined
}
|
|
83
|
+
|
|
84
|
+
/**
 * HNSW build parameters recorded in index metadata.
 *
 * Comparing them with the active configuration reveals config/index
 * mismatches, in which case a rebuild can be recommended.
 */
export interface HnswIndexParams {
  /** M parameter: maximum connections per node. Default: 16 */
  readonly m: number
  /** Search width used while constructing the index. Default: 200 */
  readonly efConstruction: number
}
|
|
94
|
+
|
|
95
|
+
// ============================================================================
|
|
96
|
+
// Quality Modes
|
|
97
|
+
// ============================================================================
|
|
98
|
+
|
|
99
|
+
/**
 * Named quality presets that select the HNSW `efSearch` parameter.
 * A larger `efSearch` trades speed for recall.
 *
 * - 'fast': efSearch=64, ~40% faster, slight recall reduction
 * - 'balanced': efSearch=100 (default), good balance
 * - 'thorough': efSearch=256, ~30% slower, best recall
 */
export type SearchQuality = 'fast' | 'balanced' | 'thorough'

/**
 * Lookup table from quality preset to its `efSearch` value, i.e. the size of
 * the dynamic candidate list kept during a search.
 */
export const QUALITY_EF_SEARCH: Record<SearchQuality, number> = {
  fast: 64,
  balanced: 100,
  thorough: 256,
} as const
|
|
118
|
+
|
|
119
|
+
// ============================================================================
|
|
120
|
+
// Semantic Search
|
|
121
|
+
// ============================================================================
|
|
122
|
+
|
|
123
|
+
/**
 * Options accepted by semantic search. Every field is optional and may also
 * be passed explicitly as `undefined`.
 */
export interface SemanticSearchOptions {
  /** Upper bound on the number of results returned. */
  readonly limit?: number | undefined
  /** Lowest similarity (0-1) a result may have. */
  readonly threshold?: number | undefined
  /** Pattern restricting results by document path. */
  readonly pathPattern?: string | undefined
  /** Quality preset: fast, balanced (default), or thorough. */
  readonly quality?: SearchQuality | undefined
  /** Overrides the embedding provider configuration for this search. */
  readonly providerConfig?:
    | {
        /** Which provider to use. */
        readonly provider:
          | 'openai'
          | 'ollama'
          | 'lm-studio'
          | 'openrouter'
          | 'voyage'
        /** Base URL override for the provider. */
        readonly baseURL?: string | undefined
        /** Model override for the provider. */
        readonly model?: string | undefined
      }
    | undefined
  /**
   * When true, the query is used as-is: no normalization, lowercasing or
   * punctuation stripping. Use it for exact query matching.
   * Default: false (preprocessing stays on for better recall).
   */
  readonly skipPreprocessing?: boolean | undefined
  /**
   * Raise the score of results whose section heading contains query terms.
   * Helps navigation-style queries such as "installation guide" or
   * "API reference".
   * Default: true (heading boost enabled).
   */
  readonly headingBoost?: boolean | undefined
  /**
   * Enable HyDE (Hypothetical Document Embeddings) query expansion: an LLM
   * writes a hypothetical document answering the query, and the search runs
   * on that document's embedding.
   *
   * Best for: complex questions, "how to" queries, ambiguous searches
   * Adds: ~1-2s latency, LLM API cost
   * Improvement: 10-30% better recall on complex queries
   *
   * Default: false (disabled)
   */
  readonly hyde?: boolean | undefined
  /**
   * Tuning knobs for HyDE; ignored unless `hyde` is true.
   */
  readonly hydeOptions?:
    | {
        /** Model that generates the hypothetical document. Default: gpt-4o-mini */
        readonly model?: string | undefined
        /** Token cap for the generated document. Default: 256 */
        readonly maxTokens?: number | undefined
        /** Sampling temperature (0-1). Default: 0.3 */
        readonly temperature?: number | undefined
      }
    | undefined
  /** Number of context lines to include before matches. */
  readonly contextBefore?: number | undefined
  /** Number of context lines to include after matches. */
  readonly contextAfter?: number | undefined
}
|
|
187
|
+
|
|
188
|
+
// ============================================================================
|
|
189
|
+
// Query Preprocessing
|
|
190
|
+
// ============================================================================
|
|
191
|
+
|
|
192
|
+
// ============================================================================
|
|
193
|
+
// Heading Boost
|
|
194
|
+
// ============================================================================
|
|
195
|
+
|
|
196
|
+
/** Score added for each query term found in a heading (0.05 = 5% per term). */
const HEADING_BOOST_FACTOR = 0.05

/** Score added for documents matching an important-file pattern (0.03 = 3%). */
const FILE_IMPORTANCE_BOOST = 0.03

/**
 * Case-insensitive path patterns identifying entry-point or otherwise
 * high-value documentation. Each kind of file is listed twice: once anchored
 * at the start of the path (root level) and once after a `/` (nested).
 */
const IMPORTANT_FILE_PATTERNS: ReadonlyArray<RegExp> = [
  // README files
  /^readme\.md$/i,
  /\/readme\.md$/i,
  // Index files
  /^index\.md$/i,
  /\/index\.md$/i,
  // Getting started guides (hyphen optional, any suffix)
  /^getting-?started/i,
  /\/getting-?started/i,
  // Introductions (any suffix)
  /^introduction/i,
  /\/introduction/i,
  // Overviews (any suffix)
  /^overview/i,
  /\/overview/i,
  // Quickstart guides (any suffix)
  /^quickstart/i,
  /\/quickstart/i,
  // Changelogs (useful for "what changed" queries)
  /^changelog\.md$/i,
  /\/changelog\.md$/i,
]
|
|
222
|
+
|
|
223
|
+
/**
 * File-importance component of the ranking boost.
 *
 * A document whose path matches any entry of `IMPORTANT_FILE_PATTERNS`
 * (README, index, getting-started, ...) receives the fixed
 * `FILE_IMPORTANCE_BOOST`; every other document receives nothing.
 *
 * @param documentPath - Path to the document
 * @returns `FILE_IMPORTANCE_BOOST` on a match, otherwise 0
 */
export const calculateFileImportanceBoost = (documentPath: string): number => {
  for (const importantPattern of IMPORTANT_FILE_PATTERNS) {
    // First hit decides; the boost does not stack across patterns.
    if (importantPattern.test(documentPath)) return FILE_IMPORTANCE_BOOST
  }
  return 0
}
|
|
236
|
+
|
|
237
|
+
/**
 * Heading-match component of the ranking boost.
 *
 * The query is lowercased and split on whitespace; each *distinct* term that
 * occurs as a substring of the lowercased heading adds `HEADING_BOOST_FACTOR`.
 * Repeating a word in the query ("api api") does not inflate the boost.
 *
 * @param heading - Section heading to check
 * @param query - Search query; lowercased and whitespace-split here
 * @returns Boost to add to the similarity score; 0 when the query has no terms
 */
export const calculateHeadingBoost = (
  heading: string,
  query: string,
): number => {
  // A Set collapses repeated words so each term is counted at most once.
  const uniqueTerms = new Set(query.toLowerCase().split(/\s+/).filter(Boolean))
  if (uniqueTerms.size === 0) return 0

  const headingLower = heading.toLowerCase()
  let matchCount = 0
  for (const term of uniqueTerms) {
    if (headingLower.includes(term)) matchCount += 1
  }

  return matchCount * HEADING_BOOST_FACTOR
}
|
|
259
|
+
|
|
260
|
+
/**
 * Total ranking boost for one search result: the heading-match boost plus
 * the file-importance boost.
 *
 * @param heading - Section heading
 * @param query - Search query
 * @param documentPath - Path to the document
 * @returns Sum of `calculateHeadingBoost` and `calculateFileImportanceBoost`
 */
export const calculateRankingBoost = (
  heading: string,
  query: string,
  documentPath: string,
): number =>
  calculateHeadingBoost(heading, query) +
  calculateFileImportanceBoost(documentPath)
|
|
278
|
+
|
|
279
|
+
// ============================================================================
|
|
280
|
+
// Query Preprocessing
|
|
281
|
+
// ============================================================================
|
|
282
|
+
|
|
283
|
+
/**
 * Normalize a search query before it is embedded.
 *
 * Steps, in order:
 * - lowercase the query
 * - replace every character that is not a letter, combining mark, digit,
 *   underscore or whitespace with a space (keeps word boundaries intact)
 * - collapse runs of whitespace into a single space
 * - trim leading/trailing whitespace
 *
 * The character class is Unicode-aware, so non-ASCII words ("café", "日本語")
 * survive normalization instead of being stripped as if they were
 * punctuation. ASCII-only queries produce the same output as the ASCII `\w`
 * class would.
 *
 * @param query - Raw search query
 * @returns Normalized query string (may be empty)
 */
export const preprocessQuery = (query: string): string => {
  return (
    query
      .toLowerCase()
      // \p{L}\p{M}\p{N}_ is the Unicode counterpart of ASCII-only \w; the
      // `u` flag is required for \p{...} property escapes.
      .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, ' ')
      // Collapse multiple spaces
      .replace(/\s+/g, ' ')
      .trim()
  )
}
|
|
308
|
+
|
|
309
|
+
/**
 * A single hit returned by semantic search.
 */
export interface SemanticSearchResult {
  /** Identifier of the matched section. */
  readonly sectionId: string
  /** Path of the document containing the section. */
  readonly documentPath: string
  /** Heading of the matched section. */
  readonly heading: string
  /** Similarity score of the hit. */
  readonly similarity: number
  /** Section content, when it was loaded. */
  readonly content?: string | undefined
  /** Numbered context lines; populated when context was requested. */
  readonly contextLines?: readonly ContextLine[] | undefined
}
|
|
318
|
+
|
|
319
|
+
/**
 * One line of context shown alongside a search result.
 */
export interface ContextLine {
  /** 1-based line number. */
  readonly lineNumber: number
  /** Text of the line. */
  readonly line: string
  /**
   * Marks lines that belong to the match itself rather than to the
   * surrounding context.
   *
   * - Keyword search: true when the line directly matches the query.
   * - Semantic/hybrid search: true when the line falls inside the
   *   selected/matched section span, even without a direct text match.
   */
  readonly isMatch: boolean
}
|
|
333
|
+
|
|
334
|
+
/**
 * Search results bundled with statistics about hits that fell below the
 * similarity threshold, so callers can give feedback when nothing passes.
 */
export interface SemanticSearchResultWithStats {
  /** The results that were returned. */
  readonly results: readonly SemanticSearchResult[]
  /** How many results fell below the threshold (only set when includeBelowThresholdStats is true). */
  readonly belowThresholdCount?: number | undefined
  /** Best similarity seen among the below-threshold results. */
  readonly belowThresholdHighest?: number | undefined
  /** Count of above-threshold results before the limit was applied. */
  readonly totalAvailable?: number | undefined
}
|
|
347
|
+
|
|
348
|
+
// ============================================================================
|
|
349
|
+
// Errors
|
|
350
|
+
// ============================================================================
|
|
351
|
+
// NOTE: Embedding-related errors are defined in src/errors/index.ts:
|
|
352
|
+
// - EmbeddingError: For embedding operation failures (rate limits, quota, network)
|
|
353
|
+
// - ApiKeyMissingError: For missing API keys
|
|
354
|
+
// - ApiKeyInvalidError: For invalid/rejected API keys
|
|
355
|
+
//
|
|
356
|
+
// Use these centralized error types instead of defining errors here.
|
|
357
|
+
// Example:
|
|
358
|
+
// import { EmbeddingError } from '../errors/index.js'
|
|
359
|
+
// new EmbeddingError({ reason: 'RateLimit', message: 'Rate limited' })
|