mdcontext 0.0.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.changeset/README.md +28 -0
- package/.changeset/config.json +11 -0
- package/.claude/settings.local.json +25 -0
- package/.github/workflows/ci.yml +83 -0
- package/.github/workflows/claude-code-review.yml +44 -0
- package/.github/workflows/claude.yml +85 -0
- package/.github/workflows/release.yml +113 -0
- package/.tldrignore +112 -0
- package/BACKLOG.md +338 -0
- package/CONTRIBUTING.md +186 -0
- package/NOTES/NOTES +44 -0
- package/README.md +434 -11
- package/biome.json +36 -0
- package/cspell.config.yaml +14 -0
- package/dist/chunk-23UPXDNL.js +3044 -0
- package/dist/chunk-2W7MO2DL.js +1366 -0
- package/dist/chunk-3NUAZGMA.js +1689 -0
- package/dist/chunk-7TOWB2XB.js +366 -0
- package/dist/chunk-7XOTOADQ.js +3065 -0
- package/dist/chunk-AH2PDM2K.js +3042 -0
- package/dist/chunk-BNXWSZ63.js +3742 -0
- package/dist/chunk-BTL5DJVU.js +3222 -0
- package/dist/chunk-HDHYG7E4.js +104 -0
- package/dist/chunk-HLR4KZBP.js +3234 -0
- package/dist/chunk-IP3FRFEB.js +1045 -0
- package/dist/chunk-KHU56VDO.js +3042 -0
- package/dist/chunk-KRYIFLQR.js +88 -0
- package/dist/chunk-LBSDNLEM.js +287 -0
- package/dist/chunk-MNTQ7HCP.js +2643 -0
- package/dist/chunk-MUJELQQ6.js +1387 -0
- package/dist/chunk-MXJGMSLV.js +2199 -0
- package/dist/chunk-N6QJGC3Z.js +2636 -0
- package/dist/chunk-OBELGBPM.js +1713 -0
- package/dist/chunk-OT7R5XTA.js +3192 -0
- package/dist/chunk-P7X4RA2T.js +106 -0
- package/dist/chunk-PIDUQNC2.js +3185 -0
- package/dist/chunk-POGCDIH4.js +3187 -0
- package/dist/chunk-PSIEOQGZ.js +3043 -0
- package/dist/chunk-PVRT3IHA.js +3238 -0
- package/dist/chunk-QNN4TT23.js +1430 -0
- package/dist/chunk-RE3R45RJ.js +3042 -0
- package/dist/chunk-S7E6TFX6.js +803 -0
- package/dist/chunk-SG6GLU4U.js +1378 -0
- package/dist/chunk-SJCDV2ST.js +274 -0
- package/dist/chunk-SYE5XLF3.js +104 -0
- package/dist/chunk-T5VLYBZD.js +103 -0
- package/dist/chunk-TOQB7VWU.js +3238 -0
- package/dist/chunk-VFNMZ4ZQ.js +3228 -0
- package/dist/chunk-VVTGZNBT.js +1629 -0
- package/dist/chunk-W7Q4RFEV.js +104 -0
- package/dist/chunk-XTYYVRLO.js +3190 -0
- package/dist/chunk-Y6MDYVJD.js +3063 -0
- package/dist/cli/main.d.ts +1 -0
- package/dist/cli/main.js +5458 -0
- package/dist/index.d.ts +653 -0
- package/dist/index.js +79 -0
- package/dist/mcp/server.d.ts +1 -0
- package/dist/mcp/server.js +472 -0
- package/dist/schema-BAWSG7KY.js +22 -0
- package/dist/schema-E3QUPL26.js +20 -0
- package/dist/schema-EHL7WUT6.js +20 -0
- package/docs/019-USAGE.md +625 -0
- package/docs/020-current-implementation.md +364 -0
- package/docs/021-DOGFOODING-FINDINGS.md +175 -0
- package/docs/BACKLOG.md +80 -0
- package/docs/CONFIG.md +1123 -0
- package/docs/DESIGN.md +439 -0
- package/docs/ERRORS.md +383 -0
- package/docs/PROJECT.md +88 -0
- package/docs/ROADMAP.md +407 -0
- package/docs/summarization.md +320 -0
- package/docs/test-links.md +9 -0
- package/justfile +40 -0
- package/package.json +74 -9
- package/pnpm-workspace.yaml +5 -0
- package/research/INDEX.md +315 -0
- package/research/code-review/README.md +90 -0
- package/research/code-review/cli-error-handling-review.md +979 -0
- package/research/code-review/code-review-validation-report.md +464 -0
- package/research/code-review/main-ts-review.md +1128 -0
- package/research/config-analysis/01-current-implementation.md +470 -0
- package/research/config-analysis/02-strategy-recommendation.md +428 -0
- package/research/config-analysis/03-task-candidates.md +715 -0
- package/research/config-analysis/033-research-configuration-management.md +828 -0
- package/research/config-analysis/034-research-effect-cli-config.md +1504 -0
- package/research/config-analysis/04-consolidated-task-candidates.md +277 -0
- package/research/config-docs/SUMMARY.md +357 -0
- package/research/config-docs/TEST-RESULTS.md +776 -0
- package/research/config-docs/TODO.md +542 -0
- package/research/config-docs/analysis.md +744 -0
- package/research/config-docs/fix-validation.md +502 -0
- package/research/config-docs/help-audit.md +264 -0
- package/research/config-docs/help-system-analysis.md +890 -0
- package/research/dogfood/consolidated-tool-evaluation.md +373 -0
- package/research/dogfood/strategy-a/a-synthesis.md +184 -0
- package/research/dogfood/strategy-a/a1-docs.md +226 -0
- package/research/dogfood/strategy-a/a2-amorphic.md +156 -0
- package/research/dogfood/strategy-a/a3-llm.md +164 -0
- package/research/dogfood/strategy-b/b-synthesis.md +228 -0
- package/research/dogfood/strategy-b/b1-architecture.md +207 -0
- package/research/dogfood/strategy-b/b2-gaps.md +258 -0
- package/research/dogfood/strategy-b/b3-workflows.md +250 -0
- package/research/dogfood/strategy-c/c-synthesis.md +451 -0
- package/research/dogfood/strategy-c/c1-explorer.md +192 -0
- package/research/dogfood/strategy-c/c2-diver-memory.md +145 -0
- package/research/dogfood/strategy-c/c3-diver-control.md +148 -0
- package/research/dogfood/strategy-c/c4-diver-failure.md +151 -0
- package/research/dogfood/strategy-c/c5-diver-execution.md +221 -0
- package/research/dogfood/strategy-c/c6-diver-org.md +221 -0
- package/research/effect-cli-error-handling.md +845 -0
- package/research/effect-errors-as-values.md +943 -0
- package/research/errors-task-analysis/00-consolidated-tasks.md +207 -0
- package/research/errors-task-analysis/cli-commands-analysis.md +909 -0
- package/research/errors-task-analysis/embeddings-analysis.md +709 -0
- package/research/errors-task-analysis/index-search-analysis.md +812 -0
- package/research/frontmatter/COMMENTS-ARE-SKIPPED.md +149 -0
- package/research/frontmatter/LLM-CODE-NAVIGATION.md +276 -0
- package/research/issue-review.md +603 -0
- package/research/llm-summarization/agent-cli-tools-2026.md +1082 -0
- package/research/llm-summarization/alternative-providers-2026.md +1428 -0
- package/research/llm-summarization/anthropic-2026.md +367 -0
- package/research/llm-summarization/claude-cli-integration.md +1706 -0
- package/research/llm-summarization/cli-integration-patterns.md +3155 -0
- package/research/llm-summarization/openai-2026.md +473 -0
- package/research/llm-summarization/openai-compatible-providers-2026.md +1022 -0
- package/research/llm-summarization/opencode-cli-integration.md +1552 -0
- package/research/llm-summarization/prompt-engineering-2026.md +1426 -0
- package/research/llm-summarization/prototype-results.md +56 -0
- package/research/llm-summarization/provider-switching-patterns-2026.md +2153 -0
- package/research/llm-summarization/typescript-llm-libraries-2026.md +2436 -0
- package/research/mdcontext-error-analysis.md +521 -0
- package/research/mdcontext-pudding/00-EXECUTIVE-SUMMARY.md +282 -0
- package/research/mdcontext-pudding/01-index-embed.md +956 -0
- package/research/mdcontext-pudding/02-search-COMMANDS.md +142 -0
- package/research/mdcontext-pudding/02-search-SUMMARY.md +146 -0
- package/research/mdcontext-pudding/02-search.md +970 -0
- package/research/mdcontext-pudding/03-context.md +779 -0
- package/research/mdcontext-pudding/04-navigation-and-analytics.md +803 -0
- package/research/mdcontext-pudding/04-tree.md +704 -0
- package/research/mdcontext-pudding/05-config.md +1038 -0
- package/research/mdcontext-pudding/06-links-summary.txt +87 -0
- package/research/mdcontext-pudding/06-links.md +679 -0
- package/research/mdcontext-pudding/07-stats.md +693 -0
- package/research/mdcontext-pudding/BUG-FIX-PLAN.md +388 -0
- package/research/mdcontext-pudding/P0-BUG-VALIDATION.md +167 -0
- package/research/mdcontext-pudding/README.md +168 -0
- package/research/mdcontext-pudding/TESTING-SUMMARY.md +128 -0
- package/research/npm_publish/011-npm-workflow-research-agent2.md +792 -0
- package/research/npm_publish/012-npm-workflow-research-agent1.md +530 -0
- package/research/npm_publish/013-npm-workflow-research-agent3.md +722 -0
- package/research/npm_publish/014-npm-workflow-synthesis.md +556 -0
- package/research/npm_publish/031-npm-workflow-task-analysis.md +134 -0
- package/research/research-quality-review.md +834 -0
- package/research/semantic-search/002-research-embedding-models.md +490 -0
- package/research/semantic-search/003-research-rag-alternatives.md +523 -0
- package/research/semantic-search/004-research-vector-search.md +841 -0
- package/research/semantic-search/032-research-semantic-search.md +427 -0
- package/research/semantic-search/embedding-text-analysis.md +156 -0
- package/research/semantic-search/multi-word-failure-reproduction.md +171 -0
- package/research/semantic-search/query-processing-analysis.md +207 -0
- package/research/semantic-search/root-cause-and-solution.md +114 -0
- package/research/semantic-search/threshold-validation-report.md +69 -0
- package/research/semantic-search/vector-search-analysis.md +63 -0
- package/research/task-management-2026/00-synthesis-recommendations.md +295 -0
- package/research/task-management-2026/01-ai-workflow-tools.md +416 -0
- package/research/task-management-2026/02-agent-framework-patterns.md +476 -0
- package/research/task-management-2026/03-lightweight-file-based.md +567 -0
- package/research/task-management-2026/04-established-tools-ai-features.md +541 -0
- package/research/task-management-2026/linear/01-core-features-workflow.md +771 -0
- package/research/task-management-2026/linear/02-api-integrations.md +930 -0
- package/research/task-management-2026/linear/03-ai-features.md +368 -0
- package/research/task-management-2026/linear/04-pricing-setup.md +205 -0
- package/research/task-management-2026/linear/05-usage-patterns-best-practices.md +605 -0
- package/research/test-path-issues.md +276 -0
- package/review/ALP-76/1-error-type-design.md +962 -0
- package/review/ALP-76/2-error-handling-patterns.md +906 -0
- package/review/ALP-76/3-error-presentation.md +624 -0
- package/review/ALP-76/4-test-coverage.md +625 -0
- package/review/ALP-76/5-migration-completeness.md +440 -0
- package/review/ALP-76/6-effect-best-practices.md +755 -0
- package/scripts/apply-branch-protection.sh +47 -0
- package/scripts/branch-protection-templates.json +79 -0
- package/scripts/prototype-summarization.ts +346 -0
- package/scripts/rebuild-hnswlib.js +58 -0
- package/scripts/setup-branch-protection.sh +64 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/active-provider.json +7 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.json +541 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.meta.json +5 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/config.json +8 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/documents.json +60 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/links.json +13 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/sections.json +1197 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/configuration-management.md +99 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/distributed-systems.md +92 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/error-handling.md +78 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/failure-automation.md +55 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/job-context.md +69 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/process-orchestration.md +99 -0
- package/src/cli/argv-preprocessor.test.ts +210 -0
- package/src/cli/argv-preprocessor.ts +202 -0
- package/src/cli/cli.test.ts +627 -0
- package/src/cli/commands/backlinks.ts +54 -0
- package/src/cli/commands/config-cmd.ts +642 -0
- package/src/cli/commands/context.ts +285 -0
- package/src/cli/commands/duplicates.ts +122 -0
- package/src/cli/commands/embeddings.ts +529 -0
- package/src/cli/commands/index-cmd.ts +480 -0
- package/src/cli/commands/index.ts +16 -0
- package/src/cli/commands/links.ts +52 -0
- package/src/cli/commands/search.ts +1281 -0
- package/src/cli/commands/stats.ts +149 -0
- package/src/cli/commands/tree.ts +128 -0
- package/src/cli/config-layer.ts +176 -0
- package/src/cli/error-handler.test.ts +235 -0
- package/src/cli/error-handler.ts +655 -0
- package/src/cli/flag-schemas.ts +341 -0
- package/src/cli/help.ts +588 -0
- package/src/cli/index.ts +9 -0
- package/src/cli/main.ts +435 -0
- package/src/cli/options.ts +41 -0
- package/src/cli/shared-error-handling.ts +199 -0
- package/src/cli/typo-suggester.test.ts +105 -0
- package/src/cli/typo-suggester.ts +130 -0
- package/src/cli/utils.ts +259 -0
- package/src/config/file-provider.test.ts +320 -0
- package/src/config/file-provider.ts +273 -0
- package/src/config/index.ts +72 -0
- package/src/config/integration.test.ts +667 -0
- package/src/config/precedence.test.ts +277 -0
- package/src/config/precedence.ts +451 -0
- package/src/config/schema.test.ts +414 -0
- package/src/config/schema.ts +603 -0
- package/src/config/service.test.ts +320 -0
- package/src/config/service.ts +243 -0
- package/src/config/testing.test.ts +264 -0
- package/src/config/testing.ts +110 -0
- package/src/core/index.ts +1 -0
- package/src/core/types.ts +113 -0
- package/src/duplicates/detector.test.ts +183 -0
- package/src/duplicates/detector.ts +414 -0
- package/src/duplicates/index.ts +18 -0
- package/src/embeddings/embedding-namespace.test.ts +300 -0
- package/src/embeddings/embedding-namespace.ts +947 -0
- package/src/embeddings/heading-boost.test.ts +222 -0
- package/src/embeddings/hnsw-build-options.test.ts +198 -0
- package/src/embeddings/hyde.test.ts +272 -0
- package/src/embeddings/hyde.ts +264 -0
- package/src/embeddings/index.ts +10 -0
- package/src/embeddings/openai-provider.ts +414 -0
- package/src/embeddings/pricing.json +22 -0
- package/src/embeddings/provider-constants.ts +204 -0
- package/src/embeddings/provider-errors.test.ts +967 -0
- package/src/embeddings/provider-errors.ts +565 -0
- package/src/embeddings/provider-factory.test.ts +240 -0
- package/src/embeddings/provider-factory.ts +225 -0
- package/src/embeddings/provider-integration.test.ts +788 -0
- package/src/embeddings/query-preprocessing.test.ts +187 -0
- package/src/embeddings/semantic-search-threshold.test.ts +508 -0
- package/src/embeddings/semantic-search.ts +1270 -0
- package/src/embeddings/types.ts +359 -0
- package/src/embeddings/vector-store.ts +708 -0
- package/src/embeddings/voyage-provider.ts +313 -0
- package/src/errors/errors.test.ts +845 -0
- package/src/errors/index.ts +533 -0
- package/src/index/ignore-patterns.test.ts +354 -0
- package/src/index/ignore-patterns.ts +305 -0
- package/src/index/index.ts +4 -0
- package/src/index/indexer.ts +684 -0
- package/src/index/storage.ts +260 -0
- package/src/index/types.ts +147 -0
- package/src/index/watcher.ts +189 -0
- package/src/index.ts +30 -0
- package/src/integration/search-keyword.test.ts +678 -0
- package/src/mcp/server.ts +612 -0
- package/src/parser/index.ts +1 -0
- package/src/parser/parser.test.ts +291 -0
- package/src/parser/parser.ts +394 -0
- package/src/parser/section-filter.test.ts +277 -0
- package/src/parser/section-filter.ts +392 -0
- package/src/search/__tests__/hybrid-search.test.ts +650 -0
- package/src/search/bm25-store.ts +366 -0
- package/src/search/cross-encoder.test.ts +253 -0
- package/src/search/cross-encoder.ts +406 -0
- package/src/search/fuzzy-search.test.ts +419 -0
- package/src/search/fuzzy-search.ts +273 -0
- package/src/search/hybrid-search.ts +448 -0
- package/src/search/path-matcher.test.ts +276 -0
- package/src/search/path-matcher.ts +33 -0
- package/src/search/query-parser.test.ts +260 -0
- package/src/search/query-parser.ts +319 -0
- package/src/search/searcher.test.ts +280 -0
- package/src/search/searcher.ts +724 -0
- package/src/search/wink-bm25.d.ts +30 -0
- package/src/summarization/cli-providers/claude.ts +202 -0
- package/src/summarization/cli-providers/detection.test.ts +273 -0
- package/src/summarization/cli-providers/detection.ts +118 -0
- package/src/summarization/cli-providers/index.ts +8 -0
- package/src/summarization/cost.test.ts +139 -0
- package/src/summarization/cost.ts +102 -0
- package/src/summarization/error-handler.test.ts +127 -0
- package/src/summarization/error-handler.ts +111 -0
- package/src/summarization/index.ts +102 -0
- package/src/summarization/pipeline.test.ts +498 -0
- package/src/summarization/pipeline.ts +231 -0
- package/src/summarization/prompts.test.ts +269 -0
- package/src/summarization/prompts.ts +133 -0
- package/src/summarization/provider-factory.test.ts +396 -0
- package/src/summarization/provider-factory.ts +178 -0
- package/src/summarization/types.ts +184 -0
- package/src/summarize/budget-bugs.test.ts +620 -0
- package/src/summarize/formatters.ts +419 -0
- package/src/summarize/index.ts +20 -0
- package/src/summarize/summarizer.test.ts +275 -0
- package/src/summarize/summarizer.ts +597 -0
- package/src/summarize/verify-bugs.test.ts +238 -0
- package/src/types/huggingface-transformers.d.ts +66 -0
- package/src/utils/index.ts +1 -0
- package/src/utils/tokens.test.ts +142 -0
- package/src/utils/tokens.ts +186 -0
- package/tests/fixtures/cli/.mdcontext/active-provider.json +7 -0
- package/tests/fixtures/cli/.mdcontext/config.json +8 -0
- package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/indexes/documents.json +33 -0
- package/tests/fixtures/cli/.mdcontext/indexes/links.json +12 -0
- package/tests/fixtures/cli/.mdcontext/indexes/sections.json +247 -0
- package/tests/fixtures/cli/README.md +9 -0
- package/tests/fixtures/cli/api-reference.md +11 -0
- package/tests/fixtures/cli/getting-started.md +11 -0
- package/tests/integration/embed-index.test.ts +712 -0
- package/tests/integration/search-context.test.ts +469 -0
- package/tests/integration/search-semantic.test.ts +522 -0
- package/tsconfig.json +26 -0
- package/vitest.config.ts +16 -0
- package/vitest.setup.ts +12 -0
|
@@ -0,0 +1,364 @@
|
|
|
1
|
+
# mdcontext Semantic Search: Current Implementation
|
|
2
|
+
|
|
3
|
+
This document describes the current semantic search implementation in mdcontext, covering architecture, components, data flow, and known limitations.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
mdcontext provides semantic search capabilities that allow users to search markdown documentation by meaning rather than exact text matching. By default, the system uses OpenAI's `text-embedding-3-small` model to generate vector embeddings and HNSW (Hierarchical Navigable Small World) for approximate nearest neighbor search.
|
|
8
|
+
|
|
9
|
+
## Architecture
|
|
10
|
+
|
|
11
|
+
```
|
|
12
|
+
┌─────────────────────────────────────────────────────────────────────────┐
|
|
13
|
+
│ CLI Layer │
|
|
14
|
+
│ src/cli/commands/search.ts │
|
|
15
|
+
│ - Mode detection (semantic vs keyword) │
|
|
16
|
+
│ - Auto-index prompt for missing embeddings │
|
|
17
|
+
│ - Result formatting and display │
|
|
18
|
+
└──────────────────────────────┬──────────────────────────────────────────┘
|
|
19
|
+
│
|
|
20
|
+
┌──────────────────────────────▼──────────────────────────────────────────┐
|
|
21
|
+
│ Semantic Search Layer │
|
|
22
|
+
│ src/embeddings/semantic-search.ts │
|
|
23
|
+
│ - Cost estimation (estimateEmbeddingCost) │
|
|
24
|
+
│ - Embedding generation (buildEmbeddings) │
|
|
25
|
+
│ - Query execution (semanticSearch, semanticSearchWithContent) │
|
|
26
|
+
│ - Statistics (getEmbeddingStats) │
|
|
27
|
+
└─────────────┬─────────────────────────────────────┬─────────────────────┘
|
|
28
|
+
│ │
|
|
29
|
+
┌─────────────▼───────────────┐ ┌───────────────▼─────────────────────┐
|
|
30
|
+
│ Embedding Provider │ │ Vector Store │
|
|
31
|
+
│ src/embeddings/ │ │ src/embeddings/vector-store.ts │
|
|
32
|
+
│ openai-provider.ts │ │ - HNSW index (hnswlib-node) │
|
|
33
|
+
│ - OpenAI API integration │ │ - Cosine similarity search │
|
|
34
|
+
│ - text-embedding-3-small │ │ - Binary index persistence │
|
|
35
|
+
│ - Batch processing (100) │ │ - Metadata JSON storage │
|
|
36
|
+
└──────────────────────────────┘ └─────────────────────────────────────┘
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## Components
|
|
40
|
+
|
|
41
|
+
### 1. Embedding Provider (`src/embeddings/openai-provider.ts`)
|
|
42
|
+
|
|
43
|
+
**Current Provider**: OpenAI `text-embedding-3-small`
|
|
44
|
+
|
|
45
|
+
| Property | Value |
|
|
46
|
+
| ---------- | ------------------------ |
|
|
47
|
+
| Model | `text-embedding-3-small` |
|
|
48
|
+
| Dimensions | 1536 |
|
|
49
|
+
| Batch Size | 100 texts per API call |
|
|
50
|
+
| Cost | $0.02 per 1M tokens |
|
|
51
|
+
|
|
52
|
+
**Interface**:
|
|
53
|
+
|
|
54
|
+
```typescript
|
|
55
|
+
interface EmbeddingProvider {
|
|
56
|
+
readonly name: string; // e.g., "openai:text-embedding-3-small"
|
|
57
|
+
readonly dimensions: number; // 1536 for small, 3072 for large
|
|
58
|
+
embed(texts: string[]): Promise<EmbeddingResult>;
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
interface EmbeddingResult {
|
|
62
|
+
readonly embeddings: readonly number[][];
|
|
63
|
+
readonly tokensUsed: number;
|
|
64
|
+
readonly cost: number;
|
|
65
|
+
}
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
**Supported Models**:
|
|
69
|
+
|
|
70
|
+
- `text-embedding-3-small` (default): 1536 dimensions, $0.02/1M tokens
|
|
71
|
+
- `text-embedding-3-large`: 3072 dimensions, $0.13/1M tokens
|
|
72
|
+
- `text-embedding-ada-002` (legacy): 1536 dimensions, $0.10/1M tokens
|
|
73
|
+
|
|
74
|
+
### 2. Vector Store (`src/embeddings/vector-store.ts`)
|
|
75
|
+
|
|
76
|
+
**Implementation**: HNSW via `hnswlib-node`
|
|
77
|
+
|
|
78
|
+
| Parameter | Value | Description |
|
|
79
|
+
| ---------------- | -------- | -------------------------------------------- |
|
|
80
|
+
| Space | `cosine` | Cosine similarity distance metric |
|
|
81
|
+
| Initial Capacity | 10,000 | Auto-resizes by 2x when full |
|
|
82
|
+
| M | 16 | Max connections per node (default) |
|
|
83
|
+
| efConstruction | 200 | Construction-time search width |
|
|
84
|
+
| efSearch | 100 | Query-time search width (implicit from init) |
|
|
85
|
+
|
|
86
|
+
**Storage Format**:
|
|
87
|
+
|
|
88
|
+
- `vectors.bin`: Binary HNSW index file
|
|
89
|
+
- `vectors.meta.json`: Metadata including entries, costs, timestamps
|
|
90
|
+
|
|
91
|
+
**Vector Entry Structure**:
|
|
92
|
+
|
|
93
|
+
```typescript
|
|
94
|
+
interface VectorEntry {
|
|
95
|
+
readonly id: string; // Section ID
|
|
96
|
+
readonly sectionId: string; // Same as id
|
|
97
|
+
readonly documentPath: string; // Relative path to document
|
|
98
|
+
readonly heading: string; // Section heading text
|
|
99
|
+
readonly embedding: readonly number[]; // 1536-dimensional vector
|
|
100
|
+
}
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
**Similarity Calculation**:
|
|
104
|
+
|
|
105
|
+
- HNSW stores cosine distance (1 - similarity)
|
|
106
|
+
- Search returns `similarity = 1 - distance`
|
|
107
|
+
- Results filtered by threshold (default: 0.35)
|
|
108
|
+
|
|
109
|
+
### 3. Semantic Search (`src/embeddings/semantic-search.ts`)
|
|
110
|
+
|
|
111
|
+
**Text Generation for Embeddings**:
|
|
112
|
+
|
|
113
|
+
Each section is embedded with contextual metadata:
|
|
114
|
+
|
|
115
|
+
```
|
|
116
|
+
# {heading}
|
|
117
|
+
Parent section: {parentHeading} // if nested
|
|
118
|
+
Document: {documentTitle}
|
|
119
|
+
|
|
120
|
+
{full section content}
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
**Filtering**:
|
|
124
|
+
|
|
125
|
+
- Sections with < 10 tokens are skipped
|
|
126
|
+
- Exclude patterns can filter by document path
|
|
127
|
+
|
|
128
|
+
**Search Flow**:
|
|
129
|
+
|
|
130
|
+
1. Load vector store from disk
|
|
131
|
+
2. Embed query using same provider
|
|
132
|
+
3. kNN search with limit \* 2 (over-fetch for filtering)
|
|
133
|
+
4. Apply path pattern filter if specified
|
|
134
|
+
5. Return top `limit` results above threshold
|
|
135
|
+
|
|
136
|
+
### 4. CLI Search Command (`src/cli/commands/search.ts`)
|
|
137
|
+
|
|
138
|
+
**Mode Detection Priority**:
|
|
139
|
+
|
|
140
|
+
1. `--mode semantic` or `--mode keyword` (explicit)
|
|
141
|
+
2. `--keyword` flag (force keyword)
|
|
142
|
+
3. Boolean/phrase pattern detected (`AND`, `OR`, `NOT`, `"quoted"`)
|
|
143
|
+
4. Regex pattern detected (special characters)
|
|
144
|
+
5. Embeddings available → semantic
|
|
145
|
+
6. No embeddings → keyword
|
|
146
|
+
|
|
147
|
+
**Auto-Index Behavior**:
|
|
148
|
+
|
|
149
|
+
- If semantic mode requested but no embeddings exist:
|
|
150
|
+
- Estimate time/cost
|
|
151
|
+
- If < 10 seconds: auto-create silently
|
|
152
|
+
- Otherwise: prompt user for choice
|
|
153
|
+
|
|
154
|
+
**Default Search Threshold**: 0.35 (raised from 0.3 to filter low-quality matches)
|
|
155
|
+
|
|
156
|
+
## Data Flow
|
|
157
|
+
|
|
158
|
+
### Building Embeddings
|
|
159
|
+
|
|
160
|
+
```
|
|
161
|
+
┌─────────────────────────────────────────────────────────────────────────┐
|
|
162
|
+
│ 1. Load Indexes │
|
|
163
|
+
│ - documents.json (document metadata) │
|
|
164
|
+
│ - sections.json (section index with line numbers) │
|
|
165
|
+
└──────────────────────────────┬──────────────────────────────────────────┘
|
|
166
|
+
▼
|
|
167
|
+
┌─────────────────────────────────────────────────────────────────────────┐
|
|
168
|
+
│ 2. Group Sections by Document │
|
|
169
|
+
│ - Skip sections < 10 tokens │
|
|
170
|
+
│ - Apply exclude patterns │
|
|
171
|
+
│ - Track parent headings for context │
|
|
172
|
+
└──────────────────────────────┬──────────────────────────────────────────┘
|
|
173
|
+
▼
|
|
174
|
+
┌─────────────────────────────────────────────────────────────────────────┐
|
|
175
|
+
│ 3. Read File Content │
|
|
176
|
+
│ - For each document, read file │
|
|
177
|
+
│ - Extract section content by line numbers │
|
|
178
|
+
│ - Generate embedding text with metadata │
|
|
179
|
+
└──────────────────────────────┬──────────────────────────────────────────┘
|
|
180
|
+
▼
|
|
181
|
+
┌─────────────────────────────────────────────────────────────────────────┐
|
|
182
|
+
│ 4. Generate Embeddings │
|
|
183
|
+
│ - Send all texts to OpenAI API (batched by 100) │
|
|
184
|
+
│ - Track token usage and cost │
|
|
185
|
+
└──────────────────────────────┬──────────────────────────────────────────┘
|
|
186
|
+
▼
|
|
187
|
+
┌─────────────────────────────────────────────────────────────────────────┐
|
|
188
|
+
│ 5. Build HNSW Index │
|
|
189
|
+
│ - Add vectors with sequential integer IDs │
|
|
190
|
+
│ - Map section IDs to index positions │
|
|
191
|
+
└──────────────────────────────┬──────────────────────────────────────────┘
|
|
192
|
+
▼
|
|
193
|
+
┌─────────────────────────────────────────────────────────────────────────┐
|
|
194
|
+
│ 6. Persist to Disk │
|
|
195
|
+
│ - vectors.bin (HNSW binary) │
|
|
196
|
+
│ - vectors.meta.json (metadata + entries) │
|
|
197
|
+
└─────────────────────────────────────────────────────────────────────────┘
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
### Query Execution
|
|
201
|
+
|
|
202
|
+
```
|
|
203
|
+
┌─────────────────────────────────────────────────────────────────────────┐
|
|
204
|
+
│ 1. Query Input │
|
|
205
|
+
│ "How do I configure authentication?" │
|
|
206
|
+
└──────────────────────────────┬──────────────────────────────────────────┘
|
|
207
|
+
▼
|
|
208
|
+
┌─────────────────────────────────────────────────────────────────────────┐
|
|
209
|
+
│ 2. Load Vector Store │
|
|
210
|
+
│ - Read vectors.bin into HNSW index │
|
|
211
|
+
│ - Load metadata from vectors.meta.json │
|
|
212
|
+
└──────────────────────────────┬──────────────────────────────────────────┘
|
|
213
|
+
▼
|
|
214
|
+
┌─────────────────────────────────────────────────────────────────────────┐
|
|
215
|
+
│ 3. Embed Query │
|
|
216
|
+
│ - Single API call to OpenAI │
|
|
217
|
+
│ - Returns 1536-dim vector │
|
|
218
|
+
└──────────────────────────────┬──────────────────────────────────────────┘
|
|
219
|
+
▼
|
|
220
|
+
┌─────────────────────────────────────────────────────────────────────────┐
|
|
221
|
+
│ 4. HNSW kNN Search │
|
|
222
|
+
│ - Find k nearest neighbors (cosine similarity) │
|
|
223
|
+
│ - Over-fetch: request limit * 2 │
|
|
224
|
+
└──────────────────────────────┬──────────────────────────────────────────┘
|
|
225
|
+
▼
|
|
226
|
+
┌─────────────────────────────────────────────────────────────────────────┐
|
|
227
|
+
│ 5. Post-Processing │
|
|
228
|
+
│ - Filter by similarity threshold (default: 0.35) │
|
|
229
|
+
│ - Filter by path pattern (if specified) │
|
|
230
|
+
│ - Truncate to requested limit │
|
|
231
|
+
└──────────────────────────────┬──────────────────────────────────────────┘
|
|
232
|
+
▼
|
|
233
|
+
┌─────────────────────────────────────────────────────────────────────────┐
|
|
234
|
+
│ 6. Return Results │
|
|
235
|
+
│ [{sectionId, documentPath, heading, similarity}, ...] │
|
|
236
|
+
└─────────────────────────────────────────────────────────────────────────┘
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
## Storage Files
|
|
240
|
+
|
|
241
|
+
Located in `.mdcontext/` directory:
|
|
242
|
+
|
|
243
|
+
| File | Format | Contents |
|
|
244
|
+
| ------------------- | ------ | ---------------------------------------------- |
|
|
245
|
+
| `vectors.bin` | Binary | HNSW index (hnswlib native format) |
|
|
246
|
+
| `vectors.meta.json` | JSON | Entry metadata, costs, timestamps |
|
|
247
|
+
| `documents.json` | JSON | Document index (title, path, stats) |
|
|
248
|
+
| `sections.json` | JSON | Section index (headings, line numbers, tokens) |
|
|
249
|
+
|
|
250
|
+
## Current Limitations and Gaps
|
|
251
|
+
|
|
252
|
+
### 1. ~~Single Provider Lock-in~~ RESOLVED (ALP-215)
|
|
253
|
+
|
|
254
|
+
- **RESOLVED**: Multiple embedding providers now supported (OpenAI, Ollama, LM Studio, OpenRouter)
|
|
255
|
+
- **Impact**: Users can choose local providers for offline capability and cost savings
|
|
256
|
+
- **Code Location**: `provider-factory.ts` handles provider selection based on config
|
|
257
|
+
|
|
258
|
+
### 2. No Incremental Updates
|
|
259
|
+
|
|
260
|
+
- **Issue**: `buildEmbeddings` either builds all or skips entirely
|
|
261
|
+
- **Impact**: Adding one document requires re-embedding everything (with `--force`)
|
|
262
|
+
- **Workaround**: Cache-hit detection skips the rebuild when any embeddings already exist
|
|
263
|
+
|
|
264
|
+
### 3. Fixed HNSW Parameters
|
|
265
|
+
|
|
266
|
+
- **Issue**: HNSW parameters (M=16, efConstruction=200) are hardcoded
|
|
267
|
+
- **Impact**: No tuning for different corpus sizes or quality/speed tradeoffs
|
|
268
|
+
- **Code Location**: `vector-store.ts:94`
|
|
269
|
+
|
|
270
|
+
### 4. No Hybrid Search
|
|
271
|
+
|
|
272
|
+
- **Issue**: Semantic and keyword search are mutually exclusive
|
|
273
|
+
- **Impact**: Can't combine exact matches with semantic similarity
|
|
274
|
+
- **Workaround**: Mode auto-detection helps, but no fusion ranking
|
|
275
|
+
|
|
276
|
+
### 5. No Re-ranking
|
|
277
|
+
|
|
278
|
+
- **Issue**: Results are pure cosine similarity, no re-ranking
|
|
279
|
+
- **Impact**: May miss contextually relevant results that rank lower in embedding space
|
|
280
|
+
- **Alternative**: Cross-encoder re-ranking could improve precision
|
|
281
|
+
|
|
282
|
+
### 6. Section-Level Granularity Only
|
|
283
|
+
|
|
284
|
+
- **Issue**: Embeddings are per-section, no paragraph or sentence chunking
|
|
285
|
+
- **Impact**: Large sections may have diluted embeddings; queries may match subsections better
|
|
286
|
+
- **Tradeoff**: Current approach preserves document structure
|
|
287
|
+
|
|
288
|
+
### 7. No Query Expansion
|
|
289
|
+
|
|
290
|
+
- **Issue**: Queries are embedded as-is
|
|
291
|
+
- **Impact**: Synonyms, abbreviations, and related terms may not match
|
|
292
|
+
- **Opportunity**: HyDE or query reformulation could help
|
|
293
|
+
|
|
294
|
+
### 8. Limited Metadata Filtering
|
|
295
|
+
|
|
296
|
+
- **Issue**: Only path pattern filtering supported
|
|
297
|
+
- **Impact**: Can't filter by date, author, tags, or other metadata
|
|
298
|
+
- **Code Location**: `semanticSearch` has `pathPattern` option only
|
|
299
|
+
|
|
300
|
+
### 9. No Batch Query Support
|
|
301
|
+
|
|
302
|
+
- **Issue**: Each search embeds query individually
|
|
303
|
+
- **Impact**: Multiple searches incur repeated API calls
|
|
304
|
+
- **Opportunity**: Query batching could reduce latency
|
|
305
|
+
|
|
306
|
+
### 10. Memory Usage
|
|
307
|
+
|
|
308
|
+
- **Issue**: Entire HNSW index loaded into memory
|
|
309
|
+
- **Impact**: Large corpora may hit memory limits
|
|
310
|
+
- **Note**: Not a problem for typical documentation sizes
|
|
311
|
+
|
|
312
|
+
## Cost Analysis
|
|
313
|
+
|
|
314
|
+
For a typical documentation corpus (~1000 sections, ~500K tokens):
|
|
315
|
+
|
|
316
|
+
| Operation | Tokens | Cost |
|
|
317
|
+
| ----------------- | ------- | ---------- |
|
|
318
|
+
| Initial embedding | ~500K | ~$0.01 |
|
|
319
|
+
| Per query | ~50-100 | ~$0.000002 |
|
|
320
|
+
|
|
321
|
+
The cost is dominated by initial embedding creation. Query costs are negligible.
|
|
322
|
+
|
|
323
|
+
## Performance Characteristics
|
|
324
|
+
|
|
325
|
+
| Metric | Typical Value |
|
|
326
|
+
| --------------- | ------------------------------ |
|
|
327
|
+
| Embedding build | ~1.5s per 100 sections |
|
|
328
|
+
| Query latency | ~200-500ms (API call dominant) |
|
|
329
|
+
| Index load time | ~50-100ms for 1000 vectors |
|
|
330
|
+
| Memory usage | ~10MB per 1000 vectors |
|
|
331
|
+
|
|
332
|
+
## Configuration
|
|
333
|
+
|
|
334
|
+
Current configuration is largely hardcoded. Key values:
|
|
335
|
+
|
|
336
|
+
```typescript
|
|
337
|
+
// Embedding
|
|
338
|
+
model: 'text-embedding-3-small' // openai-provider.ts
|
|
339
|
+
batchSize: 100 // openai-provider.ts
|
|
340
|
+
minTokens: 10 // semantic-search.ts (skip small sections)
|
|
341
|
+
|
|
342
|
+
// Vector store
|
|
343
|
+
space: 'cosine' // vector-store.ts
|
|
344
|
+
initialCapacity: 10000 // vector-store.ts
|
|
345
|
+
M: 16 // vector-store.ts
|
|
346
|
+
efConstruction: 200 // vector-store.ts
|
|
347
|
+
|
|
348
|
+
// Search
|
|
349
|
+
defaultLimit: 10 // search.ts
|
|
350
|
+
defaultThreshold: 0.35 // search.ts
|
|
351
|
+
autoIndexThreshold: 10 seconds // search.ts
|
|
352
|
+
```
|
|
353
|
+
|
|
354
|
+
## Type Definitions
|
|
355
|
+
|
|
356
|
+
Full type definitions are in `src/embeddings/types.ts`:
|
|
357
|
+
|
|
358
|
+
- `EmbeddingProvider`: Provider interface
|
|
359
|
+
- `EmbeddingResult`: Embed response
|
|
360
|
+
- `VectorEntry`: Stored vector with metadata
|
|
361
|
+
- `VectorIndex`: Full index schema
|
|
362
|
+
- `SemanticSearchOptions`: Search parameters
|
|
363
|
+
- `SemanticSearchResult`: Search result item
|
|
364
|
+
- `EmbedError`: Error types (RateLimit, ApiKey, Network, Unknown)
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
# mdcontext Dogfooding Findings
|
|
2
|
+
|
|
3
|
+
**Date:** 2025-01-19
|
|
4
|
+
**Method:** 6 autonomous agents explored documentation directories using only mdcontext CLI
|
|
5
|
+
**Target directories:** `./docs`, `./doc.llm`, `./docs.amorphic` (in `/Users/alphab/Dev/LLM/DEV/TMP/ralph`)
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## Executive Summary
|
|
10
|
+
|
|
11
|
+
**Verdict: YES - mdcontext is useful**, with the `context` command being the standout feature delivering 80-99% token reduction. However, several issues limit the tool's effectiveness for discovery workflows.
|
|
12
|
+
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
## What Works Well
|
|
16
|
+
|
|
17
|
+
### 1. Context Command (Killer Feature)
|
|
18
|
+
|
|
19
|
+
- Consistently achieved 80-99% token reduction across all test directories
|
|
20
|
+
- Multi-file assembly with budget allocation works as designed
|
|
21
|
+
- `--brief` mode effective for quick overviews
|
|
22
|
+
- JSON output useful for programmatic consumption
|
|
23
|
+
|
|
24
|
+
### 2. Tree Command
|
|
25
|
+
|
|
26
|
+
- Excellent for initial codebase discovery
|
|
27
|
+
- Clean hierarchical output
|
|
28
|
+
- Good starting point before diving into specific files
|
|
29
|
+
|
|
30
|
+
### 3. Help System
|
|
31
|
+
|
|
32
|
+
- Well-polished with examples
|
|
33
|
+
- Subcommand help (`mdcontext context --help`) informative
|
|
34
|
+
- Agents successfully learned the tool from `--help` alone
|
|
35
|
+
|
|
36
|
+
---
|
|
37
|
+
|
|
38
|
+
## Issues Found
|
|
39
|
+
|
|
40
|
+
### Critical: `index` Command Returns "0 Documents"
|
|
41
|
+
|
|
42
|
+
**Symptom:**
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
mdcontext index ./docs
|
|
46
|
+
# Output: "Indexed: 0 documents"
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
**Expected:** Should index markdown files in the directory.
|
|
50
|
+
|
|
51
|
+
**Impact:** Breaks the discovery workflow. Users can't find content without a working index.
|
|
52
|
+
|
|
53
|
+
**Frequency:** Inconsistent - sometimes works, sometimes doesn't.
|
|
54
|
+
|
|
55
|
+
---
|
|
56
|
+
|
|
57
|
+
### High: `search` Only Matches Headings
|
|
58
|
+
|
|
59
|
+
**Symptom:**
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
mdcontext search "authentication" ./docs
|
|
63
|
+
# Returns: Only matches if "authentication" appears in a heading
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
**Expected:** Search should find content anywhere in documents.
|
|
67
|
+
|
|
68
|
+
**Impact:** Major limitation for discovery. Users searching for concepts/keywords get no results if those words aren't in headings.
|
|
69
|
+
|
|
70
|
+
**Note:** Full semantic search requires an embedding provider (OpenAI, Ollama, LM Studio, or OpenRouter) + `--embed` flag. See [CONFIG.md](./CONFIG.md) for free local options. Structural search should at least search content.
|
|
71
|
+
|
|
72
|
+
---
|
|
73
|
+
|
|
74
|
+
### Medium: Token Budget Sometimes Exceeded
|
|
75
|
+
|
|
76
|
+
**Symptom:**
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
mdcontext context --tokens 500 --brief file.md
|
|
80
|
+
# Output may exceed 500 tokens
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
**Expected:** Output should respect token budget.
|
|
84
|
+
|
|
85
|
+
**Impact:** Unpredictable context sizes when assembling for LLM consumption.
|
|
86
|
+
|
|
87
|
+
---
|
|
88
|
+
|
|
89
|
+
### Low: `stats` Command Minimal Without Embeddings
|
|
90
|
+
|
|
91
|
+
**Symptom:**
|
|
92
|
+
|
|
93
|
+
```bash
|
|
94
|
+
mdcontext stats ./docs
|
|
95
|
+
# Shows basic counts only
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
**Impact:** Limited usefulness without embeddings enabled.
|
|
99
|
+
|
|
100
|
+
---
|
|
101
|
+
|
|
102
|
+
## Recommendations
|
|
103
|
+
|
|
104
|
+
### P0 - Fix Index Command
|
|
105
|
+
|
|
106
|
+
The "0 documents" issue breaks the primary discovery workflow. Investigate:
|
|
107
|
+
|
|
108
|
+
- Directory path resolution
|
|
109
|
+
- File extension filtering
|
|
110
|
+
- Silent failures in indexing process
|
|
111
|
+
|
|
112
|
+
### P1 - Expand Structural Search
|
|
113
|
+
|
|
114
|
+
Make structural search (`--structural` or default without embeddings) search document content, not just headings:
|
|
115
|
+
|
|
116
|
+
- Full-text regex matching
|
|
117
|
+
- Content snippet in results
|
|
118
|
+
- Line number references
|
|
119
|
+
|
|
120
|
+
### P2 - Enforce Token Budgets
|
|
121
|
+
|
|
122
|
+
Ensure `--tokens` flag is respected:
|
|
123
|
+
|
|
124
|
+
- Truncate output if necessary
|
|
125
|
+
- Warn user if content exceeds budget
|
|
126
|
+
- Consider separate flags for hard vs soft limits
|
|
127
|
+
|
|
128
|
+
### P3 - Improve Stats Without Embeddings
|
|
129
|
+
|
|
130
|
+
Show useful stats even without embeddings:
|
|
131
|
+
|
|
132
|
+
- Document count, total tokens, avg tokens/doc
|
|
133
|
+
- Section depth analysis
|
|
134
|
+
- File size distribution
|
|
135
|
+
|
|
136
|
+
---
|
|
137
|
+
|
|
138
|
+
## Test Matrix
|
|
139
|
+
|
|
140
|
+
| Agent | Directory | Commands Used | Verdict |
|
|
141
|
+
| ------- | --------------- | ---------------------- | ------------------------------- |
|
|
142
|
+
| a3caa1b | ./docs | tree, context, search | YES with caveats |
|
|
143
|
+
| a199309 | ./docs | tree, context, index | YES - context justifies tool |
|
|
144
|
+
| a7857e0 | ./docs | help, context, tree | YES - direction is good |
|
|
145
|
+
| a71de5c | ./doc.llm | index, search, context | Partially - search/index issues |
|
|
146
|
+
| a4ec1e1 | ./docs (ralph) | tree, context, stats | YES - solves real problem |
|
|
147
|
+
| a96ff96 | ./docs.amorphic | tree, context, index | YES - context is valuable |
|
|
148
|
+
|
|
149
|
+
---
|
|
150
|
+
|
|
151
|
+
## Raw Findings Summary
|
|
152
|
+
|
|
153
|
+
### What Agents Tried That Failed
|
|
154
|
+
|
|
155
|
+
1. `mdcontext index <dir>` → "0 documents"
|
|
156
|
+
2. `mdcontext search "keyword" <dir>` → No results (keyword in content, not heading)
|
|
157
|
+
3. `mdcontext stats <dir>` → Minimal output without embeddings
|
|
158
|
+
|
|
159
|
+
### What Agents Found Valuable
|
|
160
|
+
|
|
161
|
+
1. `mdcontext context --brief file.md` → Instant useful summary
|
|
162
|
+
2. `mdcontext tree <dir>` → Quick structure overview
|
|
163
|
+
3. `mdcontext context file1.md file2.md --tokens 1000` → Multi-file assembly
|
|
164
|
+
4. `mdcontext --help` / `mdcontext <cmd> --help` → Self-discovery worked
|
|
165
|
+
|
|
166
|
+
---
|
|
167
|
+
|
|
168
|
+
## Suggested Task Scope
|
|
169
|
+
|
|
170
|
+
A follow-up task should address:
|
|
171
|
+
|
|
172
|
+
1. **Index reliability** - Debug why index returns 0 documents
|
|
173
|
+
2. **Content search** - Extend structural search beyond headings
|
|
174
|
+
3. **Budget enforcement** - Ensure token limits are respected
|
|
175
|
+
4. **Error messages** - Surface why operations "silently fail"
|
package/docs/BACKLOG.md
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
# Backlog
|
|
2
|
+
|
|
3
|
+
Ideas and improvements to revisit later.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## CLI: Schema-Based Argv Preprocessor
|
|
8
|
+
|
|
9
|
+
**Date:** 2025-01-19
|
|
10
|
+
**Priority:** Medium
|
|
11
|
+
**Context:** Current `argv-preprocessor.ts` uses hardcoded flag lists, which break on unknown flags.
|
|
12
|
+
|
|
13
|
+
### Problem
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
mdcontext context --json docs/*.md --pretty -x 200
|
|
17
|
+
# Error: ENOENT: no such file or directory, open '.../-x'
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
Unknown flags like `-x` get passed through as positional args (file paths) instead of being rejected with a clear error.
|
|
21
|
+
|
|
22
|
+
### Proposed Solution
|
|
23
|
+
|
|
24
|
+
Replace the hardcoded `flagsWithValues` set with a schema-based approach:
|
|
25
|
+
|
|
26
|
+
```typescript
|
|
27
|
+
interface FlagSpec {
|
|
28
|
+
type: "boolean" | "string";
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
const schema: Record<string, FlagSpec> = {
|
|
32
|
+
"--json": { type: "boolean" },
|
|
33
|
+
"--output": { type: "string" },
|
|
34
|
+
};
|
|
35
|
+
|
|
36
|
+
function parse(argv: string[], schema: Record<string, FlagSpec>) {
|
|
37
|
+
const options: Record<string, any> = {};
|
|
38
|
+
const positionals: string[] = [];
|
|
39
|
+
|
|
40
|
+
for (let i = 0; i < argv.length; i++) {
|
|
41
|
+
const arg = argv[i];
|
|
42
|
+
if (arg.startsWith("-")) {
|
|
43
|
+
const spec = schema[arg];
|
|
44
|
+
if (!spec) throw new Error(`Unknown option: ${arg}`);
|
|
45
|
+
if (spec.type === "boolean") {
|
|
46
|
+
options[arg] = true;
|
|
47
|
+
} else {
|
|
48
|
+
const value = argv[i + 1];
|
|
49
|
+
if (!value || value.startsWith("-")) {
|
|
50
|
+
throw new Error(`Missing value for option: ${arg}`);
|
|
51
|
+
}
|
|
52
|
+
options[arg] = value;
|
|
53
|
+
i++;
|
|
54
|
+
}
|
|
55
|
+
} else {
|
|
56
|
+
positionals.push(arg);
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
return { options, positionals };
|
|
60
|
+
}
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
### Benefits
|
|
64
|
+
|
|
65
|
+
1. **Clear errors** - "Unknown option: -x" instead of cryptic file errors
|
|
66
|
+
2. **Single source of truth** - Schema can align with Effect CLI definitions
|
|
67
|
+
3. **Per-command schemas** - Each command declares its own flags
|
|
68
|
+
4. **Maintainable** - Adding flags = adding to schema, not hunting through code
|
|
69
|
+
|
|
70
|
+
### Implementation Notes
|
|
71
|
+
|
|
72
|
+
- Could extract schema from existing Effect CLI option definitions
|
|
73
|
+
- Or define shared schema that both preprocessor and CLI use
|
|
74
|
+
- Consider generating schema from CLI definitions at build time
|
|
75
|
+
|
|
76
|
+
### Related Files
|
|
77
|
+
|
|
78
|
+
- `src/cli/argv-preprocessor.ts` - Current implementation
|
|
79
|
+
- `src/cli/commands/*.ts` - Effect CLI command definitions
|
|
80
|
+
- `src/cli/options.ts` - Shared options
|