mdcontext 0.0.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.changeset/README.md +28 -0
- package/.changeset/config.json +11 -0
- package/.claude/settings.local.json +25 -0
- package/.github/workflows/ci.yml +83 -0
- package/.github/workflows/claude-code-review.yml +44 -0
- package/.github/workflows/claude.yml +85 -0
- package/.github/workflows/release.yml +113 -0
- package/.tldrignore +112 -0
- package/BACKLOG.md +338 -0
- package/CONTRIBUTING.md +186 -0
- package/NOTES/NOTES +44 -0
- package/README.md +434 -11
- package/biome.json +36 -0
- package/cspell.config.yaml +14 -0
- package/dist/chunk-23UPXDNL.js +3044 -0
- package/dist/chunk-2W7MO2DL.js +1366 -0
- package/dist/chunk-3NUAZGMA.js +1689 -0
- package/dist/chunk-7TOWB2XB.js +366 -0
- package/dist/chunk-7XOTOADQ.js +3065 -0
- package/dist/chunk-AH2PDM2K.js +3042 -0
- package/dist/chunk-BNXWSZ63.js +3742 -0
- package/dist/chunk-BTL5DJVU.js +3222 -0
- package/dist/chunk-HDHYG7E4.js +104 -0
- package/dist/chunk-HLR4KZBP.js +3234 -0
- package/dist/chunk-IP3FRFEB.js +1045 -0
- package/dist/chunk-KHU56VDO.js +3042 -0
- package/dist/chunk-KRYIFLQR.js +88 -0
- package/dist/chunk-LBSDNLEM.js +287 -0
- package/dist/chunk-MNTQ7HCP.js +2643 -0
- package/dist/chunk-MUJELQQ6.js +1387 -0
- package/dist/chunk-MXJGMSLV.js +2199 -0
- package/dist/chunk-N6QJGC3Z.js +2636 -0
- package/dist/chunk-OBELGBPM.js +1713 -0
- package/dist/chunk-OT7R5XTA.js +3192 -0
- package/dist/chunk-P7X4RA2T.js +106 -0
- package/dist/chunk-PIDUQNC2.js +3185 -0
- package/dist/chunk-POGCDIH4.js +3187 -0
- package/dist/chunk-PSIEOQGZ.js +3043 -0
- package/dist/chunk-PVRT3IHA.js +3238 -0
- package/dist/chunk-QNN4TT23.js +1430 -0
- package/dist/chunk-RE3R45RJ.js +3042 -0
- package/dist/chunk-S7E6TFX6.js +803 -0
- package/dist/chunk-SG6GLU4U.js +1378 -0
- package/dist/chunk-SJCDV2ST.js +274 -0
- package/dist/chunk-SYE5XLF3.js +104 -0
- package/dist/chunk-T5VLYBZD.js +103 -0
- package/dist/chunk-TOQB7VWU.js +3238 -0
- package/dist/chunk-VFNMZ4ZQ.js +3228 -0
- package/dist/chunk-VVTGZNBT.js +1629 -0
- package/dist/chunk-W7Q4RFEV.js +104 -0
- package/dist/chunk-XTYYVRLO.js +3190 -0
- package/dist/chunk-Y6MDYVJD.js +3063 -0
- package/dist/cli/main.d.ts +1 -0
- package/dist/cli/main.js +5458 -0
- package/dist/index.d.ts +653 -0
- package/dist/index.js +79 -0
- package/dist/mcp/server.d.ts +1 -0
- package/dist/mcp/server.js +472 -0
- package/dist/schema-BAWSG7KY.js +22 -0
- package/dist/schema-E3QUPL26.js +20 -0
- package/dist/schema-EHL7WUT6.js +20 -0
- package/docs/019-USAGE.md +625 -0
- package/docs/020-current-implementation.md +364 -0
- package/docs/021-DOGFOODING-FINDINGS.md +175 -0
- package/docs/BACKLOG.md +80 -0
- package/docs/CONFIG.md +1123 -0
- package/docs/DESIGN.md +439 -0
- package/docs/ERRORS.md +383 -0
- package/docs/PROJECT.md +88 -0
- package/docs/ROADMAP.md +407 -0
- package/docs/summarization.md +320 -0
- package/docs/test-links.md +9 -0
- package/justfile +40 -0
- package/package.json +74 -9
- package/pnpm-workspace.yaml +5 -0
- package/research/INDEX.md +315 -0
- package/research/code-review/README.md +90 -0
- package/research/code-review/cli-error-handling-review.md +979 -0
- package/research/code-review/code-review-validation-report.md +464 -0
- package/research/code-review/main-ts-review.md +1128 -0
- package/research/config-analysis/01-current-implementation.md +470 -0
- package/research/config-analysis/02-strategy-recommendation.md +428 -0
- package/research/config-analysis/03-task-candidates.md +715 -0
- package/research/config-analysis/033-research-configuration-management.md +828 -0
- package/research/config-analysis/034-research-effect-cli-config.md +1504 -0
- package/research/config-analysis/04-consolidated-task-candidates.md +277 -0
- package/research/config-docs/SUMMARY.md +357 -0
- package/research/config-docs/TEST-RESULTS.md +776 -0
- package/research/config-docs/TODO.md +542 -0
- package/research/config-docs/analysis.md +744 -0
- package/research/config-docs/fix-validation.md +502 -0
- package/research/config-docs/help-audit.md +264 -0
- package/research/config-docs/help-system-analysis.md +890 -0
- package/research/dogfood/consolidated-tool-evaluation.md +373 -0
- package/research/dogfood/strategy-a/a-synthesis.md +184 -0
- package/research/dogfood/strategy-a/a1-docs.md +226 -0
- package/research/dogfood/strategy-a/a2-amorphic.md +156 -0
- package/research/dogfood/strategy-a/a3-llm.md +164 -0
- package/research/dogfood/strategy-b/b-synthesis.md +228 -0
- package/research/dogfood/strategy-b/b1-architecture.md +207 -0
- package/research/dogfood/strategy-b/b2-gaps.md +258 -0
- package/research/dogfood/strategy-b/b3-workflows.md +250 -0
- package/research/dogfood/strategy-c/c-synthesis.md +451 -0
- package/research/dogfood/strategy-c/c1-explorer.md +192 -0
- package/research/dogfood/strategy-c/c2-diver-memory.md +145 -0
- package/research/dogfood/strategy-c/c3-diver-control.md +148 -0
- package/research/dogfood/strategy-c/c4-diver-failure.md +151 -0
- package/research/dogfood/strategy-c/c5-diver-execution.md +221 -0
- package/research/dogfood/strategy-c/c6-diver-org.md +221 -0
- package/research/effect-cli-error-handling.md +845 -0
- package/research/effect-errors-as-values.md +943 -0
- package/research/errors-task-analysis/00-consolidated-tasks.md +207 -0
- package/research/errors-task-analysis/cli-commands-analysis.md +909 -0
- package/research/errors-task-analysis/embeddings-analysis.md +709 -0
- package/research/errors-task-analysis/index-search-analysis.md +812 -0
- package/research/frontmatter/COMMENTS-ARE-SKIPPED.md +149 -0
- package/research/frontmatter/LLM-CODE-NAVIGATION.md +276 -0
- package/research/issue-review.md +603 -0
- package/research/llm-summarization/agent-cli-tools-2026.md +1082 -0
- package/research/llm-summarization/alternative-providers-2026.md +1428 -0
- package/research/llm-summarization/anthropic-2026.md +367 -0
- package/research/llm-summarization/claude-cli-integration.md +1706 -0
- package/research/llm-summarization/cli-integration-patterns.md +3155 -0
- package/research/llm-summarization/openai-2026.md +473 -0
- package/research/llm-summarization/openai-compatible-providers-2026.md +1022 -0
- package/research/llm-summarization/opencode-cli-integration.md +1552 -0
- package/research/llm-summarization/prompt-engineering-2026.md +1426 -0
- package/research/llm-summarization/prototype-results.md +56 -0
- package/research/llm-summarization/provider-switching-patterns-2026.md +2153 -0
- package/research/llm-summarization/typescript-llm-libraries-2026.md +2436 -0
- package/research/mdcontext-error-analysis.md +521 -0
- package/research/mdcontext-pudding/00-EXECUTIVE-SUMMARY.md +282 -0
- package/research/mdcontext-pudding/01-index-embed.md +956 -0
- package/research/mdcontext-pudding/02-search-COMMANDS.md +142 -0
- package/research/mdcontext-pudding/02-search-SUMMARY.md +146 -0
- package/research/mdcontext-pudding/02-search.md +970 -0
- package/research/mdcontext-pudding/03-context.md +779 -0
- package/research/mdcontext-pudding/04-navigation-and-analytics.md +803 -0
- package/research/mdcontext-pudding/04-tree.md +704 -0
- package/research/mdcontext-pudding/05-config.md +1038 -0
- package/research/mdcontext-pudding/06-links-summary.txt +87 -0
- package/research/mdcontext-pudding/06-links.md +679 -0
- package/research/mdcontext-pudding/07-stats.md +693 -0
- package/research/mdcontext-pudding/BUG-FIX-PLAN.md +388 -0
- package/research/mdcontext-pudding/P0-BUG-VALIDATION.md +167 -0
- package/research/mdcontext-pudding/README.md +168 -0
- package/research/mdcontext-pudding/TESTING-SUMMARY.md +128 -0
- package/research/npm_publish/011-npm-workflow-research-agent2.md +792 -0
- package/research/npm_publish/012-npm-workflow-research-agent1.md +530 -0
- package/research/npm_publish/013-npm-workflow-research-agent3.md +722 -0
- package/research/npm_publish/014-npm-workflow-synthesis.md +556 -0
- package/research/npm_publish/031-npm-workflow-task-analysis.md +134 -0
- package/research/research-quality-review.md +834 -0
- package/research/semantic-search/002-research-embedding-models.md +490 -0
- package/research/semantic-search/003-research-rag-alternatives.md +523 -0
- package/research/semantic-search/004-research-vector-search.md +841 -0
- package/research/semantic-search/032-research-semantic-search.md +427 -0
- package/research/semantic-search/embedding-text-analysis.md +156 -0
- package/research/semantic-search/multi-word-failure-reproduction.md +171 -0
- package/research/semantic-search/query-processing-analysis.md +207 -0
- package/research/semantic-search/root-cause-and-solution.md +114 -0
- package/research/semantic-search/threshold-validation-report.md +69 -0
- package/research/semantic-search/vector-search-analysis.md +63 -0
- package/research/task-management-2026/00-synthesis-recommendations.md +295 -0
- package/research/task-management-2026/01-ai-workflow-tools.md +416 -0
- package/research/task-management-2026/02-agent-framework-patterns.md +476 -0
- package/research/task-management-2026/03-lightweight-file-based.md +567 -0
- package/research/task-management-2026/04-established-tools-ai-features.md +541 -0
- package/research/task-management-2026/linear/01-core-features-workflow.md +771 -0
- package/research/task-management-2026/linear/02-api-integrations.md +930 -0
- package/research/task-management-2026/linear/03-ai-features.md +368 -0
- package/research/task-management-2026/linear/04-pricing-setup.md +205 -0
- package/research/task-management-2026/linear/05-usage-patterns-best-practices.md +605 -0
- package/research/test-path-issues.md +276 -0
- package/review/ALP-76/1-error-type-design.md +962 -0
- package/review/ALP-76/2-error-handling-patterns.md +906 -0
- package/review/ALP-76/3-error-presentation.md +624 -0
- package/review/ALP-76/4-test-coverage.md +625 -0
- package/review/ALP-76/5-migration-completeness.md +440 -0
- package/review/ALP-76/6-effect-best-practices.md +755 -0
- package/scripts/apply-branch-protection.sh +47 -0
- package/scripts/branch-protection-templates.json +79 -0
- package/scripts/prototype-summarization.ts +346 -0
- package/scripts/rebuild-hnswlib.js +58 -0
- package/scripts/setup-branch-protection.sh +64 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/active-provider.json +7 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.json +541 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.meta.json +5 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/config.json +8 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/documents.json +60 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/links.json +13 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/sections.json +1197 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/configuration-management.md +99 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/distributed-systems.md +92 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/error-handling.md +78 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/failure-automation.md +55 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/job-context.md +69 -0
- package/src/__tests__/fixtures/semantic-search/multi-word-corpus/process-orchestration.md +99 -0
- package/src/cli/argv-preprocessor.test.ts +210 -0
- package/src/cli/argv-preprocessor.ts +202 -0
- package/src/cli/cli.test.ts +627 -0
- package/src/cli/commands/backlinks.ts +54 -0
- package/src/cli/commands/config-cmd.ts +642 -0
- package/src/cli/commands/context.ts +285 -0
- package/src/cli/commands/duplicates.ts +122 -0
- package/src/cli/commands/embeddings.ts +529 -0
- package/src/cli/commands/index-cmd.ts +480 -0
- package/src/cli/commands/index.ts +16 -0
- package/src/cli/commands/links.ts +52 -0
- package/src/cli/commands/search.ts +1281 -0
- package/src/cli/commands/stats.ts +149 -0
- package/src/cli/commands/tree.ts +128 -0
- package/src/cli/config-layer.ts +176 -0
- package/src/cli/error-handler.test.ts +235 -0
- package/src/cli/error-handler.ts +655 -0
- package/src/cli/flag-schemas.ts +341 -0
- package/src/cli/help.ts +588 -0
- package/src/cli/index.ts +9 -0
- package/src/cli/main.ts +435 -0
- package/src/cli/options.ts +41 -0
- package/src/cli/shared-error-handling.ts +199 -0
- package/src/cli/typo-suggester.test.ts +105 -0
- package/src/cli/typo-suggester.ts +130 -0
- package/src/cli/utils.ts +259 -0
- package/src/config/file-provider.test.ts +320 -0
- package/src/config/file-provider.ts +273 -0
- package/src/config/index.ts +72 -0
- package/src/config/integration.test.ts +667 -0
- package/src/config/precedence.test.ts +277 -0
- package/src/config/precedence.ts +451 -0
- package/src/config/schema.test.ts +414 -0
- package/src/config/schema.ts +603 -0
- package/src/config/service.test.ts +320 -0
- package/src/config/service.ts +243 -0
- package/src/config/testing.test.ts +264 -0
- package/src/config/testing.ts +110 -0
- package/src/core/index.ts +1 -0
- package/src/core/types.ts +113 -0
- package/src/duplicates/detector.test.ts +183 -0
- package/src/duplicates/detector.ts +414 -0
- package/src/duplicates/index.ts +18 -0
- package/src/embeddings/embedding-namespace.test.ts +300 -0
- package/src/embeddings/embedding-namespace.ts +947 -0
- package/src/embeddings/heading-boost.test.ts +222 -0
- package/src/embeddings/hnsw-build-options.test.ts +198 -0
- package/src/embeddings/hyde.test.ts +272 -0
- package/src/embeddings/hyde.ts +264 -0
- package/src/embeddings/index.ts +10 -0
- package/src/embeddings/openai-provider.ts +414 -0
- package/src/embeddings/pricing.json +22 -0
- package/src/embeddings/provider-constants.ts +204 -0
- package/src/embeddings/provider-errors.test.ts +967 -0
- package/src/embeddings/provider-errors.ts +565 -0
- package/src/embeddings/provider-factory.test.ts +240 -0
- package/src/embeddings/provider-factory.ts +225 -0
- package/src/embeddings/provider-integration.test.ts +788 -0
- package/src/embeddings/query-preprocessing.test.ts +187 -0
- package/src/embeddings/semantic-search-threshold.test.ts +508 -0
- package/src/embeddings/semantic-search.ts +1270 -0
- package/src/embeddings/types.ts +359 -0
- package/src/embeddings/vector-store.ts +708 -0
- package/src/embeddings/voyage-provider.ts +313 -0
- package/src/errors/errors.test.ts +845 -0
- package/src/errors/index.ts +533 -0
- package/src/index/ignore-patterns.test.ts +354 -0
- package/src/index/ignore-patterns.ts +305 -0
- package/src/index/index.ts +4 -0
- package/src/index/indexer.ts +684 -0
- package/src/index/storage.ts +260 -0
- package/src/index/types.ts +147 -0
- package/src/index/watcher.ts +189 -0
- package/src/index.ts +30 -0
- package/src/integration/search-keyword.test.ts +678 -0
- package/src/mcp/server.ts +612 -0
- package/src/parser/index.ts +1 -0
- package/src/parser/parser.test.ts +291 -0
- package/src/parser/parser.ts +394 -0
- package/src/parser/section-filter.test.ts +277 -0
- package/src/parser/section-filter.ts +392 -0
- package/src/search/__tests__/hybrid-search.test.ts +650 -0
- package/src/search/bm25-store.ts +366 -0
- package/src/search/cross-encoder.test.ts +253 -0
- package/src/search/cross-encoder.ts +406 -0
- package/src/search/fuzzy-search.test.ts +419 -0
- package/src/search/fuzzy-search.ts +273 -0
- package/src/search/hybrid-search.ts +448 -0
- package/src/search/path-matcher.test.ts +276 -0
- package/src/search/path-matcher.ts +33 -0
- package/src/search/query-parser.test.ts +260 -0
- package/src/search/query-parser.ts +319 -0
- package/src/search/searcher.test.ts +280 -0
- package/src/search/searcher.ts +724 -0
- package/src/search/wink-bm25.d.ts +30 -0
- package/src/summarization/cli-providers/claude.ts +202 -0
- package/src/summarization/cli-providers/detection.test.ts +273 -0
- package/src/summarization/cli-providers/detection.ts +118 -0
- package/src/summarization/cli-providers/index.ts +8 -0
- package/src/summarization/cost.test.ts +139 -0
- package/src/summarization/cost.ts +102 -0
- package/src/summarization/error-handler.test.ts +127 -0
- package/src/summarization/error-handler.ts +111 -0
- package/src/summarization/index.ts +102 -0
- package/src/summarization/pipeline.test.ts +498 -0
- package/src/summarization/pipeline.ts +231 -0
- package/src/summarization/prompts.test.ts +269 -0
- package/src/summarization/prompts.ts +133 -0
- package/src/summarization/provider-factory.test.ts +396 -0
- package/src/summarization/provider-factory.ts +178 -0
- package/src/summarization/types.ts +184 -0
- package/src/summarize/budget-bugs.test.ts +620 -0
- package/src/summarize/formatters.ts +419 -0
- package/src/summarize/index.ts +20 -0
- package/src/summarize/summarizer.test.ts +275 -0
- package/src/summarize/summarizer.ts +597 -0
- package/src/summarize/verify-bugs.test.ts +238 -0
- package/src/types/huggingface-transformers.d.ts +66 -0
- package/src/utils/index.ts +1 -0
- package/src/utils/tokens.test.ts +142 -0
- package/src/utils/tokens.ts +186 -0
- package/tests/fixtures/cli/.mdcontext/active-provider.json +7 -0
- package/tests/fixtures/cli/.mdcontext/config.json +8 -0
- package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/indexes/documents.json +33 -0
- package/tests/fixtures/cli/.mdcontext/indexes/links.json +12 -0
- package/tests/fixtures/cli/.mdcontext/indexes/sections.json +247 -0
- package/tests/fixtures/cli/README.md +9 -0
- package/tests/fixtures/cli/api-reference.md +11 -0
- package/tests/fixtures/cli/getting-started.md +11 -0
- package/tests/integration/embed-index.test.ts +712 -0
- package/tests/integration/search-context.test.ts +469 -0
- package/tests/integration/search-semantic.test.ts +522 -0
- package/tsconfig.json +26 -0
- package/vitest.config.ts +16 -0
- package/vitest.setup.ts +12 -0
package/docs/DESIGN.md
ADDED
|
@@ -0,0 +1,439 @@
|
|
|
1
|
+
# Design: @hw/mdcontext
|
|
2
|
+
|
|
3
|
+
## Data Model
|
|
4
|
+
|
|
5
|
+
### Document
|
|
6
|
+
|
|
7
|
+
```typescript
|
|
8
|
+
interface MdDocument {
|
|
9
|
+
readonly id: string; // hash of path
|
|
10
|
+
readonly path: string; // relative to root
|
|
11
|
+
readonly title: string; // first H1 or filename
|
|
12
|
+
readonly frontmatter: Record<string, unknown>;
|
|
13
|
+
readonly sections: readonly MdSection[];
|
|
14
|
+
readonly links: readonly MdLink[];
|
|
15
|
+
readonly codeBlocks: readonly MdCodeBlock[];
|
|
16
|
+
readonly metadata: DocumentMetadata;
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
interface DocumentMetadata {
|
|
20
|
+
readonly wordCount: number;
|
|
21
|
+
readonly tokenCount: number; // estimated
|
|
22
|
+
readonly headingCount: number;
|
|
23
|
+
readonly linkCount: number;
|
|
24
|
+
readonly codeBlockCount: number;
|
|
25
|
+
readonly lastModified: Date;
|
|
26
|
+
readonly indexedAt: Date;
|
|
27
|
+
}
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
### Section
|
|
31
|
+
|
|
32
|
+
```typescript
|
|
33
|
+
interface MdSection {
|
|
34
|
+
readonly id: string; // doc-id + heading slug
|
|
35
|
+
readonly heading: string; // heading text
|
|
36
|
+
readonly level: 1 | 2 | 3 | 4 | 5 | 6;
|
|
37
|
+
readonly content: string; // raw markdown content
|
|
38
|
+
readonly plainText: string; // stripped for embedding
|
|
39
|
+
readonly startLine: number;
|
|
40
|
+
readonly endLine: number;
|
|
41
|
+
readonly children: readonly MdSection[]; // nested sections
|
|
42
|
+
readonly metadata: SectionMetadata;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
interface SectionMetadata {
|
|
46
|
+
readonly wordCount: number;
|
|
47
|
+
readonly tokenCount: number;
|
|
48
|
+
readonly hasCode: boolean;
|
|
49
|
+
readonly hasList: boolean;
|
|
50
|
+
readonly hasTable: boolean;
|
|
51
|
+
}
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
### Link
|
|
55
|
+
|
|
56
|
+
```typescript
|
|
57
|
+
interface MdLink {
|
|
58
|
+
readonly type: "internal" | "external" | "image";
|
|
59
|
+
readonly href: string;
|
|
60
|
+
readonly text: string;
|
|
61
|
+
readonly sectionId: string; // which section contains this link
|
|
62
|
+
readonly line: number;
|
|
63
|
+
}
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### Code Block
|
|
67
|
+
|
|
68
|
+
```typescript
|
|
69
|
+
interface MdCodeBlock {
|
|
70
|
+
readonly language: string | null;
|
|
71
|
+
readonly content: string;
|
|
72
|
+
readonly sectionId: string;
|
|
73
|
+
readonly startLine: number;
|
|
74
|
+
readonly endLine: number;
|
|
75
|
+
}
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
---
|
|
79
|
+
|
|
80
|
+
## Index Structure
|
|
81
|
+
|
|
82
|
+
### File Layout
|
|
83
|
+
|
|
84
|
+
```
|
|
85
|
+
.mdcontext/
|
|
86
|
+
config.json # Configuration
|
|
87
|
+
indexes/
|
|
88
|
+
documents.json # Document metadata index
|
|
89
|
+
sections.json # Section index
|
|
90
|
+
links.json # Link graph (forward + back)
|
|
91
|
+
vectors.faiss # Embedding vectors
|
|
92
|
+
vectors.meta.json # Vector ID → Section ID mapping
|
|
93
|
+
cache/
|
|
94
|
+
parsed/ # Cached parsed documents
|
|
95
|
+
<hash>.json
|
|
96
|
+
metrics/
|
|
97
|
+
queries.jsonl # Query log
|
|
98
|
+
stats.json # Aggregated stats
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
### Document Index
|
|
102
|
+
|
|
103
|
+
```typescript
|
|
104
|
+
// documents.json
|
|
105
|
+
interface DocumentIndex {
|
|
106
|
+
readonly version: number;
|
|
107
|
+
readonly rootPath: string;
|
|
108
|
+
readonly documents: Record<string, DocumentEntry>;
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
interface DocumentEntry {
|
|
112
|
+
readonly path: string;
|
|
113
|
+
readonly title: string;
|
|
114
|
+
readonly mtime: number;
|
|
115
|
+
readonly hash: string; // content hash for change detection
|
|
116
|
+
readonly tokenCount: number;
|
|
117
|
+
readonly sectionCount: number;
|
|
118
|
+
}
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
### Section Index
|
|
122
|
+
|
|
123
|
+
```typescript
|
|
124
|
+
// sections.json
|
|
125
|
+
interface SectionIndex {
|
|
126
|
+
readonly version: number;
|
|
127
|
+
readonly sections: Record<string, SectionEntry>;
|
|
128
|
+
readonly byHeading: Record<string, string[]>; // heading → section IDs
|
|
129
|
+
readonly byDocument: Record<string, string[]>; // doc ID → section IDs
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
interface SectionEntry {
|
|
133
|
+
readonly documentId: string;
|
|
134
|
+
readonly heading: string;
|
|
135
|
+
readonly level: number;
|
|
136
|
+
readonly startLine: number;
|
|
137
|
+
readonly tokenCount: number;
|
|
138
|
+
}
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
### Link Index
|
|
142
|
+
|
|
143
|
+
```typescript
|
|
144
|
+
// links.json
|
|
145
|
+
interface LinkIndex {
|
|
146
|
+
readonly version: number;
|
|
147
|
+
readonly forward: Record<string, string[]>; // doc → docs it links to
|
|
148
|
+
readonly backward: Record<string, string[]>; // doc → docs that link to it
|
|
149
|
+
readonly broken: string[]; // links to non-existent docs
|
|
150
|
+
}
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
---
|
|
154
|
+
|
|
155
|
+
## Embedding Strategy
|
|
156
|
+
|
|
157
|
+
### What to Embed
|
|
158
|
+
|
|
159
|
+
| Unit | Pros | Cons |
|
|
160
|
+
| ---------- | --------------------- | --------------------------------------- |
|
|
161
|
+
| Document | Simple, fewer vectors | Too coarse, loses section relevance |
|
|
162
|
+
| Section | Good granularity | Many vectors, section boundaries matter |
|
|
163
|
+
| Paragraph | Fine-grained | Too many vectors, context loss |
|
|
164
|
+
| **Hybrid** | Best of both | More complexity |
|
|
165
|
+
|
|
166
|
+
**Decision: Section-level embeddings with document-level fallback**
|
|
167
|
+
|
|
168
|
+
- Each section gets embedded
|
|
169
|
+
- Very short sections (< 50 tokens) are merged into their parent section
|
|
170
|
+
- Document-level embedding as additional signal
|
|
171
|
+
|
|
172
|
+
### Embedding Content
|
|
173
|
+
|
|
174
|
+
For each section, embed:
|
|
175
|
+
|
|
176
|
+
```
|
|
177
|
+
{heading}
|
|
178
|
+
|
|
179
|
+
{plainText first 500 tokens}
|
|
180
|
+
|
|
181
|
+
Parent: {parent heading}
|
|
182
|
+
Document: {document title}
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
Including the parent heading and the document title gives each section embedding hierarchical context.
|
|
186
|
+
|
|
187
|
+
### Vector Dimensions
|
|
188
|
+
|
|
189
|
+
| Model | Dimensions | Notes |
|
|
190
|
+
| ----------------------------- | ---------- | ----------------------- |
|
|
191
|
+
| OpenAI text-embedding-3-small | 1536 | Good quality, cheap |
|
|
192
|
+
| OpenAI text-embedding-3-large | 3072 | Best quality, expensive |
|
|
193
|
+
| BGE-large-en-v1.5 | 1024 | Local, good quality |
|
|
194
|
+
|
|
195
|
+
**Decision: Start with text-embedding-3-small, make pluggable**
|
|
196
|
+
|
|
197
|
+
---
|
|
198
|
+
|
|
199
|
+
## Summarization Strategy
|
|
200
|
+
|
|
201
|
+
### Hierarchical Compression
|
|
202
|
+
|
|
203
|
+
```
|
|
204
|
+
Level 0: Raw section content
|
|
205
|
+
↓ (compress)
|
|
206
|
+
Level 1: Section summary (key sentences, ~20% of original)
|
|
207
|
+
↓ (compress)
|
|
208
|
+
Level 2: Document summary (combined section summaries)
|
|
209
|
+
↓ (compress)
|
|
210
|
+
Level 3: Collection summary (key documents, themes)
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
### Section Summary Algorithm
|
|
214
|
+
|
|
215
|
+
1. **Extract key sentences** — First sentence, sentences with keywords, concluding sentence
|
|
216
|
+
2. **Preserve structure markers** — Keep heading, list item starts, code block presence
|
|
217
|
+
3. **Token budget** — Target 20% of original, min 50 tokens, max 500 tokens
|
|
218
|
+
|
|
219
|
+
### Document Summary Template
|
|
220
|
+
|
|
221
|
+
```markdown
|
|
222
|
+
# {title}
|
|
223
|
+
|
|
224
|
+
## Overview
|
|
225
|
+
|
|
226
|
+
{first paragraph or extracted thesis}
|
|
227
|
+
|
|
228
|
+
## Sections
|
|
229
|
+
|
|
230
|
+
- **{heading 1}**: {one-line summary}
|
|
231
|
+
- **{heading 2}**: {one-line summary}
|
|
232
|
+
...
|
|
233
|
+
|
|
234
|
+
## Key Points
|
|
235
|
+
|
|
236
|
+
- {extracted key point 1}
|
|
237
|
+
- {extracted key point 2}
|
|
238
|
+
|
|
239
|
+
## Links
|
|
240
|
+
|
|
241
|
+
- References: {count} internal, {count} external
|
|
242
|
+
- Referenced by: {backlink count} documents
|
|
243
|
+
|
|
244
|
+
**Tokens:** {original} → {summary} ({percent}% reduction)
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
---
|
|
248
|
+
|
|
249
|
+
## Analytics Design
|
|
250
|
+
|
|
251
|
+
### Metrics Categories
|
|
252
|
+
|
|
253
|
+
#### Performance Metrics
|
|
254
|
+
|
|
255
|
+
| Metric | Type | Labels | Description |
|
|
256
|
+
| ----------------------------------- | --------- | ------- | ------------------------- |
|
|
257
|
+
| `mdcontext_parse_duration_ms` | Histogram | - | Time to parse a document |
|
|
258
|
+
| `mdcontext_index_build_duration_ms` | Histogram | `type` | Time to build index |
|
|
259
|
+
| `mdcontext_query_duration_ms` | Histogram | `type` | Query execution time |
|
|
260
|
+
| `mdcontext_embed_duration_ms` | Histogram | `model` | Embedding generation time |
|
|
261
|
+
| `mdcontext_cache_hit_total` | Counter | `cache` | Cache hits |
|
|
262
|
+
| `mdcontext_cache_miss_total` | Counter | `cache` | Cache misses |
|
|
263
|
+
|
|
264
|
+
#### Usage Metrics
|
|
265
|
+
|
|
266
|
+
| Metric | Type | Labels | Description |
|
|
267
|
+
| ------------------------------- | ------- | ------ | ------------------------ |
|
|
268
|
+
| `mdcontext_queries_total` | Counter | `type` | Total queries |
|
|
269
|
+
| `mdcontext_tokens_input_total` | Counter | - | Tokens sent to the embedding model |
|
|
270
|
+
| `mdcontext_tokens_output_total` | Counter | - | Tokens in responses |
|
|
271
|
+
| `mdcontext_documents_indexed` | Gauge | - | Documents in index |
|
|
272
|
+
| `mdcontext_sections_indexed` | Gauge | - | Sections in index |
|
|
273
|
+
|
|
274
|
+
#### Quality Metrics
|
|
275
|
+
|
|
276
|
+
| Metric | Type | Labels | Description |
|
|
277
|
+
| ----------------------------------- | --------- | ------- | ------------------------ |
|
|
278
|
+
| `mdcontext_search_results_returned` | Histogram | - | Results per query |
|
|
279
|
+
| `mdcontext_compression_ratio` | Histogram | `level` | Token reduction achieved |
|
|
280
|
+
|
|
281
|
+
### Query Logging
|
|
282
|
+
|
|
283
|
+
```typescript
|
|
284
|
+
interface QueryLogEntry {
|
|
285
|
+
readonly timestamp: Date;
|
|
286
|
+
readonly type: "search" | "context" | "structure";
|
|
287
|
+
readonly query: string;
|
|
288
|
+
readonly filters: Record<string, unknown>;
|
|
289
|
+
readonly resultCount: number;
|
|
290
|
+
readonly durationMs: number;
|
|
291
|
+
readonly tokensUsed: number;
|
|
292
|
+
readonly cacheHit: boolean;
|
|
293
|
+
}
|
|
294
|
+
```
|
|
295
|
+
|
|
296
|
+
Query log entries are stored in `metrics/queries.jsonl` as JSONL (one JSON object per line) for easy streaming analysis.
|
|
297
|
+
|
|
298
|
+
---
|
|
299
|
+
|
|
300
|
+
## API Design
|
|
301
|
+
|
|
302
|
+
### Core Functions
|
|
303
|
+
|
|
304
|
+
```typescript
|
|
305
|
+
// Parsing
|
|
306
|
+
parse(content: string): Effect<MdDocument, ParseError>
|
|
307
|
+
parseFile(path: string): Effect<MdDocument, ParseError | IoError>
|
|
308
|
+
|
|
309
|
+
// Indexing
|
|
310
|
+
index(dir: string, options?: IndexOptions): Effect<IndexResult, IndexError>
|
|
311
|
+
reindex(paths: string[]): Effect<IndexResult, IndexError>
|
|
312
|
+
|
|
313
|
+
// Search
|
|
314
|
+
search(query: string, options?: SearchOptions): Effect<SearchResult[], SearchError>
|
|
315
|
+
structuralSearch(pattern: StructuralPattern): Effect<StructuralResult[], SearchError>
|
|
316
|
+
|
|
317
|
+
// Context
|
|
318
|
+
getContext(path: string, options?: ContextOptions): Effect<Context, ContextError>
|
|
319
|
+
assembleContext(sources: ContextSource[], budget: number): Effect<AssembledContext, ContextError>
|
|
320
|
+
|
|
321
|
+
// Summarization
|
|
322
|
+
summarize(doc: MdDocument, level: SummaryLevel): Effect<Summary, SummarizeError>
|
|
323
|
+
|
|
324
|
+
// Metrics
|
|
325
|
+
getMetrics(): Effect<Metrics, never>
|
|
326
|
+
```
|
|
327
|
+
|
|
328
|
+
### Options Types
|
|
329
|
+
|
|
330
|
+
```typescript
|
|
331
|
+
interface IndexOptions {
|
|
332
|
+
readonly include?: string[]; // Glob patterns
|
|
333
|
+
readonly exclude?: string[]; // Glob patterns
|
|
334
|
+
readonly embeddings?: boolean; // Generate embeddings
|
|
335
|
+
readonly force?: boolean; // Rebuild even if cached
|
|
336
|
+
}
|
|
337
|
+
|
|
338
|
+
interface SearchOptions {
|
|
339
|
+
readonly limit?: number; // Max results (default 10)
|
|
340
|
+
readonly threshold?: number; // Min similarity (default 0.7)
|
|
341
|
+
readonly filter?: SearchFilter; // Structural filters
|
|
342
|
+
}
|
|
343
|
+
|
|
344
|
+
interface SearchFilter {
|
|
345
|
+
readonly paths?: string[]; // Limit to these paths
|
|
346
|
+
readonly headingLevel?: number[]; // Only these heading levels
|
|
347
|
+
readonly hasCode?: boolean; // Sections with code
|
|
348
|
+
readonly minTokens?: number; // Minimum section size
|
|
349
|
+
readonly maxTokens?: number; // Maximum section size
|
|
350
|
+
}
|
|
351
|
+
|
|
352
|
+
interface ContextOptions {
|
|
353
|
+
readonly level?: SummaryLevel; // 'full' | 'summary' | 'brief'
|
|
354
|
+
readonly maxTokens?: number; // Token budget
|
|
355
|
+
readonly sections?: string[]; // Specific sections
|
|
356
|
+
}
|
|
357
|
+
```
|
|
358
|
+
|
|
359
|
+
---
|
|
360
|
+
|
|
361
|
+
## CLI Design
|
|
362
|
+
|
|
363
|
+
```bash
|
|
364
|
+
# Indexing
|
|
365
|
+
mdcontext index [dir] # Index directory (default: .)
|
|
366
|
+
mdcontext index --watch # Index and watch for changes
|
|
367
|
+
mdcontext index --force # Force full rebuild
|
|
368
|
+
|
|
369
|
+
# Search
|
|
370
|
+
mdcontext search "query" # Semantic search
|
|
371
|
+
mdcontext search "query" --limit 5 # Limit results
|
|
372
|
+
mdcontext search "query" --json # JSON output
|
|
373
|
+
|
|
374
|
+
# Context
|
|
375
|
+
mdcontext context <path> # Full document context
|
|
376
|
+
mdcontext context <path> --brief # Brief summary
|
|
377
|
+
mdcontext context <path> --tokens 500 # Token budget
|
|
378
|
+
|
|
379
|
+
# Structure
|
|
380
|
+
mdcontext structure <path> # Show document structure
|
|
381
|
+
mdcontext structure <path> --tree # Tree view
|
|
382
|
+
mdcontext links <path> # Show link graph
|
|
383
|
+
mdcontext backlinks <path> # What links to this?
|
|
384
|
+
|
|
385
|
+
# Metrics
|
|
386
|
+
mdcontext metrics # Show current metrics
|
|
387
|
+
mdcontext metrics --json # JSON format
|
|
388
|
+
mdcontext metrics --reset # Reset counters
|
|
389
|
+
|
|
390
|
+
# Daemon
|
|
391
|
+
mdcontext daemon # Run as daemon
|
|
392
|
+
mdcontext daemon --port 8765 # Custom port
|
|
393
|
+
```
|
|
394
|
+
|
|
395
|
+
---
|
|
396
|
+
|
|
397
|
+
## MCP Tools
|
|
398
|
+
|
|
399
|
+
```typescript
|
|
400
|
+
const tools = [
|
|
401
|
+
{
|
|
402
|
+
name: "md_search",
|
|
403
|
+
description: "Search markdown documents by meaning",
|
|
404
|
+
parameters: {
|
|
405
|
+
query: { type: "string", required: true },
|
|
406
|
+
limit: { type: "number", default: 5 },
|
|
407
|
+
path_filter: { type: "string", description: "Glob pattern" },
|
|
408
|
+
},
|
|
409
|
+
},
|
|
410
|
+
{
|
|
411
|
+
name: "md_context",
|
|
412
|
+
description: "Get LLM-ready context from a markdown file",
|
|
413
|
+
parameters: {
|
|
414
|
+
path: { type: "string", required: true },
|
|
415
|
+
level: { type: "string", enum: ["full", "summary", "brief"] },
|
|
416
|
+
max_tokens: { type: "number" },
|
|
417
|
+
},
|
|
418
|
+
},
|
|
419
|
+
{
|
|
420
|
+
name: "md_structure",
|
|
421
|
+
description: "Get the structure/outline of a markdown file",
|
|
422
|
+
parameters: {
|
|
423
|
+
path: { type: "string", required: true },
|
|
424
|
+
},
|
|
425
|
+
},
|
|
426
|
+
{
|
|
427
|
+
name: "md_links",
|
|
428
|
+
description: "Get links to/from a markdown file",
|
|
429
|
+
parameters: {
|
|
430
|
+
path: { type: "string", required: true },
|
|
431
|
+
direction: { type: "string", enum: ["outgoing", "incoming", "both"] },
|
|
432
|
+
},
|
|
433
|
+
},
|
|
434
|
+
];
|
|
435
|
+
```
|
|
436
|
+
|
|
437
|
+
---
|
|
438
|
+
|
|
439
|
+
_Created: 2025-01-18_
|