mdcontext 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140) hide show
  1. package/.changeset/README.md +28 -0
  2. package/.changeset/config.json +11 -0
  3. package/.github/workflows/ci.yml +83 -0
  4. package/.github/workflows/release.yml +113 -0
  5. package/.tldrignore +112 -0
  6. package/AGENTS.md +46 -0
  7. package/BACKLOG.md +338 -0
  8. package/README.md +231 -11
  9. package/biome.json +36 -0
  10. package/cspell.config.yaml +14 -0
  11. package/dist/chunk-KRYIFLQR.js +92 -0
  12. package/dist/chunk-S7E6TFX6.js +742 -0
  13. package/dist/chunk-VVTGZNBT.js +1519 -0
  14. package/dist/cli/main.d.ts +1 -0
  15. package/dist/cli/main.js +2015 -0
  16. package/dist/index.d.ts +266 -0
  17. package/dist/index.js +86 -0
  18. package/dist/mcp/server.d.ts +1 -0
  19. package/dist/mcp/server.js +376 -0
  20. package/docs/019-USAGE.md +586 -0
  21. package/docs/020-current-implementation.md +364 -0
  22. package/docs/021-DOGFOODING-FINDINGS.md +175 -0
  23. package/docs/BACKLOG.md +80 -0
  24. package/docs/DESIGN.md +439 -0
  25. package/docs/PROJECT.md +88 -0
  26. package/docs/ROADMAP.md +407 -0
  27. package/docs/test-links.md +9 -0
  28. package/package.json +69 -10
  29. package/pnpm-workspace.yaml +5 -0
  30. package/research/config-analysis/01-current-implementation.md +470 -0
  31. package/research/config-analysis/02-strategy-recommendation.md +428 -0
  32. package/research/config-analysis/03-task-candidates.md +715 -0
  33. package/research/config-analysis/033-research-configuration-management.md +828 -0
  34. package/research/config-analysis/034-research-effect-cli-config.md +1504 -0
  35. package/research/config-analysis/04-consolidated-task-candidates.md +277 -0
  36. package/research/dogfood/consolidated-tool-evaluation.md +373 -0
  37. package/research/dogfood/strategy-a/a-synthesis.md +184 -0
  38. package/research/dogfood/strategy-a/a1-docs.md +226 -0
  39. package/research/dogfood/strategy-a/a2-amorphic.md +156 -0
  40. package/research/dogfood/strategy-a/a3-llm.md +164 -0
  41. package/research/dogfood/strategy-b/b-synthesis.md +228 -0
  42. package/research/dogfood/strategy-b/b1-architecture.md +207 -0
  43. package/research/dogfood/strategy-b/b2-gaps.md +258 -0
  44. package/research/dogfood/strategy-b/b3-workflows.md +250 -0
  45. package/research/dogfood/strategy-c/c-synthesis.md +451 -0
  46. package/research/dogfood/strategy-c/c1-explorer.md +192 -0
  47. package/research/dogfood/strategy-c/c2-diver-memory.md +145 -0
  48. package/research/dogfood/strategy-c/c3-diver-control.md +148 -0
  49. package/research/dogfood/strategy-c/c4-diver-failure.md +151 -0
  50. package/research/dogfood/strategy-c/c5-diver-execution.md +221 -0
  51. package/research/dogfood/strategy-c/c6-diver-org.md +221 -0
  52. package/research/effect-cli-error-handling.md +845 -0
  53. package/research/effect-errors-as-values.md +943 -0
  54. package/research/errors-task-analysis/00-consolidated-tasks.md +207 -0
  55. package/research/errors-task-analysis/cli-commands-analysis.md +909 -0
  56. package/research/errors-task-analysis/embeddings-analysis.md +709 -0
  57. package/research/errors-task-analysis/index-search-analysis.md +812 -0
  58. package/research/mdcontext-error-analysis.md +521 -0
  59. package/research/npm_publish/011-npm-workflow-research-agent2.md +792 -0
  60. package/research/npm_publish/012-npm-workflow-research-agent1.md +530 -0
  61. package/research/npm_publish/013-npm-workflow-research-agent3.md +722 -0
  62. package/research/npm_publish/014-npm-workflow-synthesis.md +556 -0
  63. package/research/npm_publish/031-npm-workflow-task-analysis.md +134 -0
  64. package/research/semantic-search/002-research-embedding-models.md +490 -0
  65. package/research/semantic-search/003-research-rag-alternatives.md +523 -0
  66. package/research/semantic-search/004-research-vector-search.md +841 -0
  67. package/research/semantic-search/032-research-semantic-search.md +427 -0
  68. package/research/task-management-2026/00-synthesis-recommendations.md +295 -0
  69. package/research/task-management-2026/01-ai-workflow-tools.md +416 -0
  70. package/research/task-management-2026/02-agent-framework-patterns.md +476 -0
  71. package/research/task-management-2026/03-lightweight-file-based.md +567 -0
  72. package/research/task-management-2026/04-established-tools-ai-features.md +541 -0
  73. package/research/task-management-2026/linear/01-core-features-workflow.md +771 -0
  74. package/research/task-management-2026/linear/02-api-integrations.md +930 -0
  75. package/research/task-management-2026/linear/03-ai-features.md +368 -0
  76. package/research/task-management-2026/linear/04-pricing-setup.md +205 -0
  77. package/research/task-management-2026/linear/05-usage-patterns-best-practices.md +605 -0
  78. package/scripts/rebuild-hnswlib.js +63 -0
  79. package/src/cli/argv-preprocessor.test.ts +210 -0
  80. package/src/cli/argv-preprocessor.ts +202 -0
  81. package/src/cli/cli.test.ts +430 -0
  82. package/src/cli/commands/backlinks.ts +54 -0
  83. package/src/cli/commands/context.ts +197 -0
  84. package/src/cli/commands/index-cmd.ts +300 -0
  85. package/src/cli/commands/index.ts +13 -0
  86. package/src/cli/commands/links.ts +52 -0
  87. package/src/cli/commands/search.ts +451 -0
  88. package/src/cli/commands/stats.ts +146 -0
  89. package/src/cli/commands/tree.ts +107 -0
  90. package/src/cli/flag-schemas.ts +275 -0
  91. package/src/cli/help.ts +386 -0
  92. package/src/cli/index.ts +9 -0
  93. package/src/cli/main.ts +145 -0
  94. package/src/cli/options.ts +31 -0
  95. package/src/cli/typo-suggester.test.ts +105 -0
  96. package/src/cli/typo-suggester.ts +130 -0
  97. package/src/cli/utils.ts +126 -0
  98. package/src/core/index.ts +1 -0
  99. package/src/core/types.ts +140 -0
  100. package/src/embeddings/index.ts +8 -0
  101. package/src/embeddings/openai-provider.ts +165 -0
  102. package/src/embeddings/semantic-search.ts +583 -0
  103. package/src/embeddings/types.ts +82 -0
  104. package/src/embeddings/vector-store.ts +299 -0
  105. package/src/index/index.ts +4 -0
  106. package/src/index/indexer.ts +446 -0
  107. package/src/index/storage.ts +196 -0
  108. package/src/index/types.ts +109 -0
  109. package/src/index/watcher.ts +131 -0
  110. package/src/index.ts +8 -0
  111. package/src/mcp/server.ts +483 -0
  112. package/src/parser/index.ts +1 -0
  113. package/src/parser/parser.test.ts +291 -0
  114. package/src/parser/parser.ts +395 -0
  115. package/src/parser/section-filter.ts +270 -0
  116. package/src/search/query-parser.test.ts +260 -0
  117. package/src/search/query-parser.ts +319 -0
  118. package/src/search/searcher.test.ts +182 -0
  119. package/src/search/searcher.ts +602 -0
  120. package/src/summarize/budget-bugs.test.ts +620 -0
  121. package/src/summarize/formatters.ts +419 -0
  122. package/src/summarize/index.ts +20 -0
  123. package/src/summarize/summarizer.test.ts +275 -0
  124. package/src/summarize/summarizer.ts +528 -0
  125. package/src/summarize/verify-bugs.test.ts +238 -0
  126. package/src/utils/index.ts +1 -0
  127. package/src/utils/tokens.test.ts +142 -0
  128. package/src/utils/tokens.ts +186 -0
  129. package/tests/fixtures/cli/.mdcontext/config.json +8 -0
  130. package/tests/fixtures/cli/.mdcontext/indexes/documents.json +33 -0
  131. package/tests/fixtures/cli/.mdcontext/indexes/links.json +12 -0
  132. package/tests/fixtures/cli/.mdcontext/indexes/sections.json +233 -0
  133. package/tests/fixtures/cli/.mdcontext/vectors.bin +0 -0
  134. package/tests/fixtures/cli/.mdcontext/vectors.meta.json +1264 -0
  135. package/tests/fixtures/cli/README.md +9 -0
  136. package/tests/fixtures/cli/api-reference.md +11 -0
  137. package/tests/fixtures/cli/getting-started.md +11 -0
  138. package/tsconfig.json +26 -0
  139. package/vitest.config.ts +21 -0
  140. package/vitest.setup.ts +12 -0
package/BACKLOG.md ADDED
@@ -0,0 +1,338 @@
1
+ # mdcontext Improvement Backlog
2
+
3
+ > Generated from a validation experiment with 11 AI agents across 3 strategies.
4
+ > See `/reports/FINAL-SYNTHESIS.md` for full analysis.
5
+
6
+ ---
7
+
8
+ ## P0 - Critical (Blocking Agent Workflows)
9
+
10
+ ### 1. Boolean Query Operators
11
+
12
+ **Problem:** Agents couldn't search for multi-term concepts like "architecture AND criticism" or "checkpoint NOT example". Phrases returned 0 results even when concepts were present separately.
13
+
14
+ **Impact:** 3-5x more commands needed for workaround (multiple single-term searches + manual correlation). Strategy B agents rated tool lower (4.2/5) partly due to this.
15
+
16
+ **Solution:** Add boolean operators to search command.
17
+
18
+ **Acceptance Criteria:**
19
+ - [ ] `mdcontext search "auth AND criticism"` returns results containing both terms
20
+ - [ ] `mdcontext search "checkpoint OR gate"` returns results containing either term
21
+ - [ ] `mdcontext search "implementation NOT example"` excludes results with "example"
22
+ - [ ] `mdcontext search --help` documents boolean syntax
23
+
24
+ **Effort:** Medium
25
+
26
+ **Sources:** B1, B2, B3, C3, C4, FINAL-SYNTHESIS
27
+
28
+ ---
29
+
30
+ ### 2. Graceful Embeddings Fallback
31
+
32
+ **Problem:** Semantic searches returned 0 results without clear indication that embeddings weren't built. Agent A2 was confused by silent failures.
33
+
34
+ **Impact:** Inconsistent behavior, wasted agent turns, reduced confidence. A2 rating would be 5/5 with better UX.
35
+
36
+ **Solution:** Auto-detect embeddings state and provide clear feedback.
37
+
38
+ **Acceptance Criteria:**
39
+ - [ ] When embeddings don't exist, search shows: "Semantic search unavailable. Using structural search. Run `mdcontext index --embed` for semantic search."
40
+ - [ ] OR: Auto-prompt on first search: "Enable semantic search? (requires ~30s indexing)"
41
+ - [ ] Search output shows mode indicator: `[semantic]` or `[structural]`
42
+ - [ ] `mdcontext stats` shows embeddings status: "Embeddings: Yes/No (run index --embed to enable)"
43
+
44
+ **Effort:** Low-Medium
45
+
46
+ **Sources:** A2, FINAL-SYNTHESIS
47
+
48
+ ---
49
+
50
+ ### 3. Section-Level Context Extraction
51
+
52
+ **Problem:** Agents couldn't request context for a specific section without retrieving the entire file. When investigating a specific subsection, full-file context wastes tokens.
53
+
54
+ **Impact:** Over-retrieval or multiple refined searches needed. Forces choosing between full context or aggressive summarization.
55
+
56
+ **Solution:** Enable section-targeted context extraction.
57
+
58
+ **Acceptance Criteria:**
59
+ - [ ] `mdcontext context file.md --section "Memory Model"` returns only that section
60
+ - [ ] `mdcontext context file.md:5.3` returns section 5.3 (by number)
61
+ - [ ] `mdcontext context file.md --section "Memory*"` supports glob patterns
62
+ - [ ] Nested sections included by default, `--shallow` flag for top-level only
63
+
64
+ **Effort:** Medium-High
65
+
66
+ **Sources:** A1, A2, C2, C4, FINAL-SYNTHESIS
67
+
68
+ ---
69
+
70
+ ## P1 - High Priority (Significant UX Improvement)
71
+
72
+ ### 4. Search Result Context Lines
73
+
74
+ **Problem:** Search results show line numbers but minimal surrounding context. Hard to evaluate relevance without reading full sections.
75
+
76
+ **Impact:** Extra commands needed to fetch context around matches. Agents requested grep-like `-C` behavior.
77
+
78
+ **Solution:** Add context lines around search matches.
79
+
80
+ **Acceptance Criteria:**
81
+ - [ ] `mdcontext search "checkpoint" -C 3` shows 3 lines before/after each match
82
+ - [ ] `mdcontext search "checkpoint" -B 2 -A 5` shows 2 before, 5 after
83
+ - [ ] Context lines clearly delineated from match lines
84
+ - [ ] Works with both structural and semantic search
85
+
86
+ **Effort:** Low-Medium
87
+
88
+ **Sources:** B2, C3, C5, FINAL-SYNTHESIS
89
+
90
+ ---
91
+
92
+ ### 5. Remove 10-Result Limit / Add Pagination
93
+
94
+ **Problem:** Default 10 results per query makes it hard to see ALL occurrences of a theme. Agents needed multiple queries to ensure comprehensive coverage.
95
+
96
+ **Impact:** Incomplete results for common terms, extra commands for pagination workarounds.
97
+
98
+ **Solution:** Add flags for result limit control.
99
+
100
+ **Acceptance Criteria:**
101
+ - [ ] `mdcontext search "workflow" --all` shows all matches (no limit)
102
+ - [ ] `mdcontext search "workflow" -n 50` shows up to 50 results
103
+ - [ ] `mdcontext search "workflow" --offset 10 -n 10` for pagination
104
+ - [ ] Default remains 10 for quick searches
105
+
106
+ **Effort:** Low
107
+
108
+ **Sources:** B1, B3, C1, FINAL-SYNTHESIS
109
+
110
+ ---
111
+
112
+ ### 6. Truncation UX Improvement
113
+
114
+ **Problem:** When output was truncated, agents couldn't selectively access the missing sections. The truncation note appeared at the end of the output rather than being clearly signaled upfront.
115
+
116
+ **Impact:** Forces over-retrieval or multiple refined searches. 50-96% reduction sometimes excessive.
117
+
118
+ **Solution:** Better truncation signaling and navigation.
119
+
120
+ **Acceptance Criteria:**
121
+ - [ ] Truncation warning appears at TOP of output: "Output truncated (showing 2000/8500 tokens). Use --full or --section to retrieve more."
122
+ - [ ] `mdcontext context file.md --full` shows complete content (no truncation)
123
+ - [ ] Truncated output shows which sections were included/excluded
124
+ - [ ] `mdcontext context file.md --sections` lists available sections for targeted retrieval
125
+
126
+ **Effort:** Medium
127
+
128
+ **Sources:** A2, B2, C2, FINAL-SYNTHESIS
129
+
130
+ ---
131
+
132
+ ### 7. Phrase Search with Quotes
133
+
134
+ **Problem:** No way to search for exact phrases. Searching for "context resumption" matched the two words independently and returned irrelevant results.
135
+
136
+ **Impact:** Reduced precision for multi-word concepts.
137
+
138
+ **Solution:** Support quoted phrase search.
139
+
140
+ **Acceptance Criteria:**
141
+ - [ ] `mdcontext search '"context resumption"'` matches exact phrase only
142
+ - [ ] `mdcontext search '"drift-free"'` matches hyphenated terms
143
+ - [ ] Can combine with boolean: `mdcontext search '"context resumption" AND drift'`
144
+ - [ ] `mdcontext search --help` documents phrase syntax
145
+
146
+ **Effort:** Medium
147
+
148
+ **Sources:** B1, B2, FINAL-SYNTHESIS
149
+
150
+ ---
151
+
152
+ ## P2 - Medium Priority (Nice to Have)
153
+
154
+ ### 8. Cross-File Link Analysis
155
+
156
+ **Problem:** No command to find which files reference which concepts. Backlinks/links commands showed 0 for most queries.
157
+
158
+ **Impact:** Manual grep needed to understand document relationships. Particularly wanted by Strategy C agents for architectural investigation.
159
+
160
+ **Solution:** Implement concept-based cross-file analysis.
161
+
162
+ **Acceptance Criteria:**
163
+ - [ ] `mdcontext refs "Execution Context"` shows all files mentioning this concept
164
+ - [ ] `mdcontext refs --graph` outputs dependency graph (mermaid/dot format)
165
+ - [ ] `mdcontext backlinks file.md` shows files that link TO this file
166
+ - [ ] Works with markdown links `[text](file.md)` and concept mentions
167
+
168
+ **Effort:** High
169
+
170
+ **Sources:** C1, C6, FINAL-SYNTHESIS
171
+
172
+ ---
173
+
174
+ ### 9. Neighborhood View Around Search Results
175
+
176
+ **Problem:** Search results show isolated matches. Agents wanted to see adjacent sections for context without fetching entire file.
177
+
178
+ **Impact:** Extra context commands needed to understand match surroundings.
179
+
180
+ **Solution:** Add section-level context around search results.
181
+
182
+ **Acceptance Criteria:**
183
+ - [ ] `mdcontext search "checkpoint" --context-sections 1` shows 1 section before/after each match
184
+ - [ ] Section context clearly labeled with headers
185
+ - [ ] Works with structural and semantic search
186
+
187
+ **Effort:** Medium
188
+
189
+ **Sources:** C2, C3, FINAL-SYNTHESIS
190
+
191
+ ---
192
+
193
+ ### 10. Search Mode Indicator
194
+
195
+ **Problem:** Agents couldn't tell if search was using semantic or structural mode. Behavior varied based on embeddings presence.
196
+
197
+ **Impact:** Confusion about why some searches worked differently than others.
198
+
199
+ **Solution:** Always show search mode in output.
200
+
201
+ **Acceptance Criteria:**
202
+ - [ ] Search results header shows: `[semantic search]` or `[structural search]`
203
+ - [ ] If semantic attempted but embeddings missing, show: `[structural search - embeddings not found]`
204
+ - [ ] `mdcontext search --mode` flag to force mode: `--mode semantic` or `--mode structural`
205
+
206
+ **Effort:** Low
207
+
208
+ **Sources:** A2, FINAL-SYNTHESIS
209
+
210
+ ---
211
+
212
+ ### 11. Query Syntax Help
213
+
214
+ **Problem:** Agents had to discover search syntax through trial and error. No examples in help.
215
+
216
+ **Impact:** Wasted turns on failed queries, inconsistent usage patterns.
217
+
218
+ **Solution:** Improve search help with examples.
219
+
220
+ **Acceptance Criteria:**
221
+ - [ ] `mdcontext search --help` includes example section:
222
+ ```
223
+ EXAMPLES:
224
+ mdcontext search "auth" # Single term (structural)
225
+ mdcontext search "how to deploy" # Semantic (if embeddings exist)
226
+ mdcontext search "auth AND deploy" # Boolean AND
227
+ mdcontext search '"context resumption"' # Exact phrase
228
+ mdcontext search "impl NOT test" -C 3 # Exclude term, show context
229
+ ```
230
+ - [ ] `mdcontext search --examples` shows extended examples with explanations
231
+
232
+ **Effort:** Low
233
+
234
+ **Sources:** B2, B3, FINAL-SYNTHESIS
235
+
236
+ ---
237
+
238
+ ## P3 - Low Priority (Future Enhancements)
239
+
240
+ ### 12. Multi-File Glob Context
241
+
242
+ **Problem:** Agents had to run context command on each file separately for batch operations.
243
+
244
+ **Impact:** More commands needed for comprehensive extraction.
245
+
246
+ **Solution:** Support glob patterns in context command.
247
+
248
+ **Acceptance Criteria:**
249
+ - [ ] `mdcontext context "docs/**/*.md"` extracts context from all matching files
250
+ - [ ] `mdcontext context "docs/*.md" -t 5000` applies token budget across all files
251
+ - [ ] Output clearly shows which content came from which file
252
+
253
+ **Effort:** Medium
254
+
255
+ **Sources:** FINAL-SYNTHESIS
256
+
257
+ ---
258
+
259
+ ### 13. Saved Queries / Aliases
260
+
261
+ **Problem:** Agents repeatedly ran similar complex queries.
262
+
263
+ **Impact:** Command duplication, typo risk.
264
+
265
+ **Solution:** Allow saving common query patterns.
266
+
267
+ **Acceptance Criteria:**
268
+ - [ ] `mdcontext alias add arch-issues "search 'architecture AND (problem OR issue OR concern)'"`
269
+ - [ ] `mdcontext arch-issues` runs saved query
270
+ - [ ] Aliases stored in `.mdcontext/aliases.json`
271
+
272
+ **Effort:** Medium
273
+
274
+ **Sources:** FINAL-SYNTHESIS
275
+
276
+ ---
277
+
278
+ ### 14. Relevance Score Display
279
+
280
+ **Problem:** Search results showed matches but not how relevant each match was.
281
+
282
+ **Impact:** Harder to prioritize which results to investigate first.
283
+
284
+ **Solution:** Show relevance/similarity scores.
285
+
286
+ **Acceptance Criteria:**
287
+ - [ ] Semantic search shows similarity score: `[0.87] docs/ARCH.md:45 - Control Plane...`
288
+ - [ ] Results sorted by relevance by default
289
+ - [ ] `--sort recent` flag to sort by file modification time instead
290
+
291
+ **Effort:** Low-Medium
292
+
293
+ **Sources:** C1, FINAL-SYNTHESIS
294
+
295
+ ---
296
+
297
+ ### 15. Troubleshooting Guide
298
+
299
+ **Problem:** Agents encountered issues (0 results, truncation, embeddings) without clear resolution paths.
300
+
301
+ **Impact:** Wasted turns debugging tool behavior.
302
+
303
+ **Solution:** Add troubleshooting documentation.
304
+
305
+ **Acceptance Criteria:**
306
+ - [ ] `mdcontext troubleshoot` command shows common issues and fixes
307
+ - [ ] Covers: 0 results, embeddings setup, truncation, index staleness
308
+ - [ ] OR: Add troubleshooting section to README
309
+
310
+ **Effort:** Low
311
+
312
+ **Sources:** FINAL-SYNTHESIS
313
+
314
+ ---
315
+
316
+ ## Summary
317
+
318
+ | Priority | Count | Theme |
319
+ |----------|-------|-------|
320
+ | P0 Critical | 3 | Boolean search, embeddings UX, section extraction |
321
+ | P1 High | 4 | Context lines, pagination, truncation, phrases |
322
+ | P2 Medium | 4 | Cross-file analysis, neighborhood view, mode indicator, help |
323
+ | P3 Low | 4 | Glob context, aliases, relevance scores, docs |
324
+
325
+ **Total: 15 actionable tasks**
326
+
327
+ ---
328
+
329
+ ## Validation Sources
330
+
331
+ All tasks derived from agent feedback across three strategies:
332
+
333
+ - **Strategy A:** A1, A2, A3, A-Synth (4 agents, by-folder approach)
334
+ - **Strategy B:** B1, B2, B3, B-Synth (4 agents, by-question approach)
335
+ - **Strategy C:** C1, C2-C6, C-Synth (7 agents, two-phase approach)
336
+ - **Final Synthesis:** Cross-strategy analysis
337
+
338
+ Full reports: `/Users/alphab/Dev/LLM/DEV/TMP/memory/reports/`
package/README.md CHANGED
@@ -1,19 +1,239 @@
1
1
  # mdcontext
2
2
 
3
- Intelligent markdown context extraction for LLMs.
3
+ **Give LLMs exactly the markdown they need. Nothing more.**
4
4
 
5
- **Coming soon.** This package is a placeholder for the upcoming release.
5
+ ```bash
6
+ QUICK REFERENCE
7
+ mdcontext index [path] Index markdown files (add --embed for semantic search)
8
+ mdcontext search <query> [path] Search by meaning or structure
9
+ mdcontext context <files...> Get LLM-ready summary
10
+ mdcontext tree [path|file] Show files or document outline
11
+ mdcontext links <file> Outgoing links
12
+ mdcontext backlinks <file> Incoming links
13
+ mdcontext stats [path] Index statistics
14
+ ```
6
15
 
7
- ## What is mdcontext?
16
+ ---
8
17
 
9
- mdcontext extracts structured, token-optimized context from markdown documentation for use with LLMs. Features include:
18
+ ## Why?
10
19
 
11
- - Semantic search with embeddings
12
- - Smart section extraction
13
- - Token counting and optimization
14
- - Context-aware chunking
20
+ Your documentation is 50K tokens of markdown. LLM context windows are limited. Raw markdown dumps waste tokens on structure, headers, and noise.
15
21
 
16
- ## Links
22
+ mdcontext extracts *structure* instead of dumping *text*. The result: **80%+ fewer tokens** while preserving everything needed to understand your docs.
17
23
 
18
- - Website: https://mdcontext.com
19
- - GitHub: https://github.com/mdcontext/mdcontext
24
+ ```bash
25
+ npm install -g mdcontext
26
+ mdcontext index . # Index your docs
27
+ mdcontext search "authentication" # Find by meaning
28
+ mdcontext context README.md # Get LLM-ready summary
29
+ ```
30
+
31
+ ---
32
+
33
+ ## Installation
34
+
35
+ ```bash
36
+ npm install -g mdcontext
37
+ ```
38
+
39
+ Requires Node.js 18+. Semantic search requires `OPENAI_API_KEY`.
40
+
41
+ ---
42
+
43
+ ## Commands
44
+
45
+ ### index
46
+
47
+ Index markdown files. Run this first.
48
+
49
+ ```bash
50
+ mdcontext index # Index current directory (prompts for semantic)
51
+ mdcontext index ./docs # Index specific path
52
+ mdcontext index --embed # Also build embeddings for semantic search
53
+ mdcontext index --no-embed # Skip the semantic search prompt
54
+ mdcontext index --watch # Watch for changes
55
+ mdcontext index --force # Force full rebuild
56
+ ```
57
+
58
+ ### search
59
+
60
+ Search by meaning (semantic) or keyword (text match).
61
+
62
+ ```bash
63
+ mdcontext search "how to authenticate" # Semantic search (if embeddings exist)
64
+ mdcontext search -k "auth.*flow" # Keyword search (text match)
65
+ mdcontext search -n 5 "setup" # Limit to 5 results
66
+ mdcontext search --threshold 0.8 "deploy" # Higher similarity threshold
67
+ ```
68
+
69
+ #### Context Lines
70
+
71
+ Show surrounding lines around matches (like grep):
72
+
73
+ ```bash
74
+ mdcontext search "checkpoint" -C 3 # 3 lines before AND after each match
75
+ mdcontext search "error" -B 2 -A 5 # 2 lines before, 5 lines after
76
+ ```
77
+
78
+ Auto-detection: Uses semantic search if embeddings exist and the query looks like natural language. Use `-k` to force keyword search.
79
+
80
+ ### context
81
+
82
+ Get LLM-ready summaries from one or more files.
83
+
84
+ ```bash
85
+ mdcontext context README.md # Single file
86
+ mdcontext context README.md docs/api.md # Multiple files
87
+ mdcontext context docs/*.md # Glob patterns work
88
+ mdcontext context -t 500 README.md # Token budget
89
+ mdcontext context --brief README.md # Minimal output
90
+ mdcontext context --full README.md # Include full content
91
+ ```
92
+
93
+ #### Section Filtering
94
+
95
+ Extract specific sections instead of entire files:
96
+
97
+ ```bash
98
+ mdcontext context doc.md --sections # List available sections
99
+ mdcontext context doc.md --section "Setup" # Extract by section name
100
+ mdcontext context doc.md --section "2.1" # Extract by section number
101
+ mdcontext context doc.md --section "API*" # Glob pattern matching
102
+ mdcontext context doc.md --section "Config" --shallow # Top-level only (no nested subsections)
103
+ ```
104
+
105
+ The `--sections` flag shows all sections with their numbers and token counts, helping you target exactly what you need.
106
+
107
+ ### tree
108
+
109
+ Show file structure or document outline.
110
+
111
+ ```bash
112
+ mdcontext tree # List markdown files in current directory
113
+ mdcontext tree ./docs # List files in specific directory
114
+ mdcontext tree README.md # Show document outline (heading hierarchy)
115
+ ```
116
+
117
+ Auto-detection: Directory shows file list, file shows document outline.
118
+
119
+ ### links / backlinks
120
+
121
+ Analyze link relationships.
122
+
123
+ ```bash
124
+ mdcontext links README.md # What does this file link to?
125
+ mdcontext backlinks docs/api.md # What files link to this?
126
+ ```
127
+
128
+ ### stats
129
+
130
+ Show index statistics.
131
+
132
+ ```bash
133
+ mdcontext stats # Current directory
134
+ mdcontext stats ./docs # Specific path
135
+ ```
136
+
137
+ ---
138
+
139
+ ## Workflows
140
+
141
+ ### Before Adding Context to LLM
142
+
143
+ ```bash
144
+ mdcontext tree docs/ # See what's available
145
+ mdcontext tree docs/api.md # Check document structure
146
+ mdcontext context -t 500 docs/api.md # Get summary within token budget
147
+ ```
148
+
149
+ ### Finding Documentation
150
+
151
+ ```bash
152
+ mdcontext search "authentication" # By meaning
153
+ mdcontext search -k "Setup|Install" # By keyword pattern
154
+ ```
155
+
156
+ ### Setting Up Semantic Search
157
+
158
+ ```bash
159
+ export OPENAI_API_KEY=sk-...
160
+ mdcontext index --embed # Build embeddings
161
+ mdcontext search "how to deploy" # Now works semantically
162
+ ```
163
+
164
+ ---
165
+
166
+ ## MCP Integration
167
+
168
+ For Claude Desktop (macOS), add to `~/Library/Application Support/Claude/claude_desktop_config.json`:
169
+
170
+ ```json
171
+ {
172
+ "mcpServers": {
173
+ "mdcontext": {
174
+ "command": "mdcontext-mcp",
175
+ "args": []
176
+ }
177
+ }
178
+ }
179
+ ```
180
+
181
+ For Claude Code, add to `.claude/settings.json`:
182
+
183
+ ```json
184
+ {
185
+ "mcpServers": {
186
+ "mdcontext": {
187
+ "command": "mdcontext-mcp",
188
+ "args": []
189
+ }
190
+ }
191
+ }
192
+ ```
193
+
194
+ ### MCP Tools
195
+
196
+ | Tool | Description |
197
+ |------|-------------|
198
+ | `md_search` | Semantic search across indexed docs |
199
+ | `md_context` | Get LLM-ready summary for a file |
200
+ | `md_structure` | Get document outline |
201
+
202
+ ---
203
+
204
+ ## Configuration
205
+
206
+ ### Index Location
207
+
208
+ Indexes are stored in `.mdcontext/` in your project root:
209
+
210
+ ```
211
+ .mdcontext/
212
+ indexes/
213
+ documents.json # Document metadata
214
+ sections.json # Section index
215
+ links.json # Link graph
216
+ vectors.bin # Embeddings (if enabled)
217
+ ```
218
+
219
+ ### Environment Variables
220
+
221
+ | Variable | Description |
222
+ |----------|-------------|
223
+ | `OPENAI_API_KEY` | Required for semantic search |
224
+
225
+ ---
226
+
227
+ ## Performance
228
+
229
+ | Metric | Raw Markdown | mdcontext | Savings |
230
+ |--------|--------------|---------|---------|
231
+ | Context for single doc | 2,500 tokens | 400 tokens | **84%** |
232
+ | Context for 10 docs | 25,000 tokens | 4,000 tokens | **84%** |
233
+ | Search latency | N/A | <100ms | - |
234
+
235
+ ---
236
+
237
+ ## License
238
+
239
+ MIT
package/biome.json ADDED
@@ -0,0 +1,36 @@
1
+ {
2
+ "$schema": "https://biomejs.dev/schemas/2.3.11/schema.json",
3
+ "assist": {
4
+ "actions": {
5
+ "source": {
6
+ "organizeImports": "on"
7
+ }
8
+ }
9
+ },
10
+ "linter": {
11
+ "enabled": true,
12
+ "rules": {
13
+ "recommended": true,
14
+ "suspicious": {
15
+ "noExplicitAny": "off"
16
+ },
17
+ "style": {
18
+ "noNonNullAssertion": "off"
19
+ }
20
+ }
21
+ },
22
+ "formatter": {
23
+ "enabled": true,
24
+ "indentStyle": "space",
25
+ "indentWidth": 2
26
+ },
27
+ "javascript": {
28
+ "formatter": {
29
+ "semicolons": "asNeeded",
30
+ "quoteStyle": "single"
31
+ }
32
+ },
33
+ "files": {
34
+ "includes": ["src/**", "tests/**", "*.json", "*.ts"]
35
+ }
36
+ }
package/cspell.config.yaml ADDED
@@ -0,0 +1,14 @@
1
+ version: "0.2"
2
+ ignorePaths: []
3
+ dictionaryDefinitions: []
4
+ dictionaries: []
5
+ words:
6
+ - attw
7
+ - backlinks
8
+ - Hnsw
9
+ - hnswlib
10
+ - mdcontext
11
+ - modelcontextprotocol
12
+ - tiktoken
13
+ ignoreWords: []
14
+ import: []