mdcontext 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140) hide show
  1. package/.changeset/README.md +28 -0
  2. package/.changeset/config.json +11 -0
  3. package/.github/workflows/ci.yml +83 -0
  4. package/.github/workflows/release.yml +113 -0
  5. package/.tldrignore +112 -0
  6. package/AGENTS.md +46 -0
  7. package/BACKLOG.md +338 -0
  8. package/README.md +231 -11
  9. package/biome.json +36 -0
  10. package/cspell.config.yaml +14 -0
  11. package/dist/chunk-KRYIFLQR.js +92 -0
  12. package/dist/chunk-S7E6TFX6.js +742 -0
  13. package/dist/chunk-VVTGZNBT.js +1519 -0
  14. package/dist/cli/main.d.ts +1 -0
  15. package/dist/cli/main.js +2015 -0
  16. package/dist/index.d.ts +266 -0
  17. package/dist/index.js +86 -0
  18. package/dist/mcp/server.d.ts +1 -0
  19. package/dist/mcp/server.js +376 -0
  20. package/docs/019-USAGE.md +586 -0
  21. package/docs/020-current-implementation.md +364 -0
  22. package/docs/021-DOGFOODING-FINDINGS.md +175 -0
  23. package/docs/BACKLOG.md +80 -0
  24. package/docs/DESIGN.md +439 -0
  25. package/docs/PROJECT.md +88 -0
  26. package/docs/ROADMAP.md +407 -0
  27. package/docs/test-links.md +9 -0
  28. package/package.json +69 -10
  29. package/pnpm-workspace.yaml +5 -0
  30. package/research/config-analysis/01-current-implementation.md +470 -0
  31. package/research/config-analysis/02-strategy-recommendation.md +428 -0
  32. package/research/config-analysis/03-task-candidates.md +715 -0
  33. package/research/config-analysis/033-research-configuration-management.md +828 -0
  34. package/research/config-analysis/034-research-effect-cli-config.md +1504 -0
  35. package/research/config-analysis/04-consolidated-task-candidates.md +277 -0
  36. package/research/dogfood/consolidated-tool-evaluation.md +373 -0
  37. package/research/dogfood/strategy-a/a-synthesis.md +184 -0
  38. package/research/dogfood/strategy-a/a1-docs.md +226 -0
  39. package/research/dogfood/strategy-a/a2-amorphic.md +156 -0
  40. package/research/dogfood/strategy-a/a3-llm.md +164 -0
  41. package/research/dogfood/strategy-b/b-synthesis.md +228 -0
  42. package/research/dogfood/strategy-b/b1-architecture.md +207 -0
  43. package/research/dogfood/strategy-b/b2-gaps.md +258 -0
  44. package/research/dogfood/strategy-b/b3-workflows.md +250 -0
  45. package/research/dogfood/strategy-c/c-synthesis.md +451 -0
  46. package/research/dogfood/strategy-c/c1-explorer.md +192 -0
  47. package/research/dogfood/strategy-c/c2-diver-memory.md +145 -0
  48. package/research/dogfood/strategy-c/c3-diver-control.md +148 -0
  49. package/research/dogfood/strategy-c/c4-diver-failure.md +151 -0
  50. package/research/dogfood/strategy-c/c5-diver-execution.md +221 -0
  51. package/research/dogfood/strategy-c/c6-diver-org.md +221 -0
  52. package/research/effect-cli-error-handling.md +845 -0
  53. package/research/effect-errors-as-values.md +943 -0
  54. package/research/errors-task-analysis/00-consolidated-tasks.md +207 -0
  55. package/research/errors-task-analysis/cli-commands-analysis.md +909 -0
  56. package/research/errors-task-analysis/embeddings-analysis.md +709 -0
  57. package/research/errors-task-analysis/index-search-analysis.md +812 -0
  58. package/research/mdcontext-error-analysis.md +521 -0
  59. package/research/npm_publish/011-npm-workflow-research-agent2.md +792 -0
  60. package/research/npm_publish/012-npm-workflow-research-agent1.md +530 -0
  61. package/research/npm_publish/013-npm-workflow-research-agent3.md +722 -0
  62. package/research/npm_publish/014-npm-workflow-synthesis.md +556 -0
  63. package/research/npm_publish/031-npm-workflow-task-analysis.md +134 -0
  64. package/research/semantic-search/002-research-embedding-models.md +490 -0
  65. package/research/semantic-search/003-research-rag-alternatives.md +523 -0
  66. package/research/semantic-search/004-research-vector-search.md +841 -0
  67. package/research/semantic-search/032-research-semantic-search.md +427 -0
  68. package/research/task-management-2026/00-synthesis-recommendations.md +295 -0
  69. package/research/task-management-2026/01-ai-workflow-tools.md +416 -0
  70. package/research/task-management-2026/02-agent-framework-patterns.md +476 -0
  71. package/research/task-management-2026/03-lightweight-file-based.md +567 -0
  72. package/research/task-management-2026/04-established-tools-ai-features.md +541 -0
  73. package/research/task-management-2026/linear/01-core-features-workflow.md +771 -0
  74. package/research/task-management-2026/linear/02-api-integrations.md +930 -0
  75. package/research/task-management-2026/linear/03-ai-features.md +368 -0
  76. package/research/task-management-2026/linear/04-pricing-setup.md +205 -0
  77. package/research/task-management-2026/linear/05-usage-patterns-best-practices.md +605 -0
  78. package/scripts/rebuild-hnswlib.js +63 -0
  79. package/src/cli/argv-preprocessor.test.ts +210 -0
  80. package/src/cli/argv-preprocessor.ts +202 -0
  81. package/src/cli/cli.test.ts +430 -0
  82. package/src/cli/commands/backlinks.ts +54 -0
  83. package/src/cli/commands/context.ts +197 -0
  84. package/src/cli/commands/index-cmd.ts +300 -0
  85. package/src/cli/commands/index.ts +13 -0
  86. package/src/cli/commands/links.ts +52 -0
  87. package/src/cli/commands/search.ts +451 -0
  88. package/src/cli/commands/stats.ts +146 -0
  89. package/src/cli/commands/tree.ts +107 -0
  90. package/src/cli/flag-schemas.ts +275 -0
  91. package/src/cli/help.ts +386 -0
  92. package/src/cli/index.ts +9 -0
  93. package/src/cli/main.ts +145 -0
  94. package/src/cli/options.ts +31 -0
  95. package/src/cli/typo-suggester.test.ts +105 -0
  96. package/src/cli/typo-suggester.ts +130 -0
  97. package/src/cli/utils.ts +126 -0
  98. package/src/core/index.ts +1 -0
  99. package/src/core/types.ts +140 -0
  100. package/src/embeddings/index.ts +8 -0
  101. package/src/embeddings/openai-provider.ts +165 -0
  102. package/src/embeddings/semantic-search.ts +583 -0
  103. package/src/embeddings/types.ts +82 -0
  104. package/src/embeddings/vector-store.ts +299 -0
  105. package/src/index/index.ts +4 -0
  106. package/src/index/indexer.ts +446 -0
  107. package/src/index/storage.ts +196 -0
  108. package/src/index/types.ts +109 -0
  109. package/src/index/watcher.ts +131 -0
  110. package/src/index.ts +8 -0
  111. package/src/mcp/server.ts +483 -0
  112. package/src/parser/index.ts +1 -0
  113. package/src/parser/parser.test.ts +291 -0
  114. package/src/parser/parser.ts +395 -0
  115. package/src/parser/section-filter.ts +270 -0
  116. package/src/search/query-parser.test.ts +260 -0
  117. package/src/search/query-parser.ts +319 -0
  118. package/src/search/searcher.test.ts +182 -0
  119. package/src/search/searcher.ts +602 -0
  120. package/src/summarize/budget-bugs.test.ts +620 -0
  121. package/src/summarize/formatters.ts +419 -0
  122. package/src/summarize/index.ts +20 -0
  123. package/src/summarize/summarizer.test.ts +275 -0
  124. package/src/summarize/summarizer.ts +528 -0
  125. package/src/summarize/verify-bugs.test.ts +238 -0
  126. package/src/utils/index.ts +1 -0
  127. package/src/utils/tokens.test.ts +142 -0
  128. package/src/utils/tokens.ts +186 -0
  129. package/tests/fixtures/cli/.mdcontext/config.json +8 -0
  130. package/tests/fixtures/cli/.mdcontext/indexes/documents.json +33 -0
  131. package/tests/fixtures/cli/.mdcontext/indexes/links.json +12 -0
  132. package/tests/fixtures/cli/.mdcontext/indexes/sections.json +233 -0
  133. package/tests/fixtures/cli/.mdcontext/vectors.bin +0 -0
  134. package/tests/fixtures/cli/.mdcontext/vectors.meta.json +1264 -0
  135. package/tests/fixtures/cli/README.md +9 -0
  136. package/tests/fixtures/cli/api-reference.md +11 -0
  137. package/tests/fixtures/cli/getting-started.md +11 -0
  138. package/tsconfig.json +26 -0
  139. package/vitest.config.ts +21 -0
  140. package/vitest.setup.ts +12 -0
@@ -0,0 +1,451 @@
1
+ /**
2
+ * SEARCH Command
3
+ *
4
+ * Search markdown content by meaning or heading pattern.
5
+ */
6
+
7
+ import * as path from 'node:path'
8
+ import * as readline from 'node:readline'
9
+ import { Args, Command, Options } from '@effect/cli'
10
+ import { Console, Effect, Option } from 'effect'
11
+ import { handleApiKeyError } from '../../embeddings/openai-provider.js'
12
+ import {
13
+ buildEmbeddings,
14
+ estimateEmbeddingCost,
15
+ semanticSearch,
16
+ } from '../../embeddings/semantic-search.js'
17
+ import { isAdvancedQuery } from '../../search/query-parser.js'
18
+ import { search, searchContent } from '../../search/searcher.js'
19
+ import { jsonOption, prettyOption } from '../options.js'
20
+ import { formatJson, getIndexInfo, isRegexPattern } from '../utils.js'
21
+
22
+ // Default for --auto-index-threshold (seconds): the semantic index is built without prompting when the estimated build time is at or below this value
23
+ const AUTO_INDEX_THRESHOLD_SECONDS = 10
24
+
25
/**
 * Ask a single question on the terminal and resolve with the reply.
 *
 * The reply is trimmed and lower-cased before it is returned, so callers can
 * compare it against plain literals. The readline interface is closed as soon
 * as the answer arrives.
 *
 * @param message - Prompt text written to stdout before reading the answer
 * @returns The normalized answer typed by the user
 */
const promptUser = (message: string): Promise<string> =>
  new Promise<string>((resolve) => {
    const terminal = readline.createInterface({
      input: process.stdin,
      output: process.stdout,
    })

    const onAnswer = (raw: string): void => {
      terminal.close()
      resolve(raw.trim().toLowerCase())
    }

    terminal.question(message, onAnswer)
  })
37
+
38
/**
 * SEARCH command: find sections by keyword/regex match or by semantic
 * similarity.
 *
 * Mode selection priority: `--mode` flag, then `--keyword`, then advanced
 * (boolean/phrase) or regex queries, then whether embeddings exist.
 *
 * When `--mode semantic` is requested without embeddings,
 * `handleMissingEmbeddings` gets the chance to build them. If it reports
 * `false`, the command falls back to keyword search instead of exiting
 * without results.
 *
 * A missing index is reported in both output modes: as text hints, or as an
 * `{ error }` JSON object when `--json` is set.
 */
export const searchCommand = Command.make(
  'search',
  {
    query: Args.text({ name: 'query' }).pipe(
      Args.withDescription('Search query (natural language or regex pattern)'),
    ),
    path: Args.directory({ name: 'path' }).pipe(
      Args.withDescription('Directory to search in'),
      Args.withDefault('.'),
    ),
    keyword: Options.boolean('keyword').pipe(
      Options.withAlias('k'),
      Options.withDescription('Force keyword search (content text match)'),
      Options.withDefault(false),
    ),
    headingOnly: Options.boolean('heading-only').pipe(
      Options.withAlias('H'),
      Options.withDescription('Search headings only (not content)'),
      Options.withDefault(false),
    ),
    mode: Options.choice('mode', ['semantic', 'keyword']).pipe(
      Options.withAlias('m'),
      Options.withDescription('Force search mode: semantic or keyword'),
      Options.optional,
    ),
    limit: Options.integer('limit').pipe(
      Options.withAlias('n'),
      Options.withDescription('Maximum results'),
      Options.withDefault(10),
    ),
    threshold: Options.float('threshold').pipe(
      Options.withDescription('Similarity threshold for semantic search (0-1)'),
      Options.withDefault(0.45),
    ),
    context: Options.integer('context').pipe(
      Options.withAlias('C'),
      Options.withDescription('Lines of context around matches (like grep -C)'),
      Options.optional,
    ),
    beforeContext: Options.integer('before-context').pipe(
      Options.withAlias('B'),
      Options.withDescription('Lines of context before matches (like grep -B)'),
      Options.optional,
    ),
    afterContext: Options.integer('after-context').pipe(
      Options.withAlias('A'),
      Options.withDescription('Lines of context after matches (like grep -A)'),
      Options.optional,
    ),
    autoIndexThreshold: Options.integer('auto-index-threshold').pipe(
      Options.withDescription(
        'Auto-create semantic index if estimated time is under this threshold (seconds)',
      ),
      Options.withDefault(AUTO_INDEX_THRESHOLD_SECONDS),
    ),
    json: jsonOption,
    pretty: prettyOption,
  },
  ({
    query,
    path: dirPath,
    keyword,
    headingOnly,
    mode,
    limit,
    threshold,
    context,
    beforeContext,
    afterContext,
    autoIndexThreshold,
    json,
    pretty,
  }) =>
    Effect.gen(function* () {
      const resolvedDir = path.resolve(dirPath)

      // Index metadata drives both the header output and the mode selection
      const indexInfo = yield* Effect.promise(() => getIndexInfo(resolvedDir))

      // Without an index there is nothing to search. Report that in the
      // requested output format instead of only in text mode.
      if (!indexInfo.exists) {
        if (json) {
          yield* Console.log(formatJson({ error: 'No index found' }, pretty))
        } else {
          yield* Console.log('No index found.')
          yield* Console.log('')
          yield* Console.log('Run: mdcontext index /path/to/docs')
          yield* Console.log(' Add --embed for semantic search capabilities')
        }
        return
      }

      // Tracks whether embeddings are usable; may flip to true if they are
      // created on demand below
      let embedsExist = indexInfo.embeddingsExist

      // Determine search mode
      // Priority: --mode flag > --keyword flag > regex pattern > embeddings availability
      let useKeyword: boolean
      let modeReason: string

      const modeValue = Option.getOrUndefined(mode)

      if (modeValue === 'semantic') {
        // User explicitly requested semantic search; try to create the
        // semantic index when it is missing
        if (!embedsExist) {
          embedsExist = yield* handleMissingEmbeddings(
            resolvedDir,
            autoIndexThreshold,
            json,
          )
        }

        if (embedsExist) {
          useKeyword = false
          modeReason = '--mode semantic'
        } else {
          // Index creation was declined or failed: continue with keyword
          // search rather than returning no results at all
          useKeyword = true
          modeReason = 'semantic index unavailable'
        }
      } else if (modeValue === 'keyword') {
        useKeyword = true
        modeReason = '--mode keyword'
      } else if (keyword) {
        useKeyword = true
        modeReason = '--keyword flag'
      } else if (isAdvancedQuery(query)) {
        // Detect quoted phrases and boolean operators (AND, OR, NOT)
        useKeyword = true
        modeReason = 'boolean/phrase pattern detected'
      } else if (isRegexPattern(query)) {
        useKeyword = true
        modeReason = 'regex pattern detected'
      } else if (!embedsExist) {
        useKeyword = true
        modeReason = 'no embeddings'
      } else {
        useKeyword = false
        modeReason = 'embeddings available'
      }

      const modeIndicator = useKeyword ? '[keyword]' : '[semantic]'

      // Show index info (non-JSON mode)
      if (!json && indexInfo.lastUpdated) {
        const lastUpdatedDate = new Date(indexInfo.lastUpdated)
        const dateStr = lastUpdatedDate.toLocaleDateString('en-CA')
        const timeStr = lastUpdatedDate.toLocaleTimeString('en-US', {
          hour: '2-digit',
          minute: '2-digit',
          hour12: false,
        })
        yield* Console.log(`Using index from ${dateStr} ${timeStr}`)
        yield* Console.log(` Sections: ${indexInfo.sectionCount ?? 0}`)
        if (indexInfo.embeddingsExist) {
          yield* Console.log(
            ` Embeddings: yes (${indexInfo.vectorCount ?? 0} vectors)`,
          )
        } else {
          yield* Console.log(' Embeddings: no')
        }
        yield* Console.log('')
      }

      // Calculate context lines
      // -C sets both before and after; -B and -A override individual sides
      const contextValue = Option.getOrUndefined(context)
      const beforeValue = Option.getOrUndefined(beforeContext)
      const afterValue = Option.getOrUndefined(afterContext)

      const contextBefore = beforeValue ?? contextValue ?? 1
      const contextAfter = afterValue ?? contextValue ?? 1

      if (useKeyword) {
        // Keyword search - content by default, heading-only if flag set
        const results = headingOnly
          ? yield* search(resolvedDir, { heading: query, limit })
          : yield* searchContent(resolvedDir, {
              content: query,
              limit,
              contextBefore,
              contextAfter,
            })

        if (json) {
          const output = {
            mode: 'keyword',
            modeReason,
            query,
            contextBefore,
            contextAfter,
            results: results.map((r) => ({
              path: r.section.documentPath,
              heading: r.section.heading,
              level: r.section.level,
              tokens: r.section.tokenCount,
              line: r.section.startLine,
              matches: r.matches?.map((m) => ({
                lineNumber: m.lineNumber,
                line: m.line,
                contextLines: m.contextLines,
              })),
            })),
          }
          yield* Console.log(formatJson(output, pretty))
        } else {
          const searchType = headingOnly ? 'Heading' : 'Content'
          // Show mode with explanation for auto-detected modes
          const showReason =
            modeReason !== '--mode keyword' && modeReason !== '--keyword flag'
          const modeStr = showReason
            ? `${modeIndicator} (${modeReason})`
            : modeIndicator
          yield* Console.log(`${modeStr} ${searchType} search: "${query}"`)
          yield* Console.log(`Results: ${results.length}`)
          yield* Console.log('')

          for (const result of results) {
            const levelMarker = '#'.repeat(result.section.level)
            yield* Console.log(
              ` ${result.section.documentPath}:${result.section.startLine}`,
            )
            yield* Console.log(
              ` ${levelMarker} ${result.section.heading} (${result.section.tokenCount} tokens)`,
            )

            // Show match snippets with line numbers
            if (result.matches && result.matches.length > 0) {
              yield* Console.log('')
              for (const match of result.matches.slice(0, 3)) {
                // Show first 3 matches per section
                // Use contextLines for formatted output with line numbers
                if (match.contextLines && match.contextLines.length > 0) {
                  for (const ctxLine of match.contextLines) {
                    const marker = ctxLine.isMatch ? '>' : ' '
                    yield* Console.log(
                      ` ${marker} ${ctxLine.lineNumber}: ${ctxLine.line}`,
                    )
                  }
                } else {
                  // Fallback to simple snippet display
                  yield* Console.log(` Line ${match.lineNumber}:`)
                  const snippetLines = match.snippet.split('\n')
                  for (const line of snippetLines) {
                    yield* Console.log(` ${line}`)
                  }
                }
                yield* Console.log('')
              }
              if (result.matches.length > 3) {
                yield* Console.log(
                  ` ... and ${result.matches.length - 3} more matches`,
                )
              }
            }
            yield* Console.log('')
          }

          // Show tip for enabling semantic search if no embeddings
          if (!indexInfo.embeddingsExist) {
            yield* Console.log(
              "Tip: Run 'mdcontext index --embed' to enable semantic search",
            )
          }
        }
      } else {
        // Semantic search
        const results = yield* semanticSearch(resolvedDir, query, {
          limit,
          threshold,
        }).pipe(handleApiKeyError)

        if (json) {
          const output = {
            mode: 'semantic',
            modeReason,
            query,
            results,
          }
          yield* Console.log(formatJson(output, pretty))
        } else {
          // Show mode with explanation for auto-detected modes
          const showSemanticReason = modeReason !== '--mode semantic'
          const semanticModeStr = showSemanticReason
            ? `${modeIndicator} (${modeReason})`
            : modeIndicator
          yield* Console.log(`${semanticModeStr} Semantic search: "${query}"`)
          yield* Console.log(`Results: ${results.length}`)
          yield* Console.log('')

          for (const result of results) {
            const similarity = (result.similarity * 100).toFixed(1)
            yield* Console.log(` ${result.documentPath}`)
            yield* Console.log(` ${result.heading} (${similarity}% match)`)
            yield* Console.log('')
          }

          // Show tip for keyword search alternative
          yield* Console.log('Tip: Use --mode keyword for exact text matching')
        }
      }
    }),
).pipe(Command.withDescription('Search by meaning or structure'))
335
+
336
/**
 * Handle the case when embeddings don't exist.
 *
 * Estimates the embedding cost first. If the estimated build time is within
 * `autoIndexThreshold` seconds the semantic index is built right away;
 * otherwise the user is asked whether to build it or use keyword search.
 *
 * In JSON mode nothing is prompted and nothing is written to stdout: a build
 * above the threshold is reported on stderr and `false` is returned, so the
 * caller's JSON output stays parseable and the process never blocks on input.
 *
 * @param resolvedDir - Directory passed to the cost estimate and the embedding build
 * @param autoIndexThreshold - Maximum estimated build time (seconds) for building without asking
 * @param json - Whether the caller emits JSON; suppresses progress, status text and the prompt
 * @returns `true` if embeddings were created; `false` if the estimate or the
 *   build failed, or the user chose not to build (fall back to keyword search)
 */
const handleMissingEmbeddings = (
  resolvedDir: string,
  autoIndexThreshold: number,
  json: boolean,
): Effect.Effect<boolean, Error> =>
  Effect.gen(function* () {
    // Get cost estimate; a failed estimate is treated as "cannot build"
    const estimate = yield* estimateEmbeddingCost(resolvedDir).pipe(
      Effect.catchAll(() => Effect.succeed(null)),
    )

    if (!estimate) {
      yield* Console.error(
        'No semantic index found and could not estimate cost.',
      )
      yield* Console.error('Run "mdcontext index --embed" first.')
      return false
    }

    // Shared build step for the automatic and the prompted path: builds the
    // embeddings with per-file progress and reports the outcome. Build
    // failures are converted to `false` rather than propagated.
    const createIndex = Effect.gen(function* () {
      const result = yield* buildEmbeddings(resolvedDir, {
        force: false,
        onFileProgress: (progress) => {
          if (!json) {
            process.stdout.write(
              `\r [${progress.fileIndex}/${progress.totalFiles}] ${progress.filePath}...`,
            )
          }
        },
      }).pipe(
        handleApiKeyError,
        Effect.catchAll(() => Effect.succeed(null)),
      )

      if (!result) {
        return false
      }

      if (!json) {
        // Wipe the in-place progress line before printing the summary
        process.stdout.write(`\r${' '.repeat(80)}\r`)
        yield* Console.log(
          `Index created (${result.sectionsEmbedded} sections, $${result.cost.toFixed(6)})`,
        )
        yield* Console.log('')
      }

      return true
    })

    // Check if we should auto-index
    if (estimate.estimatedTimeSeconds <= autoIndexThreshold) {
      if (!json) {
        yield* Console.log(
          `Creating semantic index (~${estimate.estimatedTimeSeconds}s, ~$${estimate.totalCost.toFixed(4)})...`,
        )
      }
      return yield* createIndex
    }

    // JSON output must stay machine-readable and non-interactive, so never
    // prompt here; explain on stderr why no index was created
    if (json) {
      yield* Console.error('No semantic index found.')
      yield* Console.error('Run "mdcontext index --embed" first.')
      return false
    }

    // Prompt user for larger indexes
    yield* Console.log('')
    yield* Console.log('No semantic index found.')
    yield* Console.log('')
    yield* Console.log('Options:')
    yield* Console.log(
      ` 1. Create now (recommended, ~${estimate.estimatedTimeSeconds}s, ~$${estimate.totalCost.toFixed(4)})`,
    )
    yield* Console.log(' 2. Use keyword search instead')
    yield* Console.log('')

    // An empty answer accepts the default choice (1)
    const answer = yield* Effect.promise(() => promptUser('Choice [1]: '))
    const wantsIndex = answer === '' || answer === '1'

    if (wantsIndex) {
      yield* Console.log('')
      yield* Console.log('Building embeddings...')
      return yield* createIndex
    }

    // User chose keyword search
    yield* Console.log('')
    yield* Console.log('Falling back to keyword search.')
    return false
  })
@@ -0,0 +1,146 @@
1
+ /**
2
+ * STATS Command
3
+ *
4
+ * Show index statistics.
5
+ */
6
+
7
+ import * as path from 'node:path'
8
+ import { Args, Command } from '@effect/cli'
9
+ import { Console, Effect } from 'effect'
10
+ import { getEmbeddingStats } from '../../embeddings/semantic-search.js'
11
+ import {
12
+ createStorage,
13
+ loadDocumentIndex,
14
+ loadSectionIndex,
15
+ } from '../../index/storage.js'
16
+ import { jsonOption, prettyOption } from '../options.js'
17
+ import { formatJson } from '../utils.js'
18
+
19
/** Spread of per-document token counts across the index. */
interface TokenDistribution {
  /** Smallest per-document token count */
  min: number
  /** Largest per-document token count */
  max: number
  /** Middle value of the sorted per-document token counts */
  median: number
}

/** Aggregate figures reported by the `stats` command. */
interface IndexStats {
  /** Number of documents in the document index */
  documentCount: number
  /** Sum of the token counts of all documents */
  totalTokens: number
  /** Rounded average token count per document */
  avgTokensPerDoc: number
  /** Number of sections in the section index */
  totalSections: number
  /** Section count keyed by heading level */
  sectionsByLevel: Record<number, number>
  /** Min / max / median of the per-document token counts */
  tokenDistribution: TokenDistribution
}
+
32
/**
 * Median of an ascending-sorted list of numbers.
 *
 * Returns 0 for an empty list. For an even number of entries the two middle
 * values are averaged and rounded, so the result stays an integer token count.
 */
const medianOfSorted = (sorted: readonly number[]): number => {
  if (sorted.length === 0) {
    return 0
  }
  const mid = Math.floor(sorted.length / 2)
  const upper = sorted[mid] ?? 0
  if (sorted.length % 2 === 1) {
    return upper
  }
  const lower = sorted[mid - 1] ?? 0
  return Math.round((lower + upper) / 2)
}

/**
 * STATS command: print document, token, section and embedding statistics for
 * the index of a directory.
 *
 * Emits a JSON object when `--json` is set (an `{ error }` object if no index
 * exists), otherwise a human-readable report.
 */
export const statsCommand = Command.make(
  'stats',
  {
    path: Args.directory({ name: 'path' }).pipe(
      Args.withDescription('Directory to show stats for'),
      Args.withDefault('.'),
    ),
    json: jsonOption,
    pretty: prettyOption,
  },
  ({ path: dirPath, json, pretty }) =>
    Effect.gen(function* () {
      const resolvedRoot = path.resolve(dirPath)
      const storage = createStorage(resolvedRoot)

      // Load document and section indexes
      const docIndex = yield* loadDocumentIndex(storage)
      const sectionIndex = yield* loadSectionIndex(storage)

      // Handle case where index doesn't exist
      if (!docIndex || !sectionIndex) {
        if (json) {
          yield* Console.log(formatJson({ error: 'No index found' }, pretty))
        } else {
          yield* Console.log('No index found.')
          yield* Console.log("Run 'mdcontext index <path>' to create an index.")
        }
        return
      }

      // Calculate index stats
      const docs = Object.values(docIndex.documents)
      const sections = Object.values(sectionIndex.sections)

      // Sorted ascending so min/max/median can be read by position
      const tokenCounts = docs.map((d) => d.tokenCount).sort((a, b) => a - b)
      const totalTokens = tokenCounts.reduce((sum, t) => sum + t, 0)

      // Count sections by level
      const sectionsByLevel: Record<number, number> = {}
      for (const section of sections) {
        sectionsByLevel[section.level] =
          (sectionsByLevel[section.level] ?? 0) + 1
      }

      const indexStats: IndexStats = {
        documentCount: docs.length,
        totalTokens,
        avgTokensPerDoc:
          docs.length > 0 ? Math.round(totalTokens / docs.length) : 0,
        totalSections: sections.length,
        sectionsByLevel,
        tokenDistribution: {
          min: tokenCounts[0] ?? 0,
          max: tokenCounts[tokenCounts.length - 1] ?? 0,
          median: medianOfSorted(tokenCounts),
        },
      }

      // Get embedding stats
      const embeddingStats = yield* getEmbeddingStats(resolvedRoot)

      if (json) {
        yield* Console.log(
          formatJson({ ...indexStats, embeddings: embeddingStats }, pretty),
        )
      } else {
        yield* Console.log('Index statistics:')
        yield* Console.log('')
        yield* Console.log(' Documents')
        yield* Console.log(` Count: ${indexStats.documentCount}`)
        yield* Console.log(
          ` Tokens: ${indexStats.totalTokens.toLocaleString()}`,
        )
        yield* Console.log(` Avg/doc: ${indexStats.avgTokensPerDoc}`)
        yield* Console.log('')
        yield* Console.log(' Token distribution')
        yield* Console.log(
          ` Min: ${indexStats.tokenDistribution.min}`,
        )
        yield* Console.log(
          ` Median: ${indexStats.tokenDistribution.median}`,
        )
        yield* Console.log(
          ` Max: ${indexStats.tokenDistribution.max}`,
        )
        yield* Console.log('')
        yield* Console.log(' Sections')
        yield* Console.log(` Total: ${indexStats.totalSections}`)
        // Show section depth breakdown
        const levels = Object.keys(sectionsByLevel)
          .map(Number)
          .sort((a, b) => a - b)
        for (const level of levels) {
          yield* Console.log(
            ` h${level}: ${sectionsByLevel[level]}`,
          )
        }
        yield* Console.log('')
        yield* Console.log(' Embeddings')
        if (embeddingStats.hasEmbeddings) {
          yield* Console.log(` Vectors: ${embeddingStats.count}`)
          yield* Console.log(` Provider: ${embeddingStats.provider}`)
          yield* Console.log(` Dimensions: ${embeddingStats.dimensions}`)
          yield* Console.log(
            ` Cost: $${embeddingStats.totalCost.toFixed(6)}`,
          )
        } else {
          yield* Console.log(' Not enabled')
          yield* Console.log(
            " Run 'mdcontext index --embed' to build embeddings.",
          )
        }
      }
    }),
).pipe(Command.withDescription('Index statistics'))
@@ -0,0 +1,107 @@
1
+ /**
2
+ * TREE Command
3
+ *
4
+ * Show file tree or document outline.
5
+ */
6
+
7
+ import * as fs from 'node:fs'
8
+ import * as path from 'node:path'
9
+ import { Args, Command } from '@effect/cli'
10
+ import { Console, Effect } from 'effect'
11
+ import type { MdSection } from '../../core/types.js'
12
+ import { parseFile } from '../../parser/parser.js'
13
+ import { jsonOption, prettyOption } from '../options.js'
14
+ import { formatJson, walkDir } from '../utils.js'
15
+
16
/**
 * TREE command: list the markdown files of a directory, or print the heading
 * outline of a single file.
 *
 * The path argument is inspected with `fs.statSync`; a regular file produces
 * an outline, anything else is walked as a directory. Both views support
 * `--json` output.
 */
export const treeCommand = Command.make(
  'tree',
  {
    pathArg: Args.text({ name: 'path' }).pipe(
      Args.withDescription('Directory (shows files) or file (shows outline)'),
      Args.withDefault('.'),
    ),
    json: jsonOption,
    pretty: prettyOption,
  },
  ({ pathArg, json, pretty }) =>
    Effect.gen(function* () {
      const target = path.resolve(pathArg)

      // Decide between the two views based on what the path points at
      const targetStat = yield* Effect.try(() => fs.statSync(target))

      if (!targetStat.isFile()) {
        // Directory view: sorted list of files with paths relative to the root
        const discovered = yield* Effect.promise(() => walkDir(target))
        const entries = discovered.sort().map((absolute) => ({
          path: absolute,
          relativePath: path.relative(target, absolute),
        }))

        if (json) {
          yield* Console.log(formatJson(entries, pretty))
          return
        }

        yield* Console.log(`Markdown files in ${target}:`)
        yield* Console.log('')
        for (const entry of entries) {
          yield* Console.log(` ${entry.relativePath}`)
        }
        yield* Console.log('')
        yield* Console.log(`Total: ${entries.length} files`)
        return
      }

      // File view: parse the document and show its section hierarchy
      const doc = yield* parseFile(target).pipe(
        Effect.mapError((e) => new Error(`${e._tag}: ${e.message}`)),
      )

      if (json) {
        // Reduce each section to the fields exposed in the JSON outline
        const toOutlineNode = (
          node: MdSection,
        ): {
          heading: string
          level: number
          tokens: number
          children: unknown[]
        } => ({
          heading: node.heading,
          level: node.level,
          tokens: node.metadata.tokenCount,
          children: node.children.map(toOutlineNode),
        })

        yield* Console.log(
          formatJson(
            {
              title: doc.title,
              path: doc.path,
              totalTokens: doc.metadata.tokenCount,
              sections: doc.sections.map(toOutlineNode),
            },
            pretty,
          ),
        )
        return
      }

      yield* Console.log(`# ${doc.title}`)
      yield* Console.log(`Total tokens: ${doc.metadata.tokenCount}`)
      yield* Console.log('')

      // Print one section line, then recurse into its children one level deeper
      const renderSection = (
        node: MdSection,
        depth: number = 0,
      ): Effect.Effect<void> =>
        Effect.gen(function* () {
          const padding = ' '.repeat(depth)
          const hashes = '#'.repeat(node.level)
          yield* Console.log(
            `${padding}${hashes} ${node.heading} [${node.metadata.tokenCount} tokens]`,
          )
          for (const child of node.children) {
            yield* renderSection(child, depth + 1)
          }
        })

      for (const top of doc.sections) {
        yield* renderSection(top)
      }
    }),
).pipe(Command.withDescription('Show files or document outline'))