mdcontext 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.changeset/README.md +28 -0
- package/.changeset/config.json +11 -0
- package/.github/workflows/ci.yml +83 -0
- package/.github/workflows/release.yml +113 -0
- package/.tldrignore +112 -0
- package/AGENTS.md +46 -0
- package/BACKLOG.md +338 -0
- package/README.md +231 -11
- package/biome.json +36 -0
- package/cspell.config.yaml +14 -0
- package/dist/chunk-KRYIFLQR.js +92 -0
- package/dist/chunk-S7E6TFX6.js +742 -0
- package/dist/chunk-VVTGZNBT.js +1519 -0
- package/dist/cli/main.d.ts +1 -0
- package/dist/cli/main.js +2015 -0
- package/dist/index.d.ts +266 -0
- package/dist/index.js +86 -0
- package/dist/mcp/server.d.ts +1 -0
- package/dist/mcp/server.js +376 -0
- package/docs/019-USAGE.md +586 -0
- package/docs/020-current-implementation.md +364 -0
- package/docs/021-DOGFOODING-FINDINGS.md +175 -0
- package/docs/BACKLOG.md +80 -0
- package/docs/DESIGN.md +439 -0
- package/docs/PROJECT.md +88 -0
- package/docs/ROADMAP.md +407 -0
- package/docs/test-links.md +9 -0
- package/package.json +69 -10
- package/pnpm-workspace.yaml +5 -0
- package/research/config-analysis/01-current-implementation.md +470 -0
- package/research/config-analysis/02-strategy-recommendation.md +428 -0
- package/research/config-analysis/03-task-candidates.md +715 -0
- package/research/config-analysis/033-research-configuration-management.md +828 -0
- package/research/config-analysis/034-research-effect-cli-config.md +1504 -0
- package/research/config-analysis/04-consolidated-task-candidates.md +277 -0
- package/research/dogfood/consolidated-tool-evaluation.md +373 -0
- package/research/dogfood/strategy-a/a-synthesis.md +184 -0
- package/research/dogfood/strategy-a/a1-docs.md +226 -0
- package/research/dogfood/strategy-a/a2-amorphic.md +156 -0
- package/research/dogfood/strategy-a/a3-llm.md +164 -0
- package/research/dogfood/strategy-b/b-synthesis.md +228 -0
- package/research/dogfood/strategy-b/b1-architecture.md +207 -0
- package/research/dogfood/strategy-b/b2-gaps.md +258 -0
- package/research/dogfood/strategy-b/b3-workflows.md +250 -0
- package/research/dogfood/strategy-c/c-synthesis.md +451 -0
- package/research/dogfood/strategy-c/c1-explorer.md +192 -0
- package/research/dogfood/strategy-c/c2-diver-memory.md +145 -0
- package/research/dogfood/strategy-c/c3-diver-control.md +148 -0
- package/research/dogfood/strategy-c/c4-diver-failure.md +151 -0
- package/research/dogfood/strategy-c/c5-diver-execution.md +221 -0
- package/research/dogfood/strategy-c/c6-diver-org.md +221 -0
- package/research/effect-cli-error-handling.md +845 -0
- package/research/effect-errors-as-values.md +943 -0
- package/research/errors-task-analysis/00-consolidated-tasks.md +207 -0
- package/research/errors-task-analysis/cli-commands-analysis.md +909 -0
- package/research/errors-task-analysis/embeddings-analysis.md +709 -0
- package/research/errors-task-analysis/index-search-analysis.md +812 -0
- package/research/mdcontext-error-analysis.md +521 -0
- package/research/npm_publish/011-npm-workflow-research-agent2.md +792 -0
- package/research/npm_publish/012-npm-workflow-research-agent1.md +530 -0
- package/research/npm_publish/013-npm-workflow-research-agent3.md +722 -0
- package/research/npm_publish/014-npm-workflow-synthesis.md +556 -0
- package/research/npm_publish/031-npm-workflow-task-analysis.md +134 -0
- package/research/semantic-search/002-research-embedding-models.md +490 -0
- package/research/semantic-search/003-research-rag-alternatives.md +523 -0
- package/research/semantic-search/004-research-vector-search.md +841 -0
- package/research/semantic-search/032-research-semantic-search.md +427 -0
- package/research/task-management-2026/00-synthesis-recommendations.md +295 -0
- package/research/task-management-2026/01-ai-workflow-tools.md +416 -0
- package/research/task-management-2026/02-agent-framework-patterns.md +476 -0
- package/research/task-management-2026/03-lightweight-file-based.md +567 -0
- package/research/task-management-2026/04-established-tools-ai-features.md +541 -0
- package/research/task-management-2026/linear/01-core-features-workflow.md +771 -0
- package/research/task-management-2026/linear/02-api-integrations.md +930 -0
- package/research/task-management-2026/linear/03-ai-features.md +368 -0
- package/research/task-management-2026/linear/04-pricing-setup.md +205 -0
- package/research/task-management-2026/linear/05-usage-patterns-best-practices.md +605 -0
- package/scripts/rebuild-hnswlib.js +63 -0
- package/src/cli/argv-preprocessor.test.ts +210 -0
- package/src/cli/argv-preprocessor.ts +202 -0
- package/src/cli/cli.test.ts +430 -0
- package/src/cli/commands/backlinks.ts +54 -0
- package/src/cli/commands/context.ts +197 -0
- package/src/cli/commands/index-cmd.ts +300 -0
- package/src/cli/commands/index.ts +13 -0
- package/src/cli/commands/links.ts +52 -0
- package/src/cli/commands/search.ts +451 -0
- package/src/cli/commands/stats.ts +146 -0
- package/src/cli/commands/tree.ts +107 -0
- package/src/cli/flag-schemas.ts +275 -0
- package/src/cli/help.ts +386 -0
- package/src/cli/index.ts +9 -0
- package/src/cli/main.ts +145 -0
- package/src/cli/options.ts +31 -0
- package/src/cli/typo-suggester.test.ts +105 -0
- package/src/cli/typo-suggester.ts +130 -0
- package/src/cli/utils.ts +126 -0
- package/src/core/index.ts +1 -0
- package/src/core/types.ts +140 -0
- package/src/embeddings/index.ts +8 -0
- package/src/embeddings/openai-provider.ts +165 -0
- package/src/embeddings/semantic-search.ts +583 -0
- package/src/embeddings/types.ts +82 -0
- package/src/embeddings/vector-store.ts +299 -0
- package/src/index/index.ts +4 -0
- package/src/index/indexer.ts +446 -0
- package/src/index/storage.ts +196 -0
- package/src/index/types.ts +109 -0
- package/src/index/watcher.ts +131 -0
- package/src/index.ts +8 -0
- package/src/mcp/server.ts +483 -0
- package/src/parser/index.ts +1 -0
- package/src/parser/parser.test.ts +291 -0
- package/src/parser/parser.ts +395 -0
- package/src/parser/section-filter.ts +270 -0
- package/src/search/query-parser.test.ts +260 -0
- package/src/search/query-parser.ts +319 -0
- package/src/search/searcher.test.ts +182 -0
- package/src/search/searcher.ts +602 -0
- package/src/summarize/budget-bugs.test.ts +620 -0
- package/src/summarize/formatters.ts +419 -0
- package/src/summarize/index.ts +20 -0
- package/src/summarize/summarizer.test.ts +275 -0
- package/src/summarize/summarizer.ts +528 -0
- package/src/summarize/verify-bugs.test.ts +238 -0
- package/src/utils/index.ts +1 -0
- package/src/utils/tokens.test.ts +142 -0
- package/src/utils/tokens.ts +186 -0
- package/tests/fixtures/cli/.mdcontext/config.json +8 -0
- package/tests/fixtures/cli/.mdcontext/indexes/documents.json +33 -0
- package/tests/fixtures/cli/.mdcontext/indexes/links.json +12 -0
- package/tests/fixtures/cli/.mdcontext/indexes/sections.json +233 -0
- package/tests/fixtures/cli/.mdcontext/vectors.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/vectors.meta.json +1264 -0
- package/tests/fixtures/cli/README.md +9 -0
- package/tests/fixtures/cli/api-reference.md +11 -0
- package/tests/fixtures/cli/getting-started.md +11 -0
- package/tsconfig.json +26 -0
- package/vitest.config.ts +21 -0
- package/vitest.setup.ts +12 -0
|
@@ -0,0 +1,451 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SEARCH Command
|
|
3
|
+
*
|
|
4
|
+
* Search markdown content by meaning or heading pattern.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import * as path from 'node:path'
|
|
8
|
+
import * as readline from 'node:readline'
|
|
9
|
+
import { Args, Command, Options } from '@effect/cli'
|
|
10
|
+
import { Console, Effect, Option } from 'effect'
|
|
11
|
+
import { handleApiKeyError } from '../../embeddings/openai-provider.js'
|
|
12
|
+
import {
|
|
13
|
+
buildEmbeddings,
|
|
14
|
+
estimateEmbeddingCost,
|
|
15
|
+
semanticSearch,
|
|
16
|
+
} from '../../embeddings/semantic-search.js'
|
|
17
|
+
import { isAdvancedQuery } from '../../search/query-parser.js'
|
|
18
|
+
import { search, searchContent } from '../../search/searcher.js'
|
|
19
|
+
import { jsonOption, prettyOption } from '../options.js'
|
|
20
|
+
import { formatJson, getIndexInfo, isRegexPattern } from '../utils.js'
|
|
21
|
+
|
|
22
|
+
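// Default for --auto-index-threshold: when the semantic index is missing, it is
// built without prompting if the estimated build time is at most this many seconds.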
|
|
23
|
+
const AUTO_INDEX_THRESHOLD_SECONDS = 10
|
|
24
|
+
|
|
25
|
+
/**
 * Ask a single question on the terminal.
 *
 * @param message - Text written to stdout as the prompt.
 * @returns The reply read from stdin, trimmed and lower-cased.
 */
const promptUser = (message: string): Promise<string> =>
  new Promise<string>((resolve) => {
    const terminal = readline.createInterface({
      input: process.stdin,
      output: process.stdout,
    })

    const onReply = (reply: string): void => {
      // Release stdin before handing the answer back.
      terminal.close()
      resolve(reply.trim().toLowerCase())
    }

    terminal.question(message, onReply)
  })
|
|
37
|
+
|
|
38
|
+
/**
 * `search` command: query markdown under a directory either by keyword
 * (content or heading text) or by meaning (semantic search over embeddings).
 *
 * Search mode priority:
 *   --mode flag > --keyword flag > boolean/phrase query > regex pattern >
 *   embeddings availability.
 *
 * With `--mode semantic` and no embeddings, the command tries to create the
 * semantic index via `handleMissingEmbeddings`. If that does not produce an
 * index (declined or failed), the search falls back to keyword mode instead
 * of exiting without results.
 */
export const searchCommand = Command.make(
  'search',
  {
    query: Args.text({ name: 'query' }).pipe(
      Args.withDescription('Search query (natural language or regex pattern)'),
    ),
    path: Args.directory({ name: 'path' }).pipe(
      Args.withDescription('Directory to search in'),
      Args.withDefault('.'),
    ),
    keyword: Options.boolean('keyword').pipe(
      Options.withAlias('k'),
      Options.withDescription('Force keyword search (content text match)'),
      Options.withDefault(false),
    ),
    headingOnly: Options.boolean('heading-only').pipe(
      Options.withAlias('H'),
      Options.withDescription('Search headings only (not content)'),
      Options.withDefault(false),
    ),
    mode: Options.choice('mode', ['semantic', 'keyword']).pipe(
      Options.withAlias('m'),
      Options.withDescription('Force search mode: semantic or keyword'),
      Options.optional,
    ),
    limit: Options.integer('limit').pipe(
      Options.withAlias('n'),
      Options.withDescription('Maximum results'),
      Options.withDefault(10),
    ),
    threshold: Options.float('threshold').pipe(
      Options.withDescription('Similarity threshold for semantic search (0-1)'),
      Options.withDefault(0.45),
    ),
    context: Options.integer('context').pipe(
      Options.withAlias('C'),
      Options.withDescription('Lines of context around matches (like grep -C)'),
      Options.optional,
    ),
    beforeContext: Options.integer('before-context').pipe(
      Options.withAlias('B'),
      Options.withDescription('Lines of context before matches (like grep -B)'),
      Options.optional,
    ),
    afterContext: Options.integer('after-context').pipe(
      Options.withAlias('A'),
      Options.withDescription('Lines of context after matches (like grep -A)'),
      Options.optional,
    ),
    autoIndexThreshold: Options.integer('auto-index-threshold').pipe(
      Options.withDescription(
        'Auto-create semantic index if estimated time is under this threshold (seconds)',
      ),
      Options.withDefault(AUTO_INDEX_THRESHOLD_SECONDS),
    ),
    json: jsonOption,
    pretty: prettyOption,
  },
  ({
    query,
    path: dirPath,
    keyword,
    headingOnly,
    mode,
    limit,
    threshold,
    context,
    beforeContext,
    afterContext,
    autoIndexThreshold,
    json,
    pretty,
  }) =>
    Effect.gen(function* () {
      const resolvedDir = path.resolve(dirPath)

      // Get index info for display
      const indexInfo = yield* Effect.promise(() => getIndexInfo(resolvedDir))

      // Without an index there is nothing to search; tell the user how to build one.
      if (!indexInfo.exists && !json) {
        yield* Console.log('No index found.')
        yield* Console.log('')
        yield* Console.log('Run: mdcontext index /path/to/docs')
        yield* Console.log(' Add --embed for semantic search capabilities')
        return
      }

      // Tracks whether embeddings are usable *now*; may flip to true below if
      // the semantic index gets created during this run.
      let embedsExist = indexInfo.embeddingsExist

      // Determine search mode
      // Priority: --mode flag > --keyword flag > regex pattern > embeddings availability
      let useKeyword: boolean
      let modeReason: string

      const modeValue = Option.getOrUndefined(mode)

      if (modeValue === 'semantic') {
        // User explicitly requested semantic search: try to create the index
        // when it is missing.
        if (!embedsExist) {
          embedsExist = yield* handleMissingEmbeddings(
            resolvedDir,
            autoIndexThreshold,
            json,
          )
        }
        if (embedsExist) {
          useKeyword = false
          modeReason = '--mode semantic'
        } else {
          // Index creation was declined or failed. Run the keyword search the
          // user was offered rather than returning with no results at all.
          useKeyword = true
          modeReason = 'semantic index unavailable'
        }
      } else if (modeValue === 'keyword') {
        useKeyword = true
        modeReason = '--mode keyword'
      } else if (keyword) {
        useKeyword = true
        modeReason = '--keyword flag'
      } else if (isAdvancedQuery(query)) {
        // Detect quoted phrases and boolean operators (AND, OR, NOT)
        useKeyword = true
        modeReason = 'boolean/phrase pattern detected'
      } else if (isRegexPattern(query)) {
        useKeyword = true
        modeReason = 'regex pattern detected'
      } else if (!embedsExist) {
        useKeyword = true
        modeReason = 'no embeddings'
      } else {
        useKeyword = false
        modeReason = 'embeddings available'
      }

      const modeIndicator = useKeyword ? '[keyword]' : '[semantic]'

      // Show index info (non-JSON mode)
      if (!json && indexInfo.lastUpdated) {
        const lastUpdatedDate = new Date(indexInfo.lastUpdated)
        const dateStr = lastUpdatedDate.toLocaleDateString('en-CA')
        const timeStr = lastUpdatedDate.toLocaleTimeString('en-US', {
          hour: '2-digit',
          minute: '2-digit',
          hour12: false,
        })
        yield* Console.log(`Using index from ${dateStr} ${timeStr}`)
        yield* Console.log(` Sections: ${indexInfo.sectionCount ?? 0}`)
        if (indexInfo.embeddingsExist) {
          yield* Console.log(
            ` Embeddings: yes (${indexInfo.vectorCount ?? 0} vectors)`,
          )
        } else if (embedsExist) {
          // Embeddings were created during this run; indexInfo was read before
          // that, so it carries no vector count to report.
          yield* Console.log(' Embeddings: yes')
        } else {
          yield* Console.log(' Embeddings: no')
        }
        yield* Console.log('')
      }

      // Calculate context lines
      // -C sets both before and after; -B and -A override individual sides
      const contextValue = Option.getOrUndefined(context)
      const beforeValue = Option.getOrUndefined(beforeContext)
      const afterValue = Option.getOrUndefined(afterContext)

      const contextBefore = beforeValue ?? contextValue ?? 1
      const contextAfter = afterValue ?? contextValue ?? 1

      if (useKeyword) {
        // Keyword search - content by default, heading-only if flag set
        const results = headingOnly
          ? yield* search(resolvedDir, { heading: query, limit })
          : yield* searchContent(resolvedDir, {
              content: query,
              limit,
              contextBefore,
              contextAfter,
            })

        if (json) {
          const output = {
            mode: 'keyword',
            modeReason,
            query,
            contextBefore,
            contextAfter,
            results: results.map((r) => ({
              path: r.section.documentPath,
              heading: r.section.heading,
              level: r.section.level,
              tokens: r.section.tokenCount,
              line: r.section.startLine,
              matches: r.matches?.map((m) => ({
                lineNumber: m.lineNumber,
                line: m.line,
                contextLines: m.contextLines,
              })),
            })),
          }
          yield* Console.log(formatJson(output, pretty))
        } else {
          const searchType = headingOnly ? 'Heading' : 'Content'
          // Show mode with explanation for auto-detected modes
          const showReason =
            modeReason !== '--mode keyword' && modeReason !== '--keyword flag'
          const modeStr = showReason
            ? `${modeIndicator} (${modeReason})`
            : modeIndicator
          yield* Console.log(`${modeStr} ${searchType} search: "${query}"`)
          yield* Console.log(`Results: ${results.length}`)
          yield* Console.log('')

          for (const result of results) {
            const levelMarker = '#'.repeat(result.section.level)
            yield* Console.log(
              ` ${result.section.documentPath}:${result.section.startLine}`,
            )
            yield* Console.log(
              ` ${levelMarker} ${result.section.heading} (${result.section.tokenCount} tokens)`,
            )

            // Show match snippets with line numbers
            if (result.matches && result.matches.length > 0) {
              yield* Console.log('')
              // Show first 3 matches per section
              for (const match of result.matches.slice(0, 3)) {
                // Use contextLines for formatted output with line numbers
                if (match.contextLines && match.contextLines.length > 0) {
                  for (const ctxLine of match.contextLines) {
                    const marker = ctxLine.isMatch ? '>' : ' '
                    yield* Console.log(
                      ` ${marker} ${ctxLine.lineNumber}: ${ctxLine.line}`,
                    )
                  }
                } else {
                  // Fallback to simple snippet display
                  yield* Console.log(` Line ${match.lineNumber}:`)
                  const snippetLines = match.snippet.split('\n')
                  for (const line of snippetLines) {
                    yield* Console.log(` ${line}`)
                  }
                }
                yield* Console.log('')
              }
              if (result.matches.length > 3) {
                yield* Console.log(
                  ` ... and ${result.matches.length - 3} more matches`,
                )
              }
            }
            yield* Console.log('')
          }

          // Show tip for enabling semantic search if no embeddings
          if (!indexInfo.embeddingsExist) {
            yield* Console.log(
              "Tip: Run 'mdcontext index --embed' to enable semantic search",
            )
          }
        }
      } else {
        // Semantic search
        const results = yield* semanticSearch(resolvedDir, query, {
          limit,
          threshold,
        }).pipe(handleApiKeyError)

        if (json) {
          const output = {
            mode: 'semantic',
            modeReason,
            query,
            results,
          }
          yield* Console.log(formatJson(output, pretty))
        } else {
          // Show mode with explanation for auto-detected modes
          const showSemanticReason = modeReason !== '--mode semantic'
          const semanticModeStr = showSemanticReason
            ? `${modeIndicator} (${modeReason})`
            : modeIndicator
          yield* Console.log(`${semanticModeStr} Semantic search: "${query}"`)
          yield* Console.log(`Results: ${results.length}`)
          yield* Console.log('')

          for (const result of results) {
            const similarity = (result.similarity * 100).toFixed(1)
            yield* Console.log(` ${result.documentPath}`)
            yield* Console.log(` ${result.heading} (${similarity}% match)`)
            yield* Console.log('')
          }

          // Show tip for keyword search alternative
          yield* Console.log('Tip: Use --mode keyword for exact text matching')
        }
      }
    }),
).pipe(Command.withDescription('Search by meaning or structure'))
|
|
335
|
+
|
|
336
|
+
/**
 * Build the semantic index for `resolvedDir` and report the outcome.
 *
 * Per-file progress and the final "Index created" summary are written to
 * stdout only when `json` is false. Failures of the build (after
 * `handleApiKeyError` has had its chance to report them) are turned into a
 * `false` result rather than propagated.
 *
 * @returns true when the index was built, false when the build failed.
 */
const createSemanticIndex = (resolvedDir: string, json: boolean) =>
  Effect.gen(function* () {
    const result = yield* buildEmbeddings(resolvedDir, {
      force: false,
      onFileProgress: (progress) => {
        if (!json) {
          // \r rewrites the same terminal line for each file.
          process.stdout.write(
            `\r [${progress.fileIndex}/${progress.totalFiles}] ${progress.filePath}...`,
          )
        }
      },
    }).pipe(
      handleApiKeyError,
      Effect.catchAll(() => Effect.succeed(null)),
    )

    if (!result) {
      return false
    }

    if (!json) {
      // Wipe the progress line before printing the summary.
      process.stdout.write(`\r${' '.repeat(80)}\r`)
      yield* Console.log(
        `Index created (${result.sectionsEmbedded} sections, $${result.cost.toFixed(6)})`,
      )
      yield* Console.log('')
    }

    return true
  })

/**
 * Handle the case when embeddings don't exist.
 *
 * - If the cost cannot be estimated, reports on stderr and gives up.
 * - If the estimated build time is within `autoIndexThreshold` seconds, the
 *   index is built immediately.
 * - Otherwise the user is asked whether to build it now. In JSON mode no
 *   prompt is shown (it would be written to stdout alongside the JSON output
 *   and block non-interactive callers); the situation is reported on stderr
 *   instead.
 *
 * @param resolvedDir - Absolute path of the directory being searched.
 * @param autoIndexThreshold - Maximum estimated build time (seconds) for
 *   building without asking.
 * @param json - Whether the command is producing JSON output.
 * @returns true if embeddings were created, false if they are still
 *   unavailable (declined, not estimable, non-interactive, or build failed).
 */
const handleMissingEmbeddings = (
  resolvedDir: string,
  autoIndexThreshold: number,
  json: boolean,
): Effect.Effect<boolean, Error> =>
  Effect.gen(function* () {
    // Get cost estimate
    const estimate = yield* estimateEmbeddingCost(resolvedDir).pipe(
      Effect.catchAll(() => Effect.succeed(null)),
    )

    if (!estimate) {
      yield* Console.error(
        'No semantic index found and could not estimate cost.',
      )
      yield* Console.error('Run "mdcontext index --embed" first.')
      return false
    }

    // Check if we should auto-index
    if (estimate.estimatedTimeSeconds <= autoIndexThreshold) {
      if (!json) {
        yield* Console.log(
          `Creating semantic index (~${estimate.estimatedTimeSeconds}s, ~$${estimate.totalCost.toFixed(4)})...`,
        )
      }
      return yield* createSemanticIndex(resolvedDir, json)
    }

    // Larger indexes need consent. JSON mode is treated as non-interactive:
    // never prompt there, keep stdout reserved for the JSON payload.
    if (json) {
      yield* Console.error('No semantic index found.')
      yield* Console.error('Run "mdcontext index --embed" first.')
      return false
    }

    // Prompt user for larger indexes
    yield* Console.log('')
    yield* Console.log('No semantic index found.')
    yield* Console.log('')
    yield* Console.log('Options:')
    yield* Console.log(
      ` 1. Create now (recommended, ~${estimate.estimatedTimeSeconds}s, ~$${estimate.totalCost.toFixed(4)})`,
    )
    yield* Console.log(' 2. Use keyword search instead')
    yield* Console.log('')

    const answer = yield* Effect.promise(() => promptUser('Choice [1]: '))

    // An empty reply accepts the default choice, 1.
    if (answer === '' || answer === '1') {
      yield* Console.log('')
      yield* Console.log('Building embeddings...')
      return yield* createSemanticIndex(resolvedDir, json)
    }

    // User chose keyword search
    yield* Console.log('')
    yield* Console.log('Falling back to keyword search.')
    return false
  })
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* STATS Command
|
|
3
|
+
*
|
|
4
|
+
* Show index statistics.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import * as path from 'node:path'
|
|
8
|
+
import { Args, Command } from '@effect/cli'
|
|
9
|
+
import { Console, Effect } from 'effect'
|
|
10
|
+
import { getEmbeddingStats } from '../../embeddings/semantic-search.js'
|
|
11
|
+
import {
|
|
12
|
+
createStorage,
|
|
13
|
+
loadDocumentIndex,
|
|
14
|
+
loadSectionIndex,
|
|
15
|
+
} from '../../index/storage.js'
|
|
16
|
+
import { jsonOption, prettyOption } from '../options.js'
|
|
17
|
+
import { formatJson } from '../utils.js'
|
|
18
|
+
|
|
19
|
+
/** Aggregate figures reported by the `stats` command. */
interface IndexStats {
  /** Number of indexed documents. */
  documentCount: number
  /** Sum of the token counts of all documents. */
  totalTokens: number
  /** Rounded mean token count per document (0 when there are no documents). */
  avgTokensPerDoc: number
  /** Number of indexed sections. */
  totalSections: number
  /** Section count keyed by heading level. */
  sectionsByLevel: { [level: number]: number }
  /** Spread of per-document token counts. */
  tokenDistribution: {
    min: number
    max: number
    median: number
  }
}
|
|
31
|
+
|
|
32
|
+
/**
 * `stats` command: print document, section and embedding statistics for the
 * index stored under a directory, as text or (with --json) as JSON.
 *
 * When no document or section index can be loaded, a "No index found" notice
 * is printed (as `{ error }` in JSON mode) and nothing else is reported.
 */
export const statsCommand = Command.make(
  'stats',
  {
    path: Args.directory({ name: 'path' }).pipe(
      Args.withDescription('Directory to show stats for'),
      Args.withDefault('.'),
    ),
    json: jsonOption,
    pretty: prettyOption,
  },
  ({ path: dirPath, json, pretty }) =>
    Effect.gen(function* () {
      const resolvedRoot = path.resolve(dirPath)
      const storage = createStorage(resolvedRoot)

      // Load document and section indexes
      const docIndex = yield* loadDocumentIndex(storage)
      const sectionIndex = yield* loadSectionIndex(storage)

      // Handle case where index doesn't exist
      if (!docIndex || !sectionIndex) {
        if (json) {
          yield* Console.log(formatJson({ error: 'No index found' }, pretty))
        } else {
          yield* Console.log('No index found.')
          yield* Console.log("Run 'mdcontext index <path>' to create an index.")
        }
        return
      }

      // Calculate index stats
      const docs = Object.values(docIndex.documents)
      const sections = Object.values(sectionIndex.sections)

      // Ascending order so min/max/median can be read off by position.
      const tokenCounts = docs.map((d) => d.tokenCount).sort((a, b) => a - b)
      const totalTokens = tokenCounts.reduce((sum, t) => sum + t, 0)

      // Median: middle element for odd-length input, rounded mean of the two
      // middle elements for even-length input, 0 when there are no documents.
      const mid = Math.floor(tokenCounts.length / 2)
      const upperMiddle = tokenCounts[mid] ?? 0
      const median =
        tokenCounts.length % 2 === 1
          ? upperMiddle
          : Math.round(((tokenCounts[mid - 1] ?? 0) + upperMiddle) / 2)

      // Count sections by level
      const sectionsByLevel: Record<number, number> = {}
      for (const section of sections) {
        sectionsByLevel[section.level] =
          (sectionsByLevel[section.level] ?? 0) + 1
      }

      const indexStats: IndexStats = {
        documentCount: docs.length,
        totalTokens,
        avgTokensPerDoc:
          docs.length > 0 ? Math.round(totalTokens / docs.length) : 0,
        totalSections: sections.length,
        sectionsByLevel,
        tokenDistribution: {
          min: tokenCounts[0] ?? 0,
          max: tokenCounts[tokenCounts.length - 1] ?? 0,
          median,
        },
      }

      // Get embedding stats
      const embeddingStats = yield* getEmbeddingStats(resolvedRoot)

      if (json) {
        yield* Console.log(
          formatJson({ ...indexStats, embeddings: embeddingStats }, pretty),
        )
      } else {
        yield* Console.log('Index statistics:')
        yield* Console.log('')
        yield* Console.log(' Documents')
        yield* Console.log(` Count: ${indexStats.documentCount}`)
        yield* Console.log(
          ` Tokens: ${indexStats.totalTokens.toLocaleString()}`,
        )
        yield* Console.log(` Avg/doc: ${indexStats.avgTokensPerDoc}`)
        yield* Console.log('')
        yield* Console.log(' Token distribution')
        yield* Console.log(
          ` Min: ${indexStats.tokenDistribution.min}`,
        )
        yield* Console.log(
          ` Median: ${indexStats.tokenDistribution.median}`,
        )
        yield* Console.log(
          ` Max: ${indexStats.tokenDistribution.max}`,
        )
        yield* Console.log('')
        yield* Console.log(' Sections')
        yield* Console.log(` Total: ${indexStats.totalSections}`)
        // Show section depth breakdown, shallowest heading level first
        const levels = Object.keys(sectionsByLevel)
          .map(Number)
          .sort((a, b) => a - b)
        for (const level of levels) {
          yield* Console.log(
            ` h${level}: ${sectionsByLevel[level]}`,
          )
        }
        yield* Console.log('')
        yield* Console.log(' Embeddings')
        if (embeddingStats.hasEmbeddings) {
          yield* Console.log(` Vectors: ${embeddingStats.count}`)
          yield* Console.log(` Provider: ${embeddingStats.provider}`)
          yield* Console.log(` Dimensions: ${embeddingStats.dimensions}`)
          yield* Console.log(
            ` Cost: $${embeddingStats.totalCost.toFixed(6)}`,
          )
        } else {
          yield* Console.log(' Not enabled')
          yield* Console.log(
            " Run 'mdcontext index --embed' to build embeddings.",
          )
        }
      }
    }),
).pipe(Command.withDescription('Index statistics'))
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TREE Command
|
|
3
|
+
*
|
|
4
|
+
* Show file tree or document outline.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import * as fs from 'node:fs'
|
|
8
|
+
import * as path from 'node:path'
|
|
9
|
+
import { Args, Command } from '@effect/cli'
|
|
10
|
+
import { Console, Effect } from 'effect'
|
|
11
|
+
import type { MdSection } from '../../core/types.js'
|
|
12
|
+
import { parseFile } from '../../parser/parser.js'
|
|
13
|
+
import { jsonOption, prettyOption } from '../options.js'
|
|
14
|
+
import { formatJson, walkDir } from '../utils.js'
|
|
15
|
+
|
|
16
|
+
/**
 * `tree` command.
 *
 * Given a file, prints that document's heading outline with token counts;
 * given anything else (a directory), lists the files returned by `walkDir`.
 * Both views are available as text or, with --json, as JSON.
 */
export const treeCommand = Command.make(
  'tree',
  {
    pathArg: Args.text({ name: 'path' }).pipe(
      Args.withDescription('Directory (shows files) or file (shows outline)'),
      Args.withDefault('.'),
    ),
    json: jsonOption,
    pretty: prettyOption,
  },
  ({ pathArg, json, pretty }) =>
    Effect.gen(function* () {
      const target = path.resolve(pathArg)

      // The kind of path decides which view is rendered.
      const targetStat = yield* Effect.try(() => fs.statSync(target))

      if (!targetStat.isFile()) {
        // Directory view: sorted file list with paths relative to the target.
        const found = yield* Effect.promise(() => walkDir(target))
        const entries = found.sort().map((absolute) => ({
          path: absolute,
          relativePath: path.relative(target, absolute),
        }))

        if (json) {
          yield* Console.log(formatJson(entries, pretty))
          return
        }

        yield* Console.log(`Markdown files in ${target}:`)
        yield* Console.log('')
        for (const entry of entries) {
          yield* Console.log(` ${entry.relativePath}`)
        }
        yield* Console.log('')
        yield* Console.log(`Total: ${entries.length} files`)
        return
      }

      // File view: parse the document and render its section hierarchy.
      const doc = yield* parseFile(target).pipe(
        Effect.mapError((e) => new Error(`${e._tag}: ${e.message}`)),
      )

      if (json) {
        // Reduce a section (recursively) to the fields exposed in JSON output.
        const toNode = (
          section: MdSection,
        ): {
          heading: string
          level: number
          tokens: number
          children: unknown[]
        } => {
          return {
            heading: section.heading,
            level: section.level,
            tokens: section.metadata.tokenCount,
            children: section.children.map(toNode),
          }
        }

        yield* Console.log(
          formatJson(
            {
              title: doc.title,
              path: doc.path,
              totalTokens: doc.metadata.tokenCount,
              sections: doc.sections.map(toNode),
            },
            pretty,
          ),
        )
        return
      }

      yield* Console.log(`# ${doc.title}`)
      yield* Console.log(`Total tokens: ${doc.metadata.tokenCount}`)
      yield* Console.log('')

      // Append the outline line for a section, then those of its children
      // (depth-first), indenting one step per nesting depth.
      const collectOutline = (
        section: MdSection,
        depth: number,
        lines: string[],
      ): string[] => {
        const pad = ' '.repeat(depth)
        const hashes = '#'.repeat(section.level)
        lines.push(
          `${pad}${hashes} ${section.heading} [${section.metadata.tokenCount} tokens]`,
        )
        for (const child of section.children) {
          collectOutline(child, depth + 1, lines)
        }
        return lines
      }

      for (const top of doc.sections) {
        for (const line of collectOutline(top, 0, [])) {
          yield* Console.log(line)
        }
      }
    }),
).pipe(Command.withDescription('Show files or document outline'))
|