mdcontext 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.changeset/README.md +28 -0
- package/.changeset/config.json +11 -0
- package/.github/workflows/ci.yml +83 -0
- package/.github/workflows/release.yml +113 -0
- package/.tldrignore +112 -0
- package/AGENTS.md +46 -0
- package/BACKLOG.md +338 -0
- package/README.md +231 -11
- package/biome.json +36 -0
- package/cspell.config.yaml +14 -0
- package/dist/chunk-KRYIFLQR.js +92 -0
- package/dist/chunk-S7E6TFX6.js +742 -0
- package/dist/chunk-VVTGZNBT.js +1519 -0
- package/dist/cli/main.d.ts +1 -0
- package/dist/cli/main.js +2015 -0
- package/dist/index.d.ts +266 -0
- package/dist/index.js +86 -0
- package/dist/mcp/server.d.ts +1 -0
- package/dist/mcp/server.js +376 -0
- package/docs/019-USAGE.md +586 -0
- package/docs/020-current-implementation.md +364 -0
- package/docs/021-DOGFOODING-FINDINGS.md +175 -0
- package/docs/BACKLOG.md +80 -0
- package/docs/DESIGN.md +439 -0
- package/docs/PROJECT.md +88 -0
- package/docs/ROADMAP.md +407 -0
- package/docs/test-links.md +9 -0
- package/package.json +69 -10
- package/pnpm-workspace.yaml +5 -0
- package/research/config-analysis/01-current-implementation.md +470 -0
- package/research/config-analysis/02-strategy-recommendation.md +428 -0
- package/research/config-analysis/03-task-candidates.md +715 -0
- package/research/config-analysis/033-research-configuration-management.md +828 -0
- package/research/config-analysis/034-research-effect-cli-config.md +1504 -0
- package/research/config-analysis/04-consolidated-task-candidates.md +277 -0
- package/research/dogfood/consolidated-tool-evaluation.md +373 -0
- package/research/dogfood/strategy-a/a-synthesis.md +184 -0
- package/research/dogfood/strategy-a/a1-docs.md +226 -0
- package/research/dogfood/strategy-a/a2-amorphic.md +156 -0
- package/research/dogfood/strategy-a/a3-llm.md +164 -0
- package/research/dogfood/strategy-b/b-synthesis.md +228 -0
- package/research/dogfood/strategy-b/b1-architecture.md +207 -0
- package/research/dogfood/strategy-b/b2-gaps.md +258 -0
- package/research/dogfood/strategy-b/b3-workflows.md +250 -0
- package/research/dogfood/strategy-c/c-synthesis.md +451 -0
- package/research/dogfood/strategy-c/c1-explorer.md +192 -0
- package/research/dogfood/strategy-c/c2-diver-memory.md +145 -0
- package/research/dogfood/strategy-c/c3-diver-control.md +148 -0
- package/research/dogfood/strategy-c/c4-diver-failure.md +151 -0
- package/research/dogfood/strategy-c/c5-diver-execution.md +221 -0
- package/research/dogfood/strategy-c/c6-diver-org.md +221 -0
- package/research/effect-cli-error-handling.md +845 -0
- package/research/effect-errors-as-values.md +943 -0
- package/research/errors-task-analysis/00-consolidated-tasks.md +207 -0
- package/research/errors-task-analysis/cli-commands-analysis.md +909 -0
- package/research/errors-task-analysis/embeddings-analysis.md +709 -0
- package/research/errors-task-analysis/index-search-analysis.md +812 -0
- package/research/mdcontext-error-analysis.md +521 -0
- package/research/npm_publish/011-npm-workflow-research-agent2.md +792 -0
- package/research/npm_publish/012-npm-workflow-research-agent1.md +530 -0
- package/research/npm_publish/013-npm-workflow-research-agent3.md +722 -0
- package/research/npm_publish/014-npm-workflow-synthesis.md +556 -0
- package/research/npm_publish/031-npm-workflow-task-analysis.md +134 -0
- package/research/semantic-search/002-research-embedding-models.md +490 -0
- package/research/semantic-search/003-research-rag-alternatives.md +523 -0
- package/research/semantic-search/004-research-vector-search.md +841 -0
- package/research/semantic-search/032-research-semantic-search.md +427 -0
- package/research/task-management-2026/00-synthesis-recommendations.md +295 -0
- package/research/task-management-2026/01-ai-workflow-tools.md +416 -0
- package/research/task-management-2026/02-agent-framework-patterns.md +476 -0
- package/research/task-management-2026/03-lightweight-file-based.md +567 -0
- package/research/task-management-2026/04-established-tools-ai-features.md +541 -0
- package/research/task-management-2026/linear/01-core-features-workflow.md +771 -0
- package/research/task-management-2026/linear/02-api-integrations.md +930 -0
- package/research/task-management-2026/linear/03-ai-features.md +368 -0
- package/research/task-management-2026/linear/04-pricing-setup.md +205 -0
- package/research/task-management-2026/linear/05-usage-patterns-best-practices.md +605 -0
- package/scripts/rebuild-hnswlib.js +63 -0
- package/src/cli/argv-preprocessor.test.ts +210 -0
- package/src/cli/argv-preprocessor.ts +202 -0
- package/src/cli/cli.test.ts +430 -0
- package/src/cli/commands/backlinks.ts +54 -0
- package/src/cli/commands/context.ts +197 -0
- package/src/cli/commands/index-cmd.ts +300 -0
- package/src/cli/commands/index.ts +13 -0
- package/src/cli/commands/links.ts +52 -0
- package/src/cli/commands/search.ts +451 -0
- package/src/cli/commands/stats.ts +146 -0
- package/src/cli/commands/tree.ts +107 -0
- package/src/cli/flag-schemas.ts +275 -0
- package/src/cli/help.ts +386 -0
- package/src/cli/index.ts +9 -0
- package/src/cli/main.ts +145 -0
- package/src/cli/options.ts +31 -0
- package/src/cli/typo-suggester.test.ts +105 -0
- package/src/cli/typo-suggester.ts +130 -0
- package/src/cli/utils.ts +126 -0
- package/src/core/index.ts +1 -0
- package/src/core/types.ts +140 -0
- package/src/embeddings/index.ts +8 -0
- package/src/embeddings/openai-provider.ts +165 -0
- package/src/embeddings/semantic-search.ts +583 -0
- package/src/embeddings/types.ts +82 -0
- package/src/embeddings/vector-store.ts +299 -0
- package/src/index/index.ts +4 -0
- package/src/index/indexer.ts +446 -0
- package/src/index/storage.ts +196 -0
- package/src/index/types.ts +109 -0
- package/src/index/watcher.ts +131 -0
- package/src/index.ts +8 -0
- package/src/mcp/server.ts +483 -0
- package/src/parser/index.ts +1 -0
- package/src/parser/parser.test.ts +291 -0
- package/src/parser/parser.ts +395 -0
- package/src/parser/section-filter.ts +270 -0
- package/src/search/query-parser.test.ts +260 -0
- package/src/search/query-parser.ts +319 -0
- package/src/search/searcher.test.ts +182 -0
- package/src/search/searcher.ts +602 -0
- package/src/summarize/budget-bugs.test.ts +620 -0
- package/src/summarize/formatters.ts +419 -0
- package/src/summarize/index.ts +20 -0
- package/src/summarize/summarizer.test.ts +275 -0
- package/src/summarize/summarizer.ts +528 -0
- package/src/summarize/verify-bugs.test.ts +238 -0
- package/src/utils/index.ts +1 -0
- package/src/utils/tokens.test.ts +142 -0
- package/src/utils/tokens.ts +186 -0
- package/tests/fixtures/cli/.mdcontext/config.json +8 -0
- package/tests/fixtures/cli/.mdcontext/indexes/documents.json +33 -0
- package/tests/fixtures/cli/.mdcontext/indexes/links.json +12 -0
- package/tests/fixtures/cli/.mdcontext/indexes/sections.json +233 -0
- package/tests/fixtures/cli/.mdcontext/vectors.bin +0 -0
- package/tests/fixtures/cli/.mdcontext/vectors.meta.json +1264 -0
- package/tests/fixtures/cli/README.md +9 -0
- package/tests/fixtures/cli/api-reference.md +11 -0
- package/tests/fixtures/cli/getting-started.md +11 -0
- package/tsconfig.json +26 -0
- package/vitest.config.ts +21 -0
- package/vitest.setup.ts +12 -0
|
@@ -0,0 +1,583 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Semantic search functionality
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import * as fs from 'node:fs/promises'
|
|
6
|
+
import * as path from 'node:path'
|
|
7
|
+
import { Effect } from 'effect'
|
|
8
|
+
import {
|
|
9
|
+
createStorage,
|
|
10
|
+
loadDocumentIndex,
|
|
11
|
+
loadSectionIndex,
|
|
12
|
+
} from '../index/storage.js'
|
|
13
|
+
import type { SectionEntry } from '../index/types.js'
|
|
14
|
+
import { createOpenAIProvider, InvalidApiKeyError } from './openai-provider.js'
|
|
15
|
+
import type {
|
|
16
|
+
EmbeddingProvider,
|
|
17
|
+
SemanticSearchOptions,
|
|
18
|
+
SemanticSearchResult,
|
|
19
|
+
VectorEntry,
|
|
20
|
+
} from './types.js'
|
|
21
|
+
import { createVectorStore, type HnswVectorStore } from './vector-store.js'
|
|
22
|
+
|
|
23
|
+
// ============================================================================
|
|
24
|
+
// Embedding Text Generation
|
|
25
|
+
// ============================================================================
|
|
26
|
+
|
|
27
|
+
/**
 * Build the text that is sent to the embedding provider for one section.
 *
 * Layout (newline-separated): a `# <heading>` line, an optional
 * `Parent section: <parentHeading>` line (only when `parentHeading` is
 * truthy), a `Document: <documentTitle>` line, a blank line, then the raw
 * section content.
 */
const generateEmbeddingText = (
  section: SectionEntry,
  content: string,
  documentTitle: string,
  parentHeading?: string | undefined,
): string => {
  // An empty or missing parent heading contributes no line at all.
  const parentLine = parentHeading
    ? [`Parent section: ${parentHeading}`]
    : []

  return [
    `# ${section.heading}`,
    ...parentLine,
    `Document: ${documentTitle}`,
    '',
    content,
  ].join('\n')
}
|
|
45
|
+
|
|
46
|
+
// ============================================================================
|
|
47
|
+
// Cost Estimation
|
|
48
|
+
// ============================================================================
|
|
49
|
+
|
|
50
|
+
// Price per 1M tokens for text-embedding-3-small.
// Presumably USD — confirm against the provider's current pricing.
// Every cost and savings figure computed in this file uses this single rate.
|
|
51
|
+
const EMBEDDING_PRICE_PER_MILLION = 0.02
|
|
52
|
+
|
|
53
|
+
/** Embedding cost estimate for the counted sections of a single directory. */
export interface DirectoryEstimate {
  /** Directory the figures cover (`path.dirname` of the document path). */
  readonly directory: string
  /** Distinct documents in this directory with at least one counted section. */
  readonly fileCount: number
  /** Sections counted after short-section and exclude-pattern filtering. */
  readonly sectionCount: number
  /** Sum of `tokenCount` over the counted sections. */
  readonly estimatedTokens: number
  /** `estimatedTokens` priced at the per-million-token embedding rate. */
  readonly estimatedCost: number
}
|
|
60
|
+
|
|
61
|
+
/** Project-wide embedding estimate, with a per-directory breakdown. */
export interface EmbeddingEstimate {
  /** Sum of the per-directory file counts. */
  readonly totalFiles: number
  /** Sum of the per-directory section counts. */
  readonly totalSections: number
  /** Sum of the per-directory token counts. */
  readonly totalTokens: number
  /** `totalTokens` priced at the per-million-token embedding rate. */
  readonly totalCost: number
  /** Rough duration: 1.5 seconds per started batch of 100 sections. */
  readonly estimatedTimeSeconds: number
  /** Per-directory figures, sorted by directory name. */
  readonly byDirectory: readonly DirectoryEstimate[]
}
|
|
69
|
+
|
|
70
|
+
/**
 * Estimate how many tokens, how much money and how much time embedding the
 * indexed sections under `rootPath` would take, without calling the
 * embedding provider.
 *
 * Sections with fewer than 10 tokens are ignored, as are sections whose
 * document path matches one of `options.excludePatterns`. Patterns are simple
 * globs matched against the whole document path: `*` matches any run of
 * characters, `?` matches one character, and everything else is literal.
 *
 * @param rootPath - Project root; resolved with `path.resolve`.
 * @param options - Optional `excludePatterns` globs.
 * @returns Totals plus a per-directory breakdown sorted by directory name.
 *   Fails with `Error` when the document or section index is missing.
 */
export const estimateEmbeddingCost = (
  rootPath: string,
  options: { excludePatterns?: readonly string[] | undefined } = {},
): Effect.Effect<EmbeddingEstimate, Error> =>
  Effect.gen(function* () {
    const resolvedRoot = path.resolve(rootPath)
    const storage = createStorage(resolvedRoot)

    const docIndex = yield* loadDocumentIndex(storage)
    const sectionIndex = yield* loadSectionIndex(storage)

    if (!docIndex || !sectionIndex) {
      return yield* Effect.fail(
        new Error("Index not found. Run 'mdcontext index' first."),
      )
    }

    // Compile each exclude glob once, up front. Regex metacharacters other
    // than the glob wildcards are escaped first so that, for example, the
    // "." in "*.md" only matches a literal dot and characters such as "(" or
    // "[" cannot produce an invalid or unintended expression.
    const excludeRegexes = (options.excludePatterns ?? []).map((pattern) => {
      const escaped = pattern.replace(/[.+^${}()|[\]\\]/g, '\\$&')
      const body = escaped.replace(/\*/g, '.*').replace(/\?/g, '.')
      return new RegExp(`^${body}$`)
    })

    // Group by directory
    const byDir = new Map<
      string,
      { files: Set<string>; sections: number; tokens: number }
    >()

    for (const section of Object.values(sectionIndex.sections)) {
      // Skip very short sections (< 10 tokens)
      if (section.tokenCount < 10) continue

      // Check exclude patterns
      if (excludeRegexes.some((regex) => regex.test(section.documentPath))) {
        continue
      }

      const dir = path.dirname(section.documentPath) || '.'
      let entry = byDir.get(dir)
      if (!entry) {
        entry = { files: new Set(), sections: 0, tokens: 0 }
        byDir.set(dir, entry)
      }
      entry.files.add(section.documentPath)
      entry.sections++
      entry.tokens += section.tokenCount
    }

    const directoryEstimates: DirectoryEstimate[] = []
    let totalFiles = 0
    let totalSections = 0
    let totalTokens = 0

    for (const [dir, data] of byDir) {
      directoryEstimates.push({
        directory: dir,
        fileCount: data.files.size,
        sectionCount: data.sections,
        estimatedTokens: data.tokens,
        estimatedCost: (data.tokens / 1_000_000) * EMBEDDING_PRICE_PER_MILLION,
      })
      totalFiles += data.files.size
      totalSections += data.sections
      totalTokens += data.tokens
    }

    // Sort by directory name
    directoryEstimates.sort((a, b) => a.directory.localeCompare(b.directory))

    // Estimate time: ~1.5s per 100 sections (API batch processing)
    const estimatedTimeSeconds = Math.ceil(totalSections / 100) * 1.5

    return {
      totalFiles,
      totalSections,
      totalTokens,
      totalCost: (totalTokens / 1_000_000) * EMBEDDING_PRICE_PER_MILLION,
      estimatedTimeSeconds,
      byDirectory: directoryEstimates,
    }
  })
|
|
151
|
+
|
|
152
|
+
// ============================================================================
|
|
153
|
+
// Build Embeddings
|
|
154
|
+
// ============================================================================
|
|
155
|
+
|
|
156
|
+
/** Payload passed to `onFileProgress` as each document is about to be read. */
export interface FileProgress {
  /** 1-based position of the document among the documents being embedded. */
  readonly fileIndex: number
  /** Number of documents that have at least one section to embed. */
  readonly totalFiles: number
  /** Document path as stored in the index (joined onto the root to read it). */
  readonly filePath: string
  /** Number of sections of this document selected for embedding. */
  readonly sectionCount: number
}
|
|
162
|
+
|
|
163
|
+
/** Options accepted by `buildEmbeddings`. */
export interface BuildEmbeddingsOptions {
  /** When truthy, skip loading the existing vector store and re-embed. */
  readonly force?: boolean | undefined
  /** Embedding provider to use; an OpenAI provider is created when omitted. */
  readonly provider?: EmbeddingProvider | undefined
  /** Glob-like patterns; documents whose path matches are not embedded. */
  readonly excludePatterns?: readonly string[] | undefined
  /** Called once per document, before that document's file is read. */
  readonly onFileProgress?: ((progress: FileProgress) => void) | undefined
}
|
|
169
|
+
|
|
170
|
+
/** Outcome of a `buildEmbeddings` run. */
export interface BuildEmbeddingsResult {
  /** Number of vector entries created in this run (0 on a cache hit). */
  readonly sectionsEmbedded: number
  /** Tokens reported by the provider for this run. */
  readonly tokensUsed: number
  /** Cost reported by the provider for this run. */
  readonly cost: number
  /** Elapsed wall-clock time in milliseconds. */
  readonly duration: number
  /** Number of document files successfully read. */
  readonly filesProcessed: number
  /** Set to `true` when existing vectors were found and nothing was embedded. */
  readonly cacheHit?: boolean | undefined
  /** On a cache hit, the number of vectors already in the store. */
  readonly existingVectors?: number | undefined
  /** On a cache hit, the stored token total priced at the embedding rate. */
  readonly estimatedSavings?: number | undefined
}
|
|
180
|
+
|
|
181
|
+
/**
 * Build and persist embeddings for the indexed sections under `rootPath`.
 *
 * Flow: load the document and section indexes, resolve the embedding
 * provider, return early with `cacheHit: true` when an existing vector store
 * already holds at least one vector (unless `options.force`), read each
 * document once and slice out its sections, embed all section texts with a
 * single `provider.embed` call, then add the vectors to the store and save.
 *
 * Skipped: sections under 10 tokens, sections whose document is missing from
 * the document index, documents matching `options.excludePatterns` (`*` = any
 * run of characters, `?` = one character, everything else literal), and
 * files that cannot be read.
 *
 * @param rootPath - Project root; resolved with `path.resolve`.
 * @param options - Force flag, provider override, exclude globs and a
 *   per-file progress callback.
 * @returns Counts, token usage, cost and elapsed milliseconds. Fails with
 *   `Error` when the indexes are missing, the provider cannot be created, or
 *   embedding fails (an `InvalidApiKeyError` is passed through unwrapped).
 */
export const buildEmbeddings = (
  rootPath: string,
  options: BuildEmbeddingsOptions = {},
): Effect.Effect<BuildEmbeddingsResult, Error> =>
  Effect.gen(function* () {
    const startTime = Date.now()
    const resolvedRoot = path.resolve(rootPath)
    const storage = createStorage(resolvedRoot)

    // Load indexes
    const docIndex = yield* loadDocumentIndex(storage)
    const sectionIndex = yield* loadSectionIndex(storage)

    if (!docIndex || !sectionIndex) {
      return yield* Effect.fail(
        new Error("Index not found. Run 'mdcontext index' first."),
      )
    }

    // Get or create provider (wrap in Effect.try to catch MissingApiKeyError)
    const provider =
      options.provider ??
      (yield* Effect.try({
        try: () => createOpenAIProvider(),
        catch: (e) => e as Error,
      }))
    const dimensions = provider.dimensions

    // Create vector store
    const vectorStore = createVectorStore(
      resolvedRoot,
      dimensions,
    ) as HnswVectorStore
    vectorStore.setProvider(provider.name)

    // Load existing if not forcing
    if (!options.force) {
      const loaded = yield* vectorStore.load()
      if (loaded) {
        const stats = vectorStore.getStats()
        // Skip if any embeddings exist
        if (stats.count > 0) {
          const duration = Date.now() - startTime
          // Estimate savings based on existing tokens
          const estimatedSavings =
            (stats.totalTokens / 1_000_000) * EMBEDDING_PRICE_PER_MILLION
          return {
            sectionsEmbedded: 0,
            tokensUsed: 0,
            cost: 0,
            duration,
            filesProcessed: 0,
            cacheHit: true,
            existingVectors: stats.count,
            estimatedSavings,
          }
        }
      }
    }

    // Compile each exclude glob once. Regex metacharacters other than the
    // glob wildcards are escaped first so that "." in "*.md" matches only a
    // literal dot and characters such as "(" or "[" cannot produce an
    // invalid or unintended expression.
    const excludeRegexes = (options.excludePatterns ?? []).map((pattern) => {
      const escaped = pattern.replace(/[.+^${}()|[\]\\]/g, '\\$&')
      const body = escaped.replace(/\*/g, '.*').replace(/\?/g, '.')
      return new RegExp(`^${body}$`)
    })
    const isExcluded = (docPath: string): boolean =>
      excludeRegexes.some((regex) => regex.test(docPath))

    // Group sections by document for efficient file reading
    const sectionsByDoc = new Map<
      string,
      { section: SectionEntry; parentHeading: string | undefined }[]
    >()

    for (const section of Object.values(sectionIndex.sections)) {
      const document = docIndex.documents[section.documentPath]
      if (!document) continue

      // Skip very short sections (< 10 tokens)
      if (section.tokenCount < 10) continue

      // Check exclude patterns
      if (isExcluded(section.documentPath)) continue

      // Find parent heading if any: the last section in the document's list
      // that is exactly one level up and starts before this section.
      let parentHeading: string | undefined
      if (section.level > 1) {
        const docSections = sectionIndex.byDocument[document.id] ?? []
        for (const sibId of docSections) {
          const sib = sectionIndex.sections[sibId]
          if (
            sib &&
            sib.level === section.level - 1 &&
            sib.startLine < section.startLine
          ) {
            parentHeading = sib.heading
          }
        }
      }

      const docPath = section.documentPath
      let grouped = sectionsByDoc.get(docPath)
      if (!grouped) {
        grouped = []
        sectionsByDoc.set(docPath, grouped)
      }
      grouped.push({ section, parentHeading })
    }

    if (sectionsByDoc.size === 0) {
      const duration = Date.now() - startTime
      return {
        sectionsEmbedded: 0,
        tokensUsed: 0,
        cost: 0,
        duration,
        filesProcessed: 0,
      }
    }

    // Prepare sections for embedding by reading file content
    const sectionsToEmbed: { section: SectionEntry; text: string }[] = []
    const totalFiles = sectionsByDoc.size
    let fileIndex = 0
    let filesProcessed = 0

    for (const [docPath, sections] of sectionsByDoc) {
      fileIndex++
      const document = docIndex.documents[docPath]
      if (!document) continue

      // Report file progress (fileIndex is 1-based)
      if (options.onFileProgress) {
        options.onFileProgress({
          fileIndex,
          totalFiles,
          filePath: docPath,
          sectionCount: sections.length,
        })
      }

      const filePath = path.join(resolvedRoot, docPath)
      // A JavaScript try/catch cannot intercept a failing effect inside
      // Effect.gen, and Effect.promise turns a rejection into a defect.
      // Use tryPromise and recover to null so an unreadable file is
      // skipped instead of aborting the whole build.
      const fileContent = yield* Effect.tryPromise(() =>
        fs.readFile(filePath, 'utf-8'),
      ).pipe(Effect.catchAll(() => Effect.succeed(null)))
      if (fileContent === null) continue

      filesProcessed++
      const lines = fileContent.split('\n')

      for (const { section, parentHeading } of sections) {
        // Extract section content from file (startLine is 1-based, endLine
        // is inclusive)
        const content = lines
          .slice(section.startLine - 1, section.endLine)
          .join('\n')

        const text = generateEmbeddingText(
          section,
          content,
          document.title,
          parentHeading,
        )
        sectionsToEmbed.push({ section, text })
      }
    }

    if (sectionsToEmbed.length === 0) {
      const duration = Date.now() - startTime
      return {
        sectionsEmbedded: 0,
        tokensUsed: 0,
        cost: 0,
        duration,
        filesProcessed,
      }
    }

    // Generate embeddings
    const texts = sectionsToEmbed.map((s) => s.text)
    const result = yield* Effect.tryPromise({
      try: () => provider.embed(texts),
      catch: (e) => {
        // Preserve InvalidApiKeyError so handleApiKeyError can catch it
        if (e instanceof InvalidApiKeyError) return e
        return new Error(
          `Embedding failed: ${e instanceof Error ? e.message : String(e)}`,
        )
      },
    })

    // Create vector entries; a section with no embedding at its index is
    // left out.
    const entries: VectorEntry[] = []
    sectionsToEmbed.forEach(({ section }, i) => {
      const embedding = result.embeddings[i]
      if (!embedding) return

      entries.push({
        id: section.id,
        sectionId: section.id,
        documentPath: section.documentPath,
        heading: section.heading,
        embedding,
      })
    })

    // Add to vector store
    yield* vectorStore.add(entries)
    vectorStore.addCost(result.cost, result.tokensUsed)

    // Save
    yield* vectorStore.save()

    const duration = Date.now() - startTime

    return {
      sectionsEmbedded: entries.length,
      tokensUsed: result.tokensUsed,
      cost: result.cost,
      duration,
      filesProcessed,
    }
  })
|
|
410
|
+
|
|
411
|
+
// ============================================================================
|
|
412
|
+
// Semantic Search
|
|
413
|
+
// ============================================================================
|
|
414
|
+
|
|
415
|
+
/**
 * Embed `query` and return the most similar indexed sections.
 *
 * The vector store is asked for `limit * 2` candidates at or above
 * `threshold`, the optional `pathPattern` filter is applied, and the first
 * `limit` survivors are returned. Because filtering happens after the
 * over-fetch, a restrictive `pathPattern` can yield fewer than `limit`
 * results.
 *
 * `pathPattern` is a case-insensitive glob matched against the whole
 * document path: `*` matches any run of characters, `?` matches one
 * character, and everything else is literal.
 *
 * @param rootPath - Project root; resolved with `path.resolve`.
 * @param query - Text to embed and search for.
 * @param options - `limit` (default 10), `threshold` (default 0) and
 *   `pathPattern`.
 * @returns Matching sections without content. Fails with `Error` when the
 *   provider cannot be created, no embeddings have been built, or the query
 *   cannot be embedded (an `InvalidApiKeyError` is passed through unwrapped).
 */
export const semanticSearch = (
  rootPath: string,
  query: string,
  options: SemanticSearchOptions = {},
): Effect.Effect<readonly SemanticSearchResult[], Error> =>
  Effect.gen(function* () {
    const resolvedRoot = path.resolve(rootPath)

    // Get provider for query embedding (wrap in Effect.try to catch MissingApiKeyError)
    const provider = yield* Effect.try({
      try: () => createOpenAIProvider(),
      catch: (e) => e as Error,
    })
    const dimensions = provider.dimensions

    // Load vector store
    const vectorStore = createVectorStore(resolvedRoot, dimensions)
    const loaded = yield* vectorStore.load()

    if (!loaded) {
      return yield* Effect.fail(
        new Error("Embeddings not found. Run 'mdcontext embed' first."),
      )
    }

    // Embed the query
    const queryResult = yield* Effect.tryPromise({
      try: () => provider.embed([query]),
      catch: (e) => {
        // Keep InvalidApiKeyError intact, matching buildEmbeddings, so
        // callers can recognise a bad key instead of a generic failure.
        if (e instanceof InvalidApiKeyError) return e
        return new Error(
          `Query embedding failed: ${e instanceof Error ? e.message : String(e)}`,
        )
      },
    })

    const queryVector = queryResult.embeddings[0]
    if (!queryVector) {
      return yield* Effect.fail(new Error('Failed to generate query embedding'))
    }

    // Search
    const limit = options.limit ?? 10
    const threshold = options.threshold ?? 0

    const searchResults = yield* vectorStore.search(
      queryVector,
      limit * 2,
      threshold,
    )

    // Apply path filter if specified
    let filteredResults = searchResults
    if (options.pathPattern) {
      // Escape every regex metacharacter except the glob wildcards before
      // translating them; otherwise characters such as "(" or "[" in a path
      // would throw a SyntaxError or change what the pattern matches.
      const pattern = options.pathPattern
        .replace(/[.+^${}()|[\]\\]/g, '\\$&')
        .replace(/\*/g, '.*')
        .replace(/\?/g, '.')
      const regex = new RegExp(`^${pattern}$`, 'i')
      filteredResults = searchResults.filter((r) => regex.test(r.documentPath))
    }

    // Convert to SemanticSearchResult
    const results: SemanticSearchResult[] = filteredResults
      .slice(0, limit)
      .map((r) => ({
        sectionId: r.sectionId,
        documentPath: r.documentPath,
        heading: r.heading,
        similarity: r.similarity,
      }))

    return results
  })
|
|
486
|
+
|
|
487
|
+
// ============================================================================
|
|
488
|
+
// Search with Content
|
|
489
|
+
// ============================================================================
|
|
490
|
+
|
|
491
|
+
/**
 * Run `semanticSearch` and attach each hit's section text as `content`.
 *
 * Content is sliced from the document file using the section's `startLine`
 * and `endLine` from the section index. A hit is returned without `content`
 * when the section index is missing, the section is no longer in the index,
 * or the document file cannot be read.
 *
 * @param rootPath - Project root; resolved with `path.resolve`.
 * @param query - Text to embed and search for.
 * @param options - Forwarded unchanged to `semanticSearch`.
 * @returns The search hits in their original order, with `content` where
 *   available. Fails with whatever `semanticSearch` fails with.
 */
export const semanticSearchWithContent = (
  rootPath: string,
  query: string,
  options: SemanticSearchOptions = {},
): Effect.Effect<readonly SemanticSearchResult[], Error> =>
  Effect.gen(function* () {
    const resolvedRoot = path.resolve(rootPath)
    const results = yield* semanticSearch(resolvedRoot, query, options)

    const storage = createStorage(resolvedRoot)
    const sectionIndex = yield* loadSectionIndex(storage)

    if (!sectionIndex) {
      return results
    }

    const resultsWithContent: SemanticSearchResult[] = []

    for (const result of results) {
      const section = sectionIndex.sections[result.sectionId]
      if (!section) {
        resultsWithContent.push(result)
        continue
      }

      const filePath = path.join(resolvedRoot, result.documentPath)

      // A JavaScript try/catch cannot intercept a failing effect inside
      // Effect.gen, and Effect.promise turns a rejection into a defect.
      // Use tryPromise and recover to null so an unreadable file degrades
      // to a content-less hit instead of aborting the search.
      const fileContent = yield* Effect.tryPromise(() =>
        fs.readFile(filePath, 'utf-8'),
      ).pipe(Effect.catchAll(() => Effect.succeed(null)))

      if (fileContent === null) {
        resultsWithContent.push(result)
        continue
      }

      // startLine is 1-based and endLine is inclusive.
      const content = fileContent
        .split('\n')
        .slice(section.startLine - 1, section.endLine)
        .join('\n')

      resultsWithContent.push({
        ...result,
        content,
      })
    }

    return resultsWithContent
  })
|
|
539
|
+
|
|
540
|
+
// ============================================================================
|
|
541
|
+
// Get Embedding Stats
|
|
542
|
+
// ============================================================================
|
|
543
|
+
|
|
544
|
+
/** Summary of the persisted vector store, as reported by `getEmbeddingStats`. */
export interface EmbeddingStats {
  /** `false` when no vector store could be loaded. */
  readonly hasEmbeddings: boolean
  /** Number of stored vectors (0 when nothing was loaded). */
  readonly count: number
  /** Provider name recorded in the store, or `'none'` when nothing was loaded. */
  readonly provider: string
  /** Vector dimensionality reported by the store (0 when nothing was loaded). */
  readonly dimensions: number
  /** Accumulated embedding cost recorded in the store. */
  readonly totalCost: number
  /** Accumulated token usage recorded in the store. */
  readonly totalTokens: number
}
|
|
552
|
+
|
|
553
|
+
/**
 * Report what the persisted vector store under `rootPath` contains.
 *
 * When no store can be loaded, an all-zero result with
 * `hasEmbeddings: false` and `provider: 'none'` is returned instead of a
 * failure.
 *
 * NOTE(review): the store is always opened with 1536 dimensions, whichever
 * provider built it. Whether `load()` copes with a store of another
 * dimensionality depends on the vector-store implementation — confirm.
 */
export const getEmbeddingStats = (
  rootPath: string,
): Effect.Effect<EmbeddingStats, Error> =>
  Effect.gen(function* () {
    const store = createVectorStore(path.resolve(rootPath), 1536)
    const isLoaded = yield* store.load()

    if (isLoaded) {
      const current = store.getStats()
      return {
        hasEmbeddings: true,
        count: current.count,
        provider: current.provider,
        dimensions: current.dimensions,
        totalCost: current.totalCost,
        totalTokens: current.totalTokens,
      }
    }

    // Nothing on disk yet: report an empty store.
    return {
      hasEmbeddings: false,
      count: 0,
      provider: 'none',
      dimensions: 0,
      totalCost: 0,
      totalTokens: 0,
    }
  })
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embedding types for mdcontext
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
// ============================================================================
|
|
6
|
+
// Embedding Provider
|
|
7
|
+
// ============================================================================
|
|
8
|
+
|
|
9
|
+
/** Contract for a service that turns texts into embedding vectors. */
export interface EmbeddingProvider {
  /** Identifier of the provider. */
  readonly name: string
  /** Length of each vector the provider produces. */
  readonly dimensions: number
  /** Embed a batch of texts, resolving with vectors plus usage figures. */
  embed(texts: string[]): Promise<EmbeddingResult>
}
|
|
14
|
+
|
|
15
|
+
/** What an `EmbeddingProvider.embed` call resolves with. */
export interface EmbeddingResult {
  /** One vector per embedded text. */
  readonly embeddings: readonly number[][]
  /** Tokens consumed by the call. */
  readonly tokensUsed: number
  /** Cost of the call. */
  readonly cost: number
}
|
|
20
|
+
|
|
21
|
+
// ============================================================================
|
|
22
|
+
// Vector Index
|
|
23
|
+
// ============================================================================
|
|
24
|
+
|
|
25
|
+
/** One stored embedding together with the section it was built from. */
export interface VectorEntry {
  /** Identifier of the entry. */
  readonly id: string
  /** Identifier of the embedded section. */
  readonly sectionId: string
  /** Path of the document that contains the section. */
  readonly documentPath: string
  /** Heading text of the section. */
  readonly heading: string
  /** The embedding vector. */
  readonly embedding: readonly number[]
}
|
|
32
|
+
|
|
33
|
+
/** Shape of a vector index: entries plus provider and usage metadata. */
export interface VectorIndex {
  /** Format version of the index. */
  readonly version: number
  /** Name of the provider recorded for the index. */
  readonly provider: string
  /** Dimensionality recorded for the index. */
  readonly dimensions: number
  /** Vector entries keyed by string (presumably the entry id — confirm). */
  readonly entries: Record<string, VectorEntry>
  /** Accumulated embedding cost. */
  readonly totalCost: number
  /** Accumulated token usage. */
  readonly totalTokens: number
  /** Creation timestamp as a string (format not specified here). */
  readonly createdAt: string
  /** Last-update timestamp as a string (format not specified here). */
  readonly updatedAt: string
}
|
|
43
|
+
|
|
44
|
+
// ============================================================================
|
|
45
|
+
// Semantic Search
|
|
46
|
+
// ============================================================================
|
|
47
|
+
|
|
48
|
+
/** Tuning knobs for a semantic search; every field is optional. */
export interface SemanticSearchOptions {
  /** Upper bound on the number of results returned. */
  readonly limit?: number | undefined
  /** Lowest similarity (0-1) a result may have. */
  readonly threshold?: number | undefined
  /** Only return results whose document path matches this pattern. */
  readonly pathPattern?: string | undefined
}
|
|
56
|
+
|
|
57
|
+
/** A single semantic search hit. */
export interface SemanticSearchResult {
  /** Identifier of the matching section. */
  readonly sectionId: string
  /** Path of the document that contains the section. */
  readonly documentPath: string
  /** Heading text of the section. */
  readonly heading: string
  /** Similarity score between the query and the section. */
  readonly similarity: number
  /** Section text; only present when the caller asked for content. */
  readonly content?: string | undefined
}
|
|
64
|
+
|
|
65
|
+
// ============================================================================
|
|
66
|
+
// Errors
|
|
67
|
+
// ============================================================================
|
|
68
|
+
|
|
69
|
+
/** Tagged error value describing why an embedding request failed. */
export interface EmbedError {
  /** Discriminant; always the literal `'EmbedError'`. */
  readonly _tag: 'EmbedError'
  /** Coarse failure category. */
  readonly cause: 'RateLimit' | 'ApiKey' | 'Network' | 'Unknown'
  /** Human-readable description of the failure. */
  readonly message: string
}
|
|
74
|
+
|
|
75
|
+
/**
 * Construct an `EmbedError` value.
 *
 * @param cause - Failure category.
 * @param message - Human-readable description.
 * @returns A plain object tagged `'EmbedError'` carrying both arguments.
 */
export const embedError = (
  cause: EmbedError['cause'],
  message: string,
): EmbedError => {
  const error: EmbedError = { _tag: 'EmbedError', cause, message }
  return error
}
|