mdcontext 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140)
  1. package/.changeset/README.md +28 -0
  2. package/.changeset/config.json +11 -0
  3. package/.github/workflows/ci.yml +83 -0
  4. package/.github/workflows/release.yml +113 -0
  5. package/.tldrignore +112 -0
  6. package/AGENTS.md +46 -0
  7. package/BACKLOG.md +338 -0
  8. package/README.md +231 -11
  9. package/biome.json +36 -0
  10. package/cspell.config.yaml +14 -0
  11. package/dist/chunk-KRYIFLQR.js +92 -0
  12. package/dist/chunk-S7E6TFX6.js +742 -0
  13. package/dist/chunk-VVTGZNBT.js +1519 -0
  14. package/dist/cli/main.d.ts +1 -0
  15. package/dist/cli/main.js +2015 -0
  16. package/dist/index.d.ts +266 -0
  17. package/dist/index.js +86 -0
  18. package/dist/mcp/server.d.ts +1 -0
  19. package/dist/mcp/server.js +376 -0
  20. package/docs/019-USAGE.md +586 -0
  21. package/docs/020-current-implementation.md +364 -0
  22. package/docs/021-DOGFOODING-FINDINGS.md +175 -0
  23. package/docs/BACKLOG.md +80 -0
  24. package/docs/DESIGN.md +439 -0
  25. package/docs/PROJECT.md +88 -0
  26. package/docs/ROADMAP.md +407 -0
  27. package/docs/test-links.md +9 -0
  28. package/package.json +69 -10
  29. package/pnpm-workspace.yaml +5 -0
  30. package/research/config-analysis/01-current-implementation.md +470 -0
  31. package/research/config-analysis/02-strategy-recommendation.md +428 -0
  32. package/research/config-analysis/03-task-candidates.md +715 -0
  33. package/research/config-analysis/033-research-configuration-management.md +828 -0
  34. package/research/config-analysis/034-research-effect-cli-config.md +1504 -0
  35. package/research/config-analysis/04-consolidated-task-candidates.md +277 -0
  36. package/research/dogfood/consolidated-tool-evaluation.md +373 -0
  37. package/research/dogfood/strategy-a/a-synthesis.md +184 -0
  38. package/research/dogfood/strategy-a/a1-docs.md +226 -0
  39. package/research/dogfood/strategy-a/a2-amorphic.md +156 -0
  40. package/research/dogfood/strategy-a/a3-llm.md +164 -0
  41. package/research/dogfood/strategy-b/b-synthesis.md +228 -0
  42. package/research/dogfood/strategy-b/b1-architecture.md +207 -0
  43. package/research/dogfood/strategy-b/b2-gaps.md +258 -0
  44. package/research/dogfood/strategy-b/b3-workflows.md +250 -0
  45. package/research/dogfood/strategy-c/c-synthesis.md +451 -0
  46. package/research/dogfood/strategy-c/c1-explorer.md +192 -0
  47. package/research/dogfood/strategy-c/c2-diver-memory.md +145 -0
  48. package/research/dogfood/strategy-c/c3-diver-control.md +148 -0
  49. package/research/dogfood/strategy-c/c4-diver-failure.md +151 -0
  50. package/research/dogfood/strategy-c/c5-diver-execution.md +221 -0
  51. package/research/dogfood/strategy-c/c6-diver-org.md +221 -0
  52. package/research/effect-cli-error-handling.md +845 -0
  53. package/research/effect-errors-as-values.md +943 -0
  54. package/research/errors-task-analysis/00-consolidated-tasks.md +207 -0
  55. package/research/errors-task-analysis/cli-commands-analysis.md +909 -0
  56. package/research/errors-task-analysis/embeddings-analysis.md +709 -0
  57. package/research/errors-task-analysis/index-search-analysis.md +812 -0
  58. package/research/mdcontext-error-analysis.md +521 -0
  59. package/research/npm_publish/011-npm-workflow-research-agent2.md +792 -0
  60. package/research/npm_publish/012-npm-workflow-research-agent1.md +530 -0
  61. package/research/npm_publish/013-npm-workflow-research-agent3.md +722 -0
  62. package/research/npm_publish/014-npm-workflow-synthesis.md +556 -0
  63. package/research/npm_publish/031-npm-workflow-task-analysis.md +134 -0
  64. package/research/semantic-search/002-research-embedding-models.md +490 -0
  65. package/research/semantic-search/003-research-rag-alternatives.md +523 -0
  66. package/research/semantic-search/004-research-vector-search.md +841 -0
  67. package/research/semantic-search/032-research-semantic-search.md +427 -0
  68. package/research/task-management-2026/00-synthesis-recommendations.md +295 -0
  69. package/research/task-management-2026/01-ai-workflow-tools.md +416 -0
  70. package/research/task-management-2026/02-agent-framework-patterns.md +476 -0
  71. package/research/task-management-2026/03-lightweight-file-based.md +567 -0
  72. package/research/task-management-2026/04-established-tools-ai-features.md +541 -0
  73. package/research/task-management-2026/linear/01-core-features-workflow.md +771 -0
  74. package/research/task-management-2026/linear/02-api-integrations.md +930 -0
  75. package/research/task-management-2026/linear/03-ai-features.md +368 -0
  76. package/research/task-management-2026/linear/04-pricing-setup.md +205 -0
  77. package/research/task-management-2026/linear/05-usage-patterns-best-practices.md +605 -0
  78. package/scripts/rebuild-hnswlib.js +63 -0
  79. package/src/cli/argv-preprocessor.test.ts +210 -0
  80. package/src/cli/argv-preprocessor.ts +202 -0
  81. package/src/cli/cli.test.ts +430 -0
  82. package/src/cli/commands/backlinks.ts +54 -0
  83. package/src/cli/commands/context.ts +197 -0
  84. package/src/cli/commands/index-cmd.ts +300 -0
  85. package/src/cli/commands/index.ts +13 -0
  86. package/src/cli/commands/links.ts +52 -0
  87. package/src/cli/commands/search.ts +451 -0
  88. package/src/cli/commands/stats.ts +146 -0
  89. package/src/cli/commands/tree.ts +107 -0
  90. package/src/cli/flag-schemas.ts +275 -0
  91. package/src/cli/help.ts +386 -0
  92. package/src/cli/index.ts +9 -0
  93. package/src/cli/main.ts +145 -0
  94. package/src/cli/options.ts +31 -0
  95. package/src/cli/typo-suggester.test.ts +105 -0
  96. package/src/cli/typo-suggester.ts +130 -0
  97. package/src/cli/utils.ts +126 -0
  98. package/src/core/index.ts +1 -0
  99. package/src/core/types.ts +140 -0
  100. package/src/embeddings/index.ts +8 -0
  101. package/src/embeddings/openai-provider.ts +165 -0
  102. package/src/embeddings/semantic-search.ts +583 -0
  103. package/src/embeddings/types.ts +82 -0
  104. package/src/embeddings/vector-store.ts +299 -0
  105. package/src/index/index.ts +4 -0
  106. package/src/index/indexer.ts +446 -0
  107. package/src/index/storage.ts +196 -0
  108. package/src/index/types.ts +109 -0
  109. package/src/index/watcher.ts +131 -0
  110. package/src/index.ts +8 -0
  111. package/src/mcp/server.ts +483 -0
  112. package/src/parser/index.ts +1 -0
  113. package/src/parser/parser.test.ts +291 -0
  114. package/src/parser/parser.ts +395 -0
  115. package/src/parser/section-filter.ts +270 -0
  116. package/src/search/query-parser.test.ts +260 -0
  117. package/src/search/query-parser.ts +319 -0
  118. package/src/search/searcher.test.ts +182 -0
  119. package/src/search/searcher.ts +602 -0
  120. package/src/summarize/budget-bugs.test.ts +620 -0
  121. package/src/summarize/formatters.ts +419 -0
  122. package/src/summarize/index.ts +20 -0
  123. package/src/summarize/summarizer.test.ts +275 -0
  124. package/src/summarize/summarizer.ts +528 -0
  125. package/src/summarize/verify-bugs.test.ts +238 -0
  126. package/src/utils/index.ts +1 -0
  127. package/src/utils/tokens.test.ts +142 -0
  128. package/src/utils/tokens.ts +186 -0
  129. package/tests/fixtures/cli/.mdcontext/config.json +8 -0
  130. package/tests/fixtures/cli/.mdcontext/indexes/documents.json +33 -0
  131. package/tests/fixtures/cli/.mdcontext/indexes/links.json +12 -0
  132. package/tests/fixtures/cli/.mdcontext/indexes/sections.json +233 -0
  133. package/tests/fixtures/cli/.mdcontext/vectors.bin +0 -0
  134. package/tests/fixtures/cli/.mdcontext/vectors.meta.json +1264 -0
  135. package/tests/fixtures/cli/README.md +9 -0
  136. package/tests/fixtures/cli/api-reference.md +11 -0
  137. package/tests/fixtures/cli/getting-started.md +11 -0
  138. package/tsconfig.json +26 -0
  139. package/vitest.config.ts +21 -0
  140. package/vitest.setup.ts +12 -0
@@ -0,0 +1,583 @@
1
+ /**
2
+ * Semantic search functionality
3
+ */
4
+
5
+ import * as fs from 'node:fs/promises'
6
+ import * as path from 'node:path'
7
+ import { Effect } from 'effect'
8
+ import {
9
+ createStorage,
10
+ loadDocumentIndex,
11
+ loadSectionIndex,
12
+ } from '../index/storage.js'
13
+ import type { SectionEntry } from '../index/types.js'
14
+ import { createOpenAIProvider, InvalidApiKeyError } from './openai-provider.js'
15
+ import type {
16
+ EmbeddingProvider,
17
+ SemanticSearchOptions,
18
+ SemanticSearchResult,
19
+ VectorEntry,
20
+ } from './types.js'
21
+ import { createVectorStore, type HnswVectorStore } from './vector-store.js'
22
+
23
+ // ============================================================================
24
+ // Embedding Text Generation
25
+ // ============================================================================
26
+
27
/**
 * Compose the text that is sent to the embedding provider for one section.
 *
 * Layout (newline separated): `# <heading>`, an optional `Parent section: <parent>` line,
 * `Document: <title>`, an empty line, then the section content.
 *
 * @param section - Section entry; only `heading` is read here.
 * @param content - Raw section text.
 * @param documentTitle - Title of the document that owns the section.
 * @param parentHeading - Heading of the parent section; the line is omitted when this is
 *   undefined or an empty string.
 * @returns The assembled embedding text.
 */
const generateEmbeddingText = (
  section: SectionEntry,
  content: string,
  documentTitle: string,
  parentHeading?: string | undefined,
): string => {
  const headingLine = `# ${section.heading}`
  const headerLines = parentHeading
    ? [headingLine, `Parent section: ${parentHeading}`]
    : [headingLine]

  return [...headerLines, `Document: ${documentTitle}`, '', content].join('\n')
}
45
+
46
+ // ============================================================================
47
+ // Cost Estimation
48
+ // ============================================================================
49
+
50
/**
 * Price per 1M tokens for text-embedding-3-small.
 * NOTE(review): the currency is not stated in this file — presumably USD; confirm.
 */
const EMBEDDING_PRICE_PER_MILLION = 0.02
52
+
53
/** Embedding cost estimate for the sections that live in a single directory. */
export interface DirectoryEstimate {
  /** Directory name, taken from `path.dirname` of the section's document path. */
  readonly directory: string
  /** Number of distinct documents in this directory that contributed sections. */
  readonly fileCount: number
  /** Number of sections counted for this directory. */
  readonly sectionCount: number
  /** Sum of the `tokenCount` of the counted sections. */
  readonly estimatedTokens: number
  /** `estimatedTokens` priced at the per-million-token embedding rate. */
  readonly estimatedCost: number
}
60
+
61
/** Aggregate embedding cost estimate for an indexed root, with a per-directory breakdown. */
export interface EmbeddingEstimate {
  /** Sum of the per-directory file counts. */
  readonly totalFiles: number
  /** Total number of sections that would be embedded. */
  readonly totalSections: number
  /** Total token count across those sections. */
  readonly totalTokens: number
  /** `totalTokens` priced at the per-million-token embedding rate. */
  readonly totalCost: number
  /** Rough duration in seconds (1.5 s per started group of 100 sections). */
  readonly estimatedTimeSeconds: number
  /** Per-directory estimates, sorted by directory name. */
  readonly byDirectory: readonly DirectoryEstimate[]
}
69
+
70
/**
 * Estimate how many sections and tokens would be embedded for an indexed root, and what
 * that would cost, without calling the embedding provider.
 *
 * Sections with fewer than 10 tokens and sections whose document path matches one of
 * `options.excludePatterns` are left out of the estimate.
 *
 * @param rootPath - Project root; resolved with `path.resolve`.
 * @param options - `excludePatterns` are glob-like patterns matched against the whole
 *   document path: `*` matches any run of characters, `?` matches one character, and every
 *   other character (including `.`) is matched literally.
 * @returns Totals plus a per-directory breakdown sorted by directory name.
 *   Fails with `Error` when the document or section index cannot be loaded.
 */
export const estimateEmbeddingCost = (
  rootPath: string,
  options: { excludePatterns?: readonly string[] | undefined } = {},
): Effect.Effect<EmbeddingEstimate, Error> =>
  Effect.gen(function* () {
    const resolvedRoot = path.resolve(rootPath)
    const storage = createStorage(resolvedRoot)

    const docIndex = yield* loadDocumentIndex(storage)
    const sectionIndex = yield* loadSectionIndex(storage)

    if (!docIndex || !sectionIndex) {
      return yield* Effect.fail(
        new Error("Index not found. Run 'mdcontext index' first."),
      )
    }

    // Compile the exclude patterns once. Regex metacharacters are escaped BEFORE the glob
    // wildcards are translated, so a pattern such as `docs/*.md` matches a literal dot
    // instead of "any character", and characters like `(` or `[` cannot break the regex.
    const excludeRegexes = (options.excludePatterns ?? []).map((pattern) => {
      const body = pattern
        .replace(/[.+^${}()|[\]\\]/g, '\\$&')
        .replace(/\*/g, '.*')
        .replace(/\?/g, '.')
      return new RegExp(`^${body}$`)
    })

    // Group by directory
    const byDir = new Map<
      string,
      { files: Set<string>; sections: number; tokens: number }
    >()

    for (const section of Object.values(sectionIndex.sections)) {
      // Skip very short sections (< 10 tokens)
      if (section.tokenCount < 10) continue

      // Check exclude patterns
      if (excludeRegexes.some((regex) => regex.test(section.documentPath))) {
        continue
      }

      const dir = path.dirname(section.documentPath) || '.'
      let entry = byDir.get(dir)
      if (!entry) {
        entry = { files: new Set<string>(), sections: 0, tokens: 0 }
        byDir.set(dir, entry)
      }
      entry.files.add(section.documentPath)
      entry.sections++
      entry.tokens += section.tokenCount
    }

    const directoryEstimates: DirectoryEstimate[] = []
    let totalFiles = 0
    let totalSections = 0
    let totalTokens = 0

    for (const [dir, data] of byDir) {
      directoryEstimates.push({
        directory: dir,
        fileCount: data.files.size,
        sectionCount: data.sections,
        estimatedTokens: data.tokens,
        estimatedCost: (data.tokens / 1_000_000) * EMBEDDING_PRICE_PER_MILLION,
      })
      totalFiles += data.files.size
      totalSections += data.sections
      totalTokens += data.tokens
    }

    // Sort by directory name
    directoryEstimates.sort((a, b) => a.directory.localeCompare(b.directory))

    // Estimate time: ~1.5s per 100 sections (API batch processing)
    const estimatedTimeSeconds = Math.ceil(totalSections / 100) * 1.5

    return {
      totalFiles,
      totalSections,
      totalTokens,
      totalCost: (totalTokens / 1_000_000) * EMBEDDING_PRICE_PER_MILLION,
      estimatedTimeSeconds,
      byDirectory: directoryEstimates,
    }
  })
151
+
152
+ // ============================================================================
153
+ // Build Embeddings
154
+ // ============================================================================
155
+
156
/** Progress notification emitted once per document while embedding input is prepared. */
export interface FileProgress {
  /** 1-based position of the document in the processing order. */
  readonly fileIndex: number
  /** Number of documents that will be processed. */
  readonly totalFiles: number
  /** Document path as stored in the index. */
  readonly filePath: string
  /** Number of sections of this document that are queued for embedding. */
  readonly sectionCount: number
}
162
+
163
/** Options accepted by `buildEmbeddings`. */
export interface BuildEmbeddingsOptions {
  /** When true, existing stored vectors are not loaded and embeddings are rebuilt. */
  readonly force?: boolean | undefined
  /** Embedding provider to use; the OpenAI provider is created when omitted. */
  readonly provider?: EmbeddingProvider | undefined
  /** Glob-like patterns (`*`, `?`) matched against whole document paths to skip them. */
  readonly excludePatterns?: readonly string[] | undefined
  /** Callback invoked for each document before its content is read. */
  readonly onFileProgress?: ((progress: FileProgress) => void) | undefined
}
169
+
170
/** Outcome of a `buildEmbeddings` run. */
export interface BuildEmbeddingsResult {
  /** Number of vector entries created in this run (0 on a cache hit). */
  readonly sectionsEmbedded: number
  /** Tokens reported by the provider for this run. */
  readonly tokensUsed: number
  /** Cost reported by the provider for this run. */
  readonly cost: number
  /** Elapsed wall-clock time in milliseconds. */
  readonly duration: number
  /** Number of documents whose content was read successfully. */
  readonly filesProcessed: number
  /** True when stored vectors already existed and nothing was embedded. */
  readonly cacheHit?: boolean | undefined
  /** On a cache hit, the number of vectors already in the store. */
  readonly existingVectors?: number | undefined
  /** On a cache hit, the stored token total priced at the per-million-token rate. */
  readonly estimatedSavings?: number | undefined
}
180
+
181
/**
 * Build embeddings for the indexed sections under `rootPath` and persist them in the
 * vector store.
 *
 * Steps: load the document and section indexes, resolve the provider, return early when
 * stored vectors already exist (unless `options.force`), read each document once, embed
 * all section texts with a single `provider.embed` call, then add the entries to the
 * store, record cost/tokens and save.
 *
 * Sections with fewer than 10 tokens, sections without a matching document entry and
 * sections whose path matches `options.excludePatterns` are not embedded. Documents that
 * cannot be read are skipped rather than failing the run.
 *
 * @param rootPath - Project root; resolved with `path.resolve`.
 * @param options - Provider override, force flag, exclude patterns and progress callback.
 *   Exclude patterns are glob-like: `*` matches any run of characters, `?` one character,
 *   everything else is literal; they are matched against the whole document path.
 * @returns Counts, provider-reported tokens and cost, and elapsed milliseconds. On reuse
 *   of stored vectors `cacheHit` is true and `sectionsEmbedded` is 0.
 *   Fails with `Error` when the index is missing, the provider cannot be created, or the
 *   embedding call rejects (an `InvalidApiKeyError` is passed through unchanged).
 */
export const buildEmbeddings = (
  rootPath: string,
  options: BuildEmbeddingsOptions = {},
): Effect.Effect<BuildEmbeddingsResult, Error> =>
  Effect.gen(function* () {
    const startTime = Date.now()
    const resolvedRoot = path.resolve(rootPath)
    const storage = createStorage(resolvedRoot)

    // Load indexes
    const docIndex = yield* loadDocumentIndex(storage)
    const sectionIndex = yield* loadSectionIndex(storage)

    if (!docIndex || !sectionIndex) {
      return yield* Effect.fail(
        new Error("Index not found. Run 'mdcontext index' first."),
      )
    }

    // Get or create provider (wrap in Effect.try to catch MissingApiKeyError)
    const provider =
      options.provider ??
      (yield* Effect.try({
        try: () => createOpenAIProvider(),
        catch: (e) => e as Error,
      }))
    const dimensions = provider.dimensions

    // Create vector store
    const vectorStore = createVectorStore(
      resolvedRoot,
      dimensions,
    ) as HnswVectorStore
    vectorStore.setProvider(provider.name)

    // Load existing if not forcing
    if (!options.force) {
      const loaded = yield* vectorStore.load()
      if (loaded) {
        const stats = vectorStore.getStats()
        // Skip if any embeddings exist
        if (stats.count > 0) {
          const duration = Date.now() - startTime
          // Estimate savings based on existing tokens
          const estimatedSavings =
            (stats.totalTokens / 1_000_000) * EMBEDDING_PRICE_PER_MILLION
          return {
            sectionsEmbedded: 0,
            tokensUsed: 0,
            cost: 0,
            duration,
            filesProcessed: 0,
            cacheHit: true,
            existingVectors: stats.count,
            estimatedSavings,
          }
        }
      }
    }

    // Compile the exclude patterns once. Regex metacharacters are escaped BEFORE the glob
    // wildcards are translated, so `.` in a pattern is a literal dot and characters such
    // as `(` or `[` cannot produce an invalid or over-matching regex.
    const excludeRegexes = (options.excludePatterns ?? []).map((pattern) => {
      const body = pattern
        .replace(/[.+^${}()|[\]\\]/g, '\\$&')
        .replace(/\*/g, '.*')
        .replace(/\?/g, '.')
      return new RegExp(`^${body}$`)
    })
    const isExcluded = (docPath: string): boolean =>
      excludeRegexes.some((regex) => regex.test(docPath))

    // Group sections by document for efficient file reading
    const sectionsByDoc = new Map<
      string,
      { section: SectionEntry; parentHeading: string | undefined }[]
    >()

    for (const section of Object.values(sectionIndex.sections)) {
      const document = docIndex.documents[section.documentPath]
      if (!document) continue

      // Skip very short sections (< 10 tokens)
      if (section.tokenCount < 10) continue

      // Check exclude patterns
      if (isExcluded(section.documentPath)) continue

      // Find the parent heading: the closest preceding section exactly one level up.
      // Tracking the best start line makes the choice independent of the order in which
      // `byDocument` lists the section ids.
      let parentHeading: string | undefined
      if (section.level > 1) {
        let parentStartLine = Number.NEGATIVE_INFINITY
        for (const candidateId of sectionIndex.byDocument[document.id] ?? []) {
          const candidate = sectionIndex.sections[candidateId]
          if (
            candidate &&
            candidate.level === section.level - 1 &&
            candidate.startLine < section.startLine &&
            candidate.startLine >= parentStartLine
          ) {
            parentStartLine = candidate.startLine
            parentHeading = candidate.heading
          }
        }
      }

      const docPath = section.documentPath
      let group = sectionsByDoc.get(docPath)
      if (!group) {
        group = []
        sectionsByDoc.set(docPath, group)
      }
      group.push({ section, parentHeading })
    }

    if (sectionsByDoc.size === 0) {
      const duration = Date.now() - startTime
      return {
        sectionsEmbedded: 0,
        tokensUsed: 0,
        cost: 0,
        duration,
        filesProcessed: 0,
      }
    }

    // Prepare sections for embedding by reading file content
    const sectionsToEmbed: { section: SectionEntry; text: string }[] = []
    const totalFiles = sectionsByDoc.size
    let fileIndex = 0
    let filesProcessed = 0

    for (const [docPath, sections] of sectionsByDoc) {
      fileIndex++
      const document = docIndex.documents[docPath]
      if (!document) continue

      // Report file progress
      if (options.onFileProgress) {
        options.onFileProgress({
          fileIndex,
          totalFiles,
          filePath: docPath,
          sectionCount: sections.length,
        })
      }

      // A rejected read must be recovered inside the Effect: a JavaScript try/catch
      // around `yield*` never sees it, and `Effect.promise` would turn it into a defect.
      const filePath = path.join(resolvedRoot, docPath)
      const fileContent = yield* Effect.tryPromise(() =>
        fs.readFile(filePath, 'utf-8'),
      ).pipe(Effect.catchAll(() => Effect.succeed(undefined)))

      // Skip files that can't be read
      if (fileContent === undefined) continue

      filesProcessed++
      const lines = fileContent.split('\n')

      for (const { section, parentHeading } of sections) {
        // Extract section content from file
        const content = lines
          .slice(section.startLine - 1, section.endLine)
          .join('\n')

        const text = generateEmbeddingText(
          section,
          content,
          document.title,
          parentHeading,
        )
        sectionsToEmbed.push({ section, text })
      }
    }

    if (sectionsToEmbed.length === 0) {
      const duration = Date.now() - startTime
      return {
        sectionsEmbedded: 0,
        tokensUsed: 0,
        cost: 0,
        duration,
        filesProcessed,
      }
    }

    // Generate embeddings
    const texts = sectionsToEmbed.map((s) => s.text)
    const result = yield* Effect.tryPromise({
      try: () => provider.embed(texts),
      catch: (e) => {
        // Preserve InvalidApiKeyError so handleApiKeyError can catch it
        if (e instanceof InvalidApiKeyError) return e
        return new Error(
          `Embedding failed: ${e instanceof Error ? e.message : String(e)}`,
        )
      },
    })

    // Create vector entries; embeddings are paired with sections by position, and a
    // position without an embedding is skipped.
    const entries: VectorEntry[] = []
    sectionsToEmbed.forEach(({ section }, i) => {
      const embedding = result.embeddings[i]
      if (!embedding) return

      entries.push({
        id: section.id,
        sectionId: section.id,
        documentPath: section.documentPath,
        heading: section.heading,
        embedding,
      })
    })

    // Add to vector store
    yield* vectorStore.add(entries)
    vectorStore.addCost(result.cost, result.tokensUsed)

    // Save
    yield* vectorStore.save()

    const duration = Date.now() - startTime

    return {
      sectionsEmbedded: entries.length,
      tokensUsed: result.tokensUsed,
      cost: result.cost,
      duration,
      filesProcessed,
    }
  })
410
+
411
+ // ============================================================================
412
+ // Semantic Search
413
+ // ============================================================================
414
+
415
/**
 * Embed `query` with the OpenAI provider and return the most similar stored sections.
 *
 * The vector store is asked for `limit * 2` candidates so that the optional path filter
 * still has results to choose from; the filtered list is then cut to `limit`.
 *
 * @param rootPath - Project root; resolved with `path.resolve`.
 * @param query - Free-text query to embed.
 * @param options - `limit` (default 10), `threshold` (default 0) and `pathPattern`.
 *   `pathPattern` is glob-like and case-insensitive: `*` matches any run of characters,
 *   `?` one character, every other character is literal; it must match the whole path.
 * @returns Matching sections without content.
 *   Fails with `Error` when the provider cannot be created, no stored embeddings are
 *   found, or the query cannot be embedded (an `InvalidApiKeyError` is passed through
 *   unchanged, as in `buildEmbeddings`).
 */
export const semanticSearch = (
  rootPath: string,
  query: string,
  options: SemanticSearchOptions = {},
): Effect.Effect<readonly SemanticSearchResult[], Error> =>
  Effect.gen(function* () {
    const resolvedRoot = path.resolve(rootPath)

    // Get provider for query embedding (wrap in Effect.try to catch MissingApiKeyError)
    const provider = yield* Effect.try({
      try: () => createOpenAIProvider(),
      catch: (e) => e as Error,
    })
    const dimensions = provider.dimensions

    // Load vector store
    const vectorStore = createVectorStore(resolvedRoot, dimensions)
    const loaded = yield* vectorStore.load()

    if (!loaded) {
      return yield* Effect.fail(
        new Error("Embeddings not found. Run 'mdcontext embed' first."),
      )
    }

    // Embed the query
    const queryResult = yield* Effect.tryPromise({
      try: () => provider.embed([query]),
      catch: (e) => {
        // Preserve InvalidApiKeyError so callers can recognise a rejected API key
        if (e instanceof InvalidApiKeyError) return e
        return new Error(
          `Query embedding failed: ${e instanceof Error ? e.message : String(e)}`,
        )
      },
    })

    const queryVector = queryResult.embeddings[0]
    if (!queryVector) {
      return yield* Effect.fail(new Error('Failed to generate query embedding'))
    }

    // Search
    const limit = options.limit ?? 10
    const threshold = options.threshold ?? 0

    const searchResults = yield* vectorStore.search(
      queryVector,
      limit * 2,
      threshold,
    )

    // Apply path filter if specified
    let filteredResults = searchResults
    if (options.pathPattern) {
      // Escape every regex metacharacter first, then translate the glob wildcards, so
      // characters such as `(`, `[` or `+` in a path cannot break or widen the match.
      const body = options.pathPattern
        .replace(/[.+^${}()|[\]\\]/g, '\\$&')
        .replace(/\*/g, '.*')
        .replace(/\?/g, '.')
      const regex = new RegExp(`^${body}$`, 'i')
      filteredResults = searchResults.filter((r) => regex.test(r.documentPath))
    }

    // Convert to SemanticSearchResult
    const results: SemanticSearchResult[] = filteredResults
      .slice(0, limit)
      .map((r) => ({
        sectionId: r.sectionId,
        documentPath: r.documentPath,
        heading: r.heading,
        similarity: r.similarity,
      }))

    return results
  })
486
+
487
+ // ============================================================================
488
+ // Search with Content
489
+ // ============================================================================
490
+
491
/**
 * Run `semanticSearch` and attach the matching section text to each result.
 *
 * Content is cut from the document on disk using the section's `startLine`/`endLine`
 * from the section index. A result is returned without `content` when the section index
 * is unavailable, the section id is not in the index, or the document cannot be read.
 *
 * @param rootPath - Project root; resolved with `path.resolve`.
 * @param query - Free-text query to embed.
 * @param options - Passed through to `semanticSearch`.
 * @returns The search results, in the same order, with `content` where available.
 */
export const semanticSearchWithContent = (
  rootPath: string,
  query: string,
  options: SemanticSearchOptions = {},
): Effect.Effect<readonly SemanticSearchResult[], Error> =>
  Effect.gen(function* () {
    const resolvedRoot = path.resolve(rootPath)
    const results = yield* semanticSearch(resolvedRoot, query, options)

    const storage = createStorage(resolvedRoot)
    const sectionIndex = yield* loadSectionIndex(storage)

    if (!sectionIndex) {
      return results
    }

    const resultsWithContent: SemanticSearchResult[] = []
    // Lines per document path; `undefined` records a document that could not be read,
    // so several hits in one document trigger a single read attempt.
    const linesByPath = new Map<string, readonly string[] | undefined>()

    for (const result of results) {
      const section = sectionIndex.sections[result.sectionId]
      if (!section) {
        resultsWithContent.push(result)
        continue
      }

      if (!linesByPath.has(result.documentPath)) {
        const filePath = path.join(resolvedRoot, result.documentPath)
        // A rejected read must be recovered inside the Effect: a JavaScript try/catch
        // around `yield*` never sees it, and `Effect.promise` would turn it into a defect.
        const fileContent = yield* Effect.tryPromise(() =>
          fs.readFile(filePath, 'utf-8'),
        ).pipe(Effect.catchAll(() => Effect.succeed(undefined)))
        linesByPath.set(
          result.documentPath,
          fileContent === undefined ? undefined : fileContent.split('\n'),
        )
      }

      const lines = linesByPath.get(result.documentPath)
      if (!lines) {
        // Unreadable document: keep the hit, just without content
        resultsWithContent.push(result)
        continue
      }

      const content = lines
        .slice(section.startLine - 1, section.endLine)
        .join('\n')

      resultsWithContent.push({
        ...result,
        content,
      })
    }

    return resultsWithContent
  })
539
+
540
+ // ============================================================================
541
+ // Get Embedding Stats
542
+ // ============================================================================
543
+
544
/** Summary of the embeddings stored for a root, as returned by `getEmbeddingStats`. */
export interface EmbeddingStats {
  /** False when no stored vector data could be loaded. */
  readonly hasEmbeddings: boolean
  /** Number of stored vectors (0 when nothing is stored). */
  readonly count: number
  /** Provider name recorded in the store, or `'none'` when nothing is stored. */
  readonly provider: string
  /** Vector dimensionality reported by the store (0 when nothing is stored). */
  readonly dimensions: number
  /** Accumulated cost recorded in the store. */
  readonly totalCost: number
  /** Accumulated token count recorded in the store. */
  readonly totalTokens: number
}
552
+
553
/**
 * Report what is stored in the vector store for `rootPath`.
 *
 * The store is opened with a fixed dimension of 1536 (treated here as the default).
 *
 * @param rootPath - Project root; resolved with `path.resolve`.
 * @returns The store's statistics, or an all-zero record with provider `'none'` when no
 *   stored vectors could be loaded.
 */
export const getEmbeddingStats = (
  rootPath: string,
): Effect.Effect<EmbeddingStats, Error> =>
  Effect.gen(function* () {
    // Try to load with default dimensions
    const store = createVectorStore(path.resolve(rootPath), 1536)

    if (yield* store.load()) {
      const { count, provider, dimensions, totalCost, totalTokens } =
        store.getStats()
      return {
        hasEmbeddings: true,
        count,
        provider,
        dimensions,
        totalCost,
        totalTokens,
      }
    }

    // Nothing on disk: report an empty store
    return {
      hasEmbeddings: false,
      count: 0,
      provider: 'none',
      dimensions: 0,
      totalCost: 0,
      totalTokens: 0,
    }
  })
@@ -0,0 +1,82 @@
1
+ /**
2
+ * Embedding types for mdcontext
3
+ */
4
+
5
+ // ============================================================================
6
+ // Embedding Provider
7
+ // ============================================================================
8
+
9
/** Contract implemented by an embedding backend. */
export interface EmbeddingProvider {
  /** Provider identifier. */
  readonly name: string
  /** Length of the vectors this provider produces. */
  readonly dimensions: number
  /**
   * Embed a batch of texts.
   *
   * @param texts - Texts to embed.
   * @returns A promise of the embeddings plus usage figures.
   */
  embed(texts: string[]): Promise<EmbeddingResult>
}
14
+
15
/** Output of one `EmbeddingProvider.embed` call. */
export interface EmbeddingResult {
  /** One vector per embedded text. */
  readonly embeddings: readonly number[][]
  /** Tokens consumed by the call. */
  readonly tokensUsed: number
  /** Cost of the call (currency is not specified here). */
  readonly cost: number
}
20
+
21
+ // ============================================================================
22
+ // Vector Index
23
+ // ============================================================================
24
+
25
/** One embedded section as handed to the vector store. */
export interface VectorEntry {
  /** Entry identifier. */
  readonly id: string
  /** Identifier of the section this vector was computed from. */
  readonly sectionId: string
  /** Path of the document that contains the section. */
  readonly documentPath: string
  /** Heading of the section. */
  readonly heading: string
  /** The embedding vector. */
  readonly embedding: readonly number[]
}
32
+
33
/**
 * Shape of a complete vector index.
 * NOTE(review): presumably the form written by the vector store — confirm against
 * `vector-store.ts`, which is not visible here.
 */
export interface VectorIndex {
  /** Format version number. */
  readonly version: number
  /** Name of the provider that produced the vectors. */
  readonly provider: string
  /** Length of each stored vector. */
  readonly dimensions: number
  /** Stored entries keyed by string (presumably the entry id — confirm). */
  readonly entries: Record<string, VectorEntry>
  /** Accumulated cost. */
  readonly totalCost: number
  /** Accumulated token count. */
  readonly totalTokens: number
  /** Creation timestamp as a string (format not specified here). */
  readonly createdAt: string
  /** Last-update timestamp as a string (format not specified here). */
  readonly updatedAt: string
}
43
+
44
+ // ============================================================================
45
+ // Semantic Search
46
+ // ============================================================================
47
+
48
/** Tuning knobs for a semantic search. */
export interface SemanticSearchOptions {
  /** Maximum number of results */
  readonly limit?: number | undefined
  /** Minimum similarity threshold (0-1) */
  readonly threshold?: number | undefined
  /** Filter by document path pattern (`*` acts as a wildcard) */
  readonly pathPattern?: string | undefined
}
56
+
57
/** One hit returned by a semantic search. */
export interface SemanticSearchResult {
  /** Identifier of the matched section. */
  readonly sectionId: string
  /** Path of the document that contains the section. */
  readonly documentPath: string
  /** Heading of the matched section. */
  readonly heading: string
  /** Similarity score between the query and the section. */
  readonly similarity: number
  /** Section text; present only when the search was asked to include content. */
  readonly content?: string | undefined
}
64
+
65
+ // ============================================================================
66
+ // Errors
67
+ // ============================================================================
68
+
69
/** Tagged error value describing a failed embedding operation. */
export interface EmbedError {
  /** Discriminant; always `'EmbedError'`. */
  readonly _tag: 'EmbedError'
  /** Coarse failure category. */
  readonly cause: 'RateLimit' | 'ApiKey' | 'Network' | 'Unknown'
  /** Human-readable description of the failure. */
  readonly message: string
}
74
+
75
/**
 * Create an `EmbedError` value.
 *
 * @param cause - Failure category.
 * @param message - Human-readable description.
 * @returns A plain object tagged `'EmbedError'` carrying `cause` and `message`.
 */
export const embedError = (
  cause: EmbedError['cause'],
  message: string,
): EmbedError => {
  const error: EmbedError = { _tag: 'EmbedError', cause, message }
  return error
}