mdcontext 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (251)
  1. package/.changeset/config.json +9 -9
  2. package/.claude/settings.local.json +25 -0
  3. package/.github/workflows/claude-code-review.yml +44 -0
  4. package/.github/workflows/claude.yml +85 -0
  5. package/CONTRIBUTING.md +186 -0
  6. package/NOTES/NOTES +44 -0
  7. package/README.md +206 -3
  8. package/biome.json +1 -1
  9. package/dist/chunk-23UPXDNL.js +3044 -0
  10. package/dist/chunk-2W7MO2DL.js +1366 -0
  11. package/dist/chunk-3NUAZGMA.js +1689 -0
  12. package/dist/chunk-7TOWB2XB.js +366 -0
  13. package/dist/chunk-7XOTOADQ.js +3065 -0
  14. package/dist/chunk-AH2PDM2K.js +3042 -0
  15. package/dist/chunk-BNXWSZ63.js +3742 -0
  16. package/dist/chunk-BTL5DJVU.js +3222 -0
  17. package/dist/chunk-HDHYG7E4.js +104 -0
  18. package/dist/chunk-HLR4KZBP.js +3234 -0
  19. package/dist/chunk-IP3FRFEB.js +1045 -0
  20. package/dist/chunk-KHU56VDO.js +3042 -0
  21. package/dist/chunk-KRYIFLQR.js +85 -89
  22. package/dist/chunk-LBSDNLEM.js +287 -0
  23. package/dist/chunk-MNTQ7HCP.js +2643 -0
  24. package/dist/chunk-MUJELQQ6.js +1387 -0
  25. package/dist/chunk-MXJGMSLV.js +2199 -0
  26. package/dist/chunk-N6QJGC3Z.js +2636 -0
  27. package/dist/chunk-OBELGBPM.js +1713 -0
  28. package/dist/chunk-OT7R5XTA.js +3192 -0
  29. package/dist/chunk-P7X4RA2T.js +106 -0
  30. package/dist/chunk-PIDUQNC2.js +3185 -0
  31. package/dist/chunk-POGCDIH4.js +3187 -0
  32. package/dist/chunk-PSIEOQGZ.js +3043 -0
  33. package/dist/chunk-PVRT3IHA.js +3238 -0
  34. package/dist/chunk-QNN4TT23.js +1430 -0
  35. package/dist/chunk-RE3R45RJ.js +3042 -0
  36. package/dist/chunk-S7E6TFX6.js +718 -657
  37. package/dist/chunk-SG6GLU4U.js +1378 -0
  38. package/dist/chunk-SJCDV2ST.js +274 -0
  39. package/dist/chunk-SYE5XLF3.js +104 -0
  40. package/dist/chunk-T5VLYBZD.js +103 -0
  41. package/dist/chunk-TOQB7VWU.js +3238 -0
  42. package/dist/chunk-VFNMZ4ZQ.js +3228 -0
  43. package/dist/chunk-VVTGZNBT.js +1533 -1423
  44. package/dist/chunk-W7Q4RFEV.js +104 -0
  45. package/dist/chunk-XTYYVRLO.js +3190 -0
  46. package/dist/chunk-Y6MDYVJD.js +3063 -0
  47. package/dist/cli/main.js +4072 -629
  48. package/dist/index.d.ts +420 -33
  49. package/dist/index.js +8 -15
  50. package/dist/mcp/server.js +103 -7
  51. package/dist/schema-BAWSG7KY.js +22 -0
  52. package/dist/schema-E3QUPL26.js +20 -0
  53. package/dist/schema-EHL7WUT6.js +20 -0
  54. package/docs/019-USAGE.md +44 -5
  55. package/docs/020-current-implementation.md +8 -8
  56. package/docs/021-DOGFOODING-FINDINGS.md +1 -1
  57. package/docs/CONFIG.md +1123 -0
  58. package/docs/ERRORS.md +383 -0
  59. package/docs/summarization.md +320 -0
  60. package/justfile +40 -0
  61. package/package.json +39 -33
  62. package/research/INDEX.md +315 -0
  63. package/research/code-review/README.md +90 -0
  64. package/research/code-review/cli-error-handling-review.md +979 -0
  65. package/research/code-review/code-review-validation-report.md +464 -0
  66. package/research/code-review/main-ts-review.md +1128 -0
  67. package/research/config-docs/SUMMARY.md +357 -0
  68. package/research/config-docs/TEST-RESULTS.md +776 -0
  69. package/research/config-docs/TODO.md +542 -0
  70. package/research/config-docs/analysis.md +744 -0
  71. package/research/config-docs/fix-validation.md +502 -0
  72. package/research/config-docs/help-audit.md +264 -0
  73. package/research/config-docs/help-system-analysis.md +890 -0
  74. package/research/frontmatter/COMMENTS-ARE-SKIPPED.md +149 -0
  75. package/research/frontmatter/LLM-CODE-NAVIGATION.md +276 -0
  76. package/research/issue-review.md +603 -0
  77. package/research/llm-summarization/agent-cli-tools-2026.md +1082 -0
  78. package/research/llm-summarization/alternative-providers-2026.md +1428 -0
  79. package/research/llm-summarization/anthropic-2026.md +367 -0
  80. package/research/llm-summarization/claude-cli-integration.md +1706 -0
  81. package/research/llm-summarization/cli-integration-patterns.md +3155 -0
  82. package/research/llm-summarization/openai-2026.md +473 -0
  83. package/research/llm-summarization/openai-compatible-providers-2026.md +1022 -0
  84. package/research/llm-summarization/opencode-cli-integration.md +1552 -0
  85. package/research/llm-summarization/prompt-engineering-2026.md +1426 -0
  86. package/research/llm-summarization/prototype-results.md +56 -0
  87. package/research/llm-summarization/provider-switching-patterns-2026.md +2153 -0
  88. package/research/llm-summarization/typescript-llm-libraries-2026.md +2436 -0
  89. package/research/mdcontext-pudding/00-EXECUTIVE-SUMMARY.md +282 -0
  90. package/research/mdcontext-pudding/01-index-embed.md +956 -0
  91. package/research/mdcontext-pudding/02-search-COMMANDS.md +142 -0
  92. package/research/mdcontext-pudding/02-search-SUMMARY.md +146 -0
  93. package/research/mdcontext-pudding/02-search.md +970 -0
  94. package/research/mdcontext-pudding/03-context.md +779 -0
  95. package/research/mdcontext-pudding/04-navigation-and-analytics.md +803 -0
  96. package/research/mdcontext-pudding/04-tree.md +704 -0
  97. package/research/mdcontext-pudding/05-config.md +1038 -0
  98. package/research/mdcontext-pudding/06-links-summary.txt +87 -0
  99. package/research/mdcontext-pudding/06-links.md +679 -0
  100. package/research/mdcontext-pudding/07-stats.md +693 -0
  101. package/research/mdcontext-pudding/BUG-FIX-PLAN.md +388 -0
  102. package/research/mdcontext-pudding/P0-BUG-VALIDATION.md +167 -0
  103. package/research/mdcontext-pudding/README.md +168 -0
  104. package/research/mdcontext-pudding/TESTING-SUMMARY.md +128 -0
  105. package/research/research-quality-review.md +834 -0
  106. package/research/semantic-search/embedding-text-analysis.md +156 -0
  107. package/research/semantic-search/multi-word-failure-reproduction.md +171 -0
  108. package/research/semantic-search/query-processing-analysis.md +207 -0
  109. package/research/semantic-search/root-cause-and-solution.md +114 -0
  110. package/research/semantic-search/threshold-validation-report.md +69 -0
  111. package/research/semantic-search/vector-search-analysis.md +63 -0
  112. package/research/test-path-issues.md +276 -0
  113. package/review/ALP-76/1-error-type-design.md +962 -0
  114. package/review/ALP-76/2-error-handling-patterns.md +906 -0
  115. package/review/ALP-76/3-error-presentation.md +624 -0
  116. package/review/ALP-76/4-test-coverage.md +625 -0
  117. package/review/ALP-76/5-migration-completeness.md +440 -0
  118. package/review/ALP-76/6-effect-best-practices.md +755 -0
  119. package/scripts/apply-branch-protection.sh +47 -0
  120. package/scripts/branch-protection-templates.json +79 -0
  121. package/scripts/prototype-summarization.ts +346 -0
  122. package/scripts/rebuild-hnswlib.js +32 -37
  123. package/scripts/setup-branch-protection.sh +64 -0
  124. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/active-provider.json +7 -0
  125. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.json +541 -0
  126. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.meta.json +5 -0
  127. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/config.json +8 -0
  128. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
  129. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
  130. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/documents.json +60 -0
  131. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/links.json +13 -0
  132. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/sections.json +1197 -0
  133. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/configuration-management.md +99 -0
  134. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/distributed-systems.md +92 -0
  135. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/error-handling.md +78 -0
  136. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/failure-automation.md +55 -0
  137. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/job-context.md +69 -0
  138. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/process-orchestration.md +99 -0
  139. package/src/cli/argv-preprocessor.test.ts +2 -2
  140. package/src/cli/cli.test.ts +230 -33
  141. package/src/cli/commands/config-cmd.ts +642 -0
  142. package/src/cli/commands/context.ts +97 -9
  143. package/src/cli/commands/duplicates.ts +122 -0
  144. package/src/cli/commands/embeddings.ts +529 -0
  145. package/src/cli/commands/index-cmd.ts +210 -30
  146. package/src/cli/commands/index.ts +3 -0
  147. package/src/cli/commands/search.ts +894 -64
  148. package/src/cli/commands/stats.ts +3 -0
  149. package/src/cli/commands/tree.ts +26 -5
  150. package/src/cli/config-layer.ts +176 -0
  151. package/src/cli/error-handler.test.ts +235 -0
  152. package/src/cli/error-handler.ts +655 -0
  153. package/src/cli/flag-schemas.ts +66 -0
  154. package/src/cli/help.ts +209 -7
  155. package/src/cli/main.ts +348 -58
  156. package/src/cli/options.ts +10 -0
  157. package/src/cli/shared-error-handling.ts +199 -0
  158. package/src/cli/utils.ts +150 -17
  159. package/src/config/file-provider.test.ts +320 -0
  160. package/src/config/file-provider.ts +273 -0
  161. package/src/config/index.ts +72 -0
  162. package/src/config/integration.test.ts +667 -0
  163. package/src/config/precedence.test.ts +277 -0
  164. package/src/config/precedence.ts +451 -0
  165. package/src/config/schema.test.ts +414 -0
  166. package/src/config/schema.ts +603 -0
  167. package/src/config/service.test.ts +320 -0
  168. package/src/config/service.ts +243 -0
  169. package/src/config/testing.test.ts +264 -0
  170. package/src/config/testing.ts +110 -0
  171. package/src/core/types.ts +6 -33
  172. package/src/duplicates/detector.test.ts +183 -0
  173. package/src/duplicates/detector.ts +414 -0
  174. package/src/duplicates/index.ts +18 -0
  175. package/src/embeddings/embedding-namespace.test.ts +300 -0
  176. package/src/embeddings/embedding-namespace.ts +947 -0
  177. package/src/embeddings/heading-boost.test.ts +222 -0
  178. package/src/embeddings/hnsw-build-options.test.ts +198 -0
  179. package/src/embeddings/hyde.test.ts +272 -0
  180. package/src/embeddings/hyde.ts +264 -0
  181. package/src/embeddings/index.ts +2 -0
  182. package/src/embeddings/openai-provider.ts +332 -83
  183. package/src/embeddings/pricing.json +22 -0
  184. package/src/embeddings/provider-constants.ts +204 -0
  185. package/src/embeddings/provider-errors.test.ts +967 -0
  186. package/src/embeddings/provider-errors.ts +565 -0
  187. package/src/embeddings/provider-factory.test.ts +240 -0
  188. package/src/embeddings/provider-factory.ts +225 -0
  189. package/src/embeddings/provider-integration.test.ts +788 -0
  190. package/src/embeddings/query-preprocessing.test.ts +187 -0
  191. package/src/embeddings/semantic-search-threshold.test.ts +508 -0
  192. package/src/embeddings/semantic-search.ts +780 -93
  193. package/src/embeddings/types.ts +293 -16
  194. package/src/embeddings/vector-store.ts +486 -77
  195. package/src/embeddings/voyage-provider.ts +313 -0
  196. package/src/errors/errors.test.ts +845 -0
  197. package/src/errors/index.ts +533 -0
  198. package/src/index/ignore-patterns.test.ts +354 -0
  199. package/src/index/ignore-patterns.ts +305 -0
  200. package/src/index/indexer.ts +286 -48
  201. package/src/index/storage.ts +94 -30
  202. package/src/index/types.ts +40 -2
  203. package/src/index/watcher.ts +67 -9
  204. package/src/index.ts +22 -0
  205. package/src/integration/search-keyword.test.ts +678 -0
  206. package/src/mcp/server.ts +135 -6
  207. package/src/parser/parser.ts +18 -19
  208. package/src/parser/section-filter.test.ts +277 -0
  209. package/src/parser/section-filter.ts +125 -3
  210. package/src/search/__tests__/hybrid-search.test.ts +650 -0
  211. package/src/search/bm25-store.ts +366 -0
  212. package/src/search/cross-encoder.test.ts +253 -0
  213. package/src/search/cross-encoder.ts +406 -0
  214. package/src/search/fuzzy-search.test.ts +419 -0
  215. package/src/search/fuzzy-search.ts +273 -0
  216. package/src/search/hybrid-search.ts +448 -0
  217. package/src/search/path-matcher.test.ts +276 -0
  218. package/src/search/path-matcher.ts +33 -0
  219. package/src/search/searcher.test.ts +99 -1
  220. package/src/search/searcher.ts +189 -67
  221. package/src/search/wink-bm25.d.ts +30 -0
  222. package/src/summarization/cli-providers/claude.ts +202 -0
  223. package/src/summarization/cli-providers/detection.test.ts +273 -0
  224. package/src/summarization/cli-providers/detection.ts +118 -0
  225. package/src/summarization/cli-providers/index.ts +8 -0
  226. package/src/summarization/cost.test.ts +139 -0
  227. package/src/summarization/cost.ts +102 -0
  228. package/src/summarization/error-handler.test.ts +127 -0
  229. package/src/summarization/error-handler.ts +111 -0
  230. package/src/summarization/index.ts +102 -0
  231. package/src/summarization/pipeline.test.ts +498 -0
  232. package/src/summarization/pipeline.ts +231 -0
  233. package/src/summarization/prompts.test.ts +269 -0
  234. package/src/summarization/prompts.ts +133 -0
  235. package/src/summarization/provider-factory.test.ts +396 -0
  236. package/src/summarization/provider-factory.ts +178 -0
  237. package/src/summarization/types.ts +184 -0
  238. package/src/summarize/summarizer.ts +104 -35
  239. package/src/types/huggingface-transformers.d.ts +66 -0
  240. package/tests/fixtures/cli/.mdcontext/active-provider.json +7 -0
  241. package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
  242. package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
  243. package/tests/fixtures/cli/.mdcontext/indexes/documents.json +4 -4
  244. package/tests/fixtures/cli/.mdcontext/indexes/sections.json +14 -0
  245. package/tests/integration/embed-index.test.ts +712 -0
  246. package/tests/integration/search-context.test.ts +469 -0
  247. package/tests/integration/search-semantic.test.ts +522 -0
  248. package/vitest.config.ts +1 -6
  249. package/AGENTS.md +0 -46
  250. package/tests/fixtures/cli/.mdcontext/vectors.bin +0 -0
  251. package/tests/fixtures/cli/.mdcontext/vectors.meta.json +0 -1264
@@ -5,20 +5,83 @@
5
5
  import * as fs from 'node:fs/promises'
6
6
  import * as path from 'node:path'
7
7
  import { Effect } from 'effect'
8
+ import {
9
+ type ApiKeyInvalidError,
10
+ type ApiKeyMissingError,
11
+ DimensionMismatchError,
12
+ EmbeddingError,
13
+ EmbeddingsNotFoundError,
14
+ type FileReadError,
15
+ type IndexCorruptedError,
16
+ IndexNotFoundError,
17
+ type VectorStoreError,
18
+ } from '../errors/index.js'
8
19
  import {
9
20
  createStorage,
10
21
  loadDocumentIndex,
11
22
  loadSectionIndex,
12
23
  } from '../index/storage.js'
13
24
  import type { SectionEntry } from '../index/types.js'
14
- import { createOpenAIProvider, InvalidApiKeyError } from './openai-provider.js'
15
- import type {
16
- EmbeddingProvider,
17
- SemanticSearchOptions,
18
- SemanticSearchResult,
19
- VectorEntry,
25
+ import {
26
+ type ActiveProvider,
27
+ generateNamespace,
28
+ getActiveNamespace,
29
+ writeActiveProvider,
30
+ } from './embedding-namespace.js'
31
+ import { generateHypotheticalDocument, type HydeResult } from './hyde.js'
32
+ import {
33
+ checkPricingFreshness,
34
+ getPricingDate,
35
+ PRICING_DATA,
36
+ wrapEmbedding,
37
+ } from './openai-provider.js'
38
+ import {
39
+ createEmbeddingProviderDirect,
40
+ type ProviderFactoryConfig,
41
+ } from './provider-factory.js'
42
+ import {
43
+ calculateFileImportanceBoost,
44
+ calculateHeadingBoost,
45
+ type EmbeddingProvider,
46
+ hasProviderMetadata,
47
+ preprocessQuery,
48
+ QUALITY_EF_SEARCH,
49
+ type SemanticSearchOptions,
50
+ type SemanticSearchResult,
51
+ type SemanticSearchResultWithStats,
52
+ type VectorEntry,
20
53
  } from './types.js'
21
- import { createVectorStore, type HnswVectorStore } from './vector-store.js'
54
+ import {
55
+ createNamespacedVectorStore,
56
+ type HnswBuildOptions,
57
+ type HnswMismatchWarning,
58
+ type HnswVectorStore,
59
+ type VectorSearchResult,
60
+ type VectorStoreLoadResult,
61
+ } from './vector-store.js'
62
+
63
+ // ============================================================================
64
+ // HNSW Parameter Warning
65
+ // ============================================================================
66
+
67
+ /**
68
+ * Check for HNSW parameter mismatch and log a warning if found.
69
+ * This helps users understand when their config doesn't match the stored index.
70
+ */
71
+ const checkHnswMismatch = (
72
+ mismatch: HnswMismatchWarning | undefined,
73
+ ): Effect.Effect<void, never, never> => {
74
+ if (!mismatch) {
75
+ return Effect.void
76
+ }
77
+
78
+ const { configParams, indexParams } = mismatch
79
+ return Effect.logWarning(
80
+ `HNSW parameter mismatch: Index was built with M=${indexParams.m}, efConstruction=${indexParams.efConstruction}, ` +
81
+ `but config specifies M=${configParams.m}, efConstruction=${configParams.efConstruction}. ` +
82
+ `HNSW parameters only affect index construction. Run 'mdcontext index --embed --force' to rebuild with new parameters.`,
83
+ )
84
+ }
22
85
 
23
86
  // ============================================================================
24
87
  // Embedding Text Generation
@@ -47,8 +110,12 @@ const generateEmbeddingText = (
47
110
  // Cost Estimation
48
111
  // ============================================================================
49
112
 
50
- // Price per 1M tokens for text-embedding-3-small
51
- const EMBEDDING_PRICE_PER_MILLION = 0.02
113
+ // Price per 1M tokens for text-embedding-3-small (from PRICING_DATA)
114
+ const EMBEDDING_PRICE_PER_MILLION =
115
+ PRICING_DATA.prices['text-embedding-3-small'] ?? 0.02
116
+
117
+ // Re-export pricing utilities for CLI use
118
+ export { checkPricingFreshness, getPricingDate }
52
119
 
53
120
  export interface DirectoryEstimate {
54
121
  readonly directory: string
@@ -67,10 +134,24 @@ export interface EmbeddingEstimate {
67
134
  readonly byDirectory: readonly DirectoryEstimate[]
68
135
  }
69
136
 
137
+ /**
138
+ * Estimate the cost of generating embeddings for a directory.
139
+ *
140
+ * @param rootPath - Root directory containing indexed markdown files
141
+ * @param options - Optional exclude patterns
142
+ * @returns Estimate with token counts and costs
143
+ *
144
+ * @throws IndexNotFoundError - Index doesn't exist at path
145
+ * @throws FileReadError - Cannot read index files
146
+ * @throws IndexCorruptedError - Index files are corrupted
147
+ */
70
148
  export const estimateEmbeddingCost = (
71
149
  rootPath: string,
72
150
  options: { excludePatterns?: readonly string[] | undefined } = {},
73
- ): Effect.Effect<EmbeddingEstimate, Error> =>
151
+ ): Effect.Effect<
152
+ EmbeddingEstimate,
153
+ IndexNotFoundError | FileReadError | IndexCorruptedError
154
+ > =>
74
155
  Effect.gen(function* () {
75
156
  const resolvedRoot = path.resolve(rootPath)
76
157
  const storage = createStorage(resolvedRoot)
@@ -79,9 +160,7 @@ export const estimateEmbeddingCost = (
79
160
  const sectionIndex = yield* loadSectionIndex(storage)
80
161
 
81
162
  if (!docIndex || !sectionIndex) {
82
- return yield* Effect.fail(
83
- new Error("Index not found. Run 'mdcontext index' first."),
84
- )
163
+ return yield* Effect.fail(new IndexNotFoundError({ path: resolvedRoot }))
85
164
  }
86
165
 
87
166
  // Group by directory
@@ -160,11 +239,25 @@ export interface FileProgress {
160
239
  readonly sectionCount: number
161
240
  }
162
241
 
242
+ export interface EmbeddingBatchProgress {
243
+ readonly batchIndex: number
244
+ readonly totalBatches: number
245
+ readonly processedSections: number
246
+ readonly totalSections: number
247
+ }
248
+
163
249
  export interface BuildEmbeddingsOptions {
164
250
  readonly force?: boolean | undefined
165
251
  readonly provider?: EmbeddingProvider | undefined
252
+ readonly providerConfig?: ProviderFactoryConfig | undefined
166
253
  readonly excludePatterns?: readonly string[] | undefined
167
254
  readonly onFileProgress?: ((progress: FileProgress) => void) | undefined
255
+ /** Callback for batch progress during embedding API calls */
256
+ readonly onBatchProgress?:
257
+ | ((progress: EmbeddingBatchProgress) => void)
258
+ | undefined
259
+ /** HNSW build parameters for vector index construction */
260
+ readonly hnswOptions?: HnswBuildOptions | undefined
168
261
  }
169
262
 
170
263
  export interface BuildEmbeddingsResult {
@@ -178,10 +271,36 @@ export interface BuildEmbeddingsResult {
178
271
  readonly estimatedSavings?: number | undefined
179
272
  }
180
273
 
274
+ /**
275
+ * Build embeddings for all indexed sections in a directory.
276
+ *
277
+ * @param rootPath - Root directory containing indexed markdown files
278
+ * @param options - Build options (force rebuild, progress callbacks)
279
+ * @returns Result with embedding counts, costs, and timing
280
+ *
281
+ * @throws IndexNotFoundError - Index doesn't exist at path
282
+ * @throws FileReadError - Cannot read index or source files
283
+ * @throws IndexCorruptedError - Index files are corrupted
284
+ * @throws ApiKeyMissingError - API key not set (check provider config)
285
+ * @throws ApiKeyInvalidError - API key rejected by provider
286
+ * @throws EmbeddingError - Embedding API failure (rate limit, quota, network)
287
+ * @throws VectorStoreError - Cannot save vector index
288
+ * @throws DimensionMismatchError - Existing embeddings have different dimensions
289
+ */
181
290
  export const buildEmbeddings = (
182
291
  rootPath: string,
183
292
  options: BuildEmbeddingsOptions = {},
184
- ): Effect.Effect<BuildEmbeddingsResult, Error> =>
293
+ ): Effect.Effect<
294
+ BuildEmbeddingsResult,
295
+ | IndexNotFoundError
296
+ | FileReadError
297
+ | IndexCorruptedError
298
+ | ApiKeyMissingError
299
+ | ApiKeyInvalidError
300
+ | EmbeddingError
301
+ | VectorStoreError
302
+ | DimensionMismatchError
303
+ > =>
185
304
  Effect.gen(function* () {
186
305
  const startTime = Date.now()
187
306
  const resolvedRoot = path.resolve(rootPath)
@@ -192,31 +311,52 @@ export const buildEmbeddings = (
192
311
  const sectionIndex = yield* loadSectionIndex(storage)
193
312
 
194
313
  if (!docIndex || !sectionIndex) {
195
- return yield* Effect.fail(
196
- new Error("Index not found. Run 'mdcontext index' first."),
197
- )
314
+ return yield* Effect.fail(new IndexNotFoundError({ path: resolvedRoot }))
198
315
  }
199
316
 
200
- // Get or create provider (wrap in Effect.try to catch MissingApiKeyError)
317
+ // Get or create provider - use factory for config-driven provider selection
318
+ // Priority: explicit provider > providerConfig > default (openai)
319
+ const providerConfig = options.providerConfig ?? { provider: 'openai' }
201
320
  const provider =
202
- options.provider ??
203
- (yield* Effect.try({
204
- try: () => createOpenAIProvider(),
205
- catch: (e) => e as Error,
206
- }))
321
+ options.provider ?? (yield* createEmbeddingProviderDirect(providerConfig))
207
322
  const dimensions = provider.dimensions
208
323
 
209
- // Create vector store
210
- const vectorStore = createVectorStore(
324
+ // Extract provider info for namespacing from the actual provider instance
325
+ // This ensures we use the correct values even when options.provider is explicitly set
326
+ let providerName: string
327
+ let providerModel: string
328
+
329
+ if (hasProviderMetadata(provider)) {
330
+ // Provider has metadata - extract provider name from provider.name (format: "provider:model")
331
+ const nameParts = provider.name.split(':')
332
+ providerName = nameParts[0] || 'openai'
333
+ providerModel = provider.model
334
+ } else {
335
+ // Fallback to config values for providers without metadata
336
+ providerName = providerConfig.provider ?? 'openai'
337
+ providerModel = providerConfig.model ?? 'text-embedding-3-small'
338
+ }
339
+
340
+ // Create namespaced vector store for this provider/model/dimensions combination
341
+ const vectorStore = createNamespacedVectorStore(
211
342
  resolvedRoot,
343
+ providerName,
344
+ providerModel,
212
345
  dimensions,
346
+ options.hnswOptions,
213
347
  ) as HnswVectorStore
214
- vectorStore.setProvider(provider.name)
348
+
349
+ // Set provider metadata
350
+ if (hasProviderMetadata(provider)) {
351
+ vectorStore.setProvider(provider.name, provider.model, provider.baseURL)
352
+ } else {
353
+ vectorStore.setProvider(providerName, providerModel, undefined)
354
+ }
215
355
 
216
356
  // Load existing if not forcing
217
357
  if (!options.force) {
218
- const loaded = yield* vectorStore.load()
219
- if (loaded) {
358
+ const loadResult = yield* vectorStore.load()
359
+ if (loadResult.loaded) {
220
360
  const stats = vectorStore.getStats()
221
361
  // Skip if any embeddings exist
222
362
  if (stats.count > 0) {
@@ -321,18 +461,26 @@ export const buildEmbeddings = (
321
461
  }
322
462
 
323
463
  const filePath = path.join(resolvedRoot, docPath)
324
- let fileContent: string
325
- try {
326
- fileContent = yield* Effect.promise(() =>
327
- fs.readFile(filePath, 'utf-8'),
328
- )
329
- } catch {
330
- // Skip files that can't be read
464
+
465
+ // Note: catchAll is intentional - file read failures during embedding
466
+ // should skip the file with a warning rather than abort the entire operation.
467
+ // A warning is logged below when the read fails.
468
+ const fileContentResult = yield* Effect.promise(() =>
469
+ fs.readFile(filePath, 'utf-8'),
470
+ ).pipe(
471
+ Effect.map((content) => ({ ok: true as const, content })),
472
+ Effect.catchAll(() =>
473
+ Effect.succeed({ ok: false as const, content: '' }),
474
+ ),
475
+ )
476
+
477
+ if (!fileContentResult.ok) {
478
+ yield* Effect.logWarning(`Skipping file (cannot read): ${docPath}`)
331
479
  continue
332
480
  }
333
481
 
334
482
  filesProcessed++
335
- const lines = fileContent.split('\n')
483
+ const lines = fileContentResult.content.split('\n')
336
484
 
337
485
  for (const { section, parentHeading } of sections) {
338
486
  // Extract section content from file
@@ -363,16 +511,20 @@ export const buildEmbeddings = (
363
511
 
364
512
  // Generate embeddings
365
513
  const texts = sectionsToEmbed.map((s) => s.text)
366
- const result = yield* Effect.tryPromise({
367
- try: () => provider.embed(texts),
368
- catch: (e) => {
369
- // Preserve InvalidApiKeyError so handleApiKeyError can catch it
370
- if (e instanceof InvalidApiKeyError) return e
371
- return new Error(
372
- `Embedding failed: ${e instanceof Error ? e.message : String(e)}`,
373
- )
374
- },
375
- })
514
+ const result = yield* wrapEmbedding(
515
+ provider.embed(texts, {
516
+ onBatchProgress: options.onBatchProgress
517
+ ? (p) =>
518
+ options.onBatchProgress?.({
519
+ batchIndex: p.batchIndex,
520
+ totalBatches: p.totalBatches,
521
+ processedSections: p.processedTexts,
522
+ totalSections: p.totalTexts,
523
+ })
524
+ : undefined,
525
+ }),
526
+ providerConfig.provider ?? 'openai',
527
+ )
376
528
 
377
529
  // Create vector entries
378
530
  const entries: VectorEntry[] = []
@@ -397,6 +549,22 @@ export const buildEmbeddings = (
397
549
  // Save
398
550
  yield* vectorStore.save()
399
551
 
552
+ // Set this namespace as the active provider
553
+ const namespace = generateNamespace(providerName, providerModel, dimensions)
554
+ yield* writeActiveProvider(resolvedRoot, {
555
+ namespace,
556
+ provider: providerName,
557
+ model: providerModel,
558
+ dimensions,
559
+ activatedAt: new Date().toISOString(),
560
+ }).pipe(
561
+ Effect.catchAll((e) => {
562
+ // Don't fail the build if we can't write the active provider file
563
+ console.warn(`Warning: Could not set active provider: ${e.message}`)
564
+ return Effect.succeed(undefined)
565
+ }),
566
+ )
567
+
400
568
  const duration = Date.now() - startTime
401
569
 
402
570
  return {
@@ -408,57 +576,239 @@ export const buildEmbeddings = (
408
576
  }
409
577
  })
410
578
 
579
+ // ============================================================================
580
+ // Context Lines Helper
581
+ // ============================================================================
582
+
583
+ /**
584
+ * Add context lines to search results by loading section content from files.
585
+ * This helper is used by both semanticSearch and semanticSearchWithStats to avoid code duplication.
586
+ */
587
+ const addContextLinesToResults = (
588
+ limitedResults: readonly VectorSearchResult[],
589
+ sectionIndex: { sections: Record<string, SectionEntry> },
590
+ resolvedRoot: string,
591
+ options: {
592
+ contextBefore?: number | undefined
593
+ contextAfter?: number | undefined
594
+ },
595
+ ): Effect.Effect<readonly SemanticSearchResult[], FileReadError, never> =>
596
+ Effect.gen(function* () {
597
+ const contextBefore = options.contextBefore ?? 0
598
+ const contextAfter = options.contextAfter ?? 0
599
+
600
+ const resultsWithContext: SemanticSearchResult[] = []
601
+ const fileCache = new Map<string, string>()
602
+
603
+ for (const r of limitedResults) {
604
+ const section = sectionIndex.sections[r.sectionId]
605
+ if (!section) {
606
+ resultsWithContext.push({
607
+ sectionId: r.sectionId,
608
+ documentPath: r.documentPath,
609
+ heading: r.heading,
610
+ similarity: r.similarity,
611
+ })
612
+ continue
613
+ }
614
+
615
+ let fileContent = fileCache.get(r.documentPath)
616
+ if (!fileContent) {
617
+ const filePath = path.join(resolvedRoot, r.documentPath)
618
+ const contentResult = yield* Effect.promise(() =>
619
+ fs.readFile(filePath, 'utf-8'),
620
+ ).pipe(
621
+ Effect.map((content) => content),
622
+ Effect.catchAll(() => Effect.succeed(null as string | null)),
623
+ )
624
+
625
+ if (contentResult) {
626
+ fileContent = contentResult
627
+ fileCache.set(r.documentPath, fileContent)
628
+ }
629
+ }
630
+
631
+ if (fileContent) {
632
+ const lines = fileContent.split('\n')
633
+ const startIdx = Math.max(0, section.startLine - 1 - contextBefore)
634
+ const endIdx = Math.min(lines.length, section.endLine + contextAfter)
635
+
636
+ const contextLines: {
637
+ lineNumber: number
638
+ line: string
639
+ isMatch: boolean
640
+ }[] = []
641
+ for (let i = startIdx; i < endIdx; i++) {
642
+ const line = lines[i]
643
+ if (line !== undefined) {
644
+ contextLines.push({
645
+ lineNumber: i + 1,
646
+ line,
647
+ isMatch: i >= section.startLine - 1 && i < section.endLine,
648
+ })
649
+ }
650
+ }
651
+
652
+ resultsWithContext.push({
653
+ sectionId: r.sectionId,
654
+ documentPath: r.documentPath,
655
+ heading: r.heading,
656
+ similarity: r.similarity,
657
+ contextLines,
658
+ })
659
+ } else {
660
+ resultsWithContext.push({
661
+ sectionId: r.sectionId,
662
+ documentPath: r.documentPath,
663
+ heading: r.heading,
664
+ similarity: r.similarity,
665
+ })
666
+ }
667
+ }
668
+
669
+ return resultsWithContext
670
+ })
671
+
411
672
  // ============================================================================
412
673
  // Semantic Search
413
674
  // ============================================================================
414
675
 
676
+ /**
677
+ * Perform semantic search over embedded sections.
678
+ *
679
+ * @param rootPath - Root directory containing embeddings
680
+ * @param query - Natural language search query
681
+ * @param options - Search options (limit, threshold, path filter)
682
+ * @returns Ranked list of matching sections by similarity
683
+ *
684
+ * @throws EmbeddingsNotFoundError - No embeddings exist (run index --embed first)
685
+ * @throws ApiKeyMissingError - API key not set (check provider config)
686
+ * @throws ApiKeyInvalidError - API key rejected by provider
687
+ * @throws EmbeddingError - Embedding API failure (rate limit, quota, network)
688
+ * @throws VectorStoreError - Cannot load or search vector index
689
+ * @throws DimensionMismatchError - Corpus has different dimensions than current provider
690
+ */
415
691
  export const semanticSearch = (
416
692
  rootPath: string,
417
693
  query: string,
418
694
  options: SemanticSearchOptions = {},
419
- ): Effect.Effect<readonly SemanticSearchResult[], Error> =>
695
+ ): Effect.Effect<
696
+ readonly SemanticSearchResult[],
697
+ | EmbeddingsNotFoundError
698
+ | FileReadError
699
+ | IndexCorruptedError
700
+ | ApiKeyMissingError
701
+ | ApiKeyInvalidError
702
+ | EmbeddingError
703
+ | VectorStoreError
704
+ | DimensionMismatchError
705
+ > =>
420
706
  Effect.gen(function* () {
421
707
  const resolvedRoot = path.resolve(rootPath)
422
708
 
423
- // Get provider for query embedding (wrap in Effect.try to catch MissingApiKeyError)
424
- const provider = yield* Effect.try({
425
- try: () => createOpenAIProvider(),
426
- catch: (e) => e as Error,
427
- })
709
+ // Get active namespace to determine which embedding index to use
710
+ const activeProvider = yield* getActiveNamespace(resolvedRoot).pipe(
711
+ Effect.catchAll(() => Effect.succeed(null as ActiveProvider | null)),
712
+ )
713
+
714
+ if (!activeProvider) {
715
+ return yield* Effect.fail(
716
+ new EmbeddingsNotFoundError({ path: resolvedRoot }),
717
+ )
718
+ }
719
+
720
+ // Create provider for query embedding
721
+ const provider = yield* createEmbeddingProviderDirect(
722
+ options.providerConfig ?? { provider: 'openai' },
723
+ )
428
724
  const dimensions = provider.dimensions
429
725
 
430
- // Load vector store
431
- const vectorStore = createVectorStore(resolvedRoot, dimensions)
432
- const loaded = yield* vectorStore.load()
726
+ // Get current provider name for error messages
727
+ const currentProviderName = options.providerConfig?.provider ?? 'openai'
433
728
 
434
- if (!loaded) {
729
+ // Verify dimensions match the active namespace
730
+ if (dimensions !== activeProvider.dimensions) {
435
731
  return yield* Effect.fail(
436
- new Error("Embeddings not found. Run 'mdcontext embed' first."),
732
+ new DimensionMismatchError({
733
+ corpusDimensions: activeProvider.dimensions,
734
+ providerDimensions: dimensions,
735
+ corpusProvider: `${activeProvider.provider}:${activeProvider.model}`,
736
+ currentProvider: currentProviderName,
737
+ path: resolvedRoot,
738
+ }),
437
739
  )
438
740
  }
439
741
 
440
- // Embed the query
441
- const queryResult = yield* Effect.tryPromise({
442
- try: () => provider.embed([query]),
443
- catch: (e) =>
444
- new Error(
445
- `Query embedding failed: ${e instanceof Error ? e.message : String(e)}`,
446
- ),
447
- })
742
+ // Load vector store from the active namespace
743
+ const vectorStore = createNamespacedVectorStore(
744
+ resolvedRoot,
745
+ activeProvider.provider,
746
+ activeProvider.model,
747
+ activeProvider.dimensions,
748
+ )
749
+ const loadResult = yield* vectorStore.load()
750
+
751
+ if (!loadResult.loaded) {
752
+ return yield* Effect.fail(
753
+ new EmbeddingsNotFoundError({ path: resolvedRoot }),
754
+ )
755
+ }
756
+
757
+ // Check for HNSW parameter mismatch
758
+ yield* checkHnswMismatch(loadResult.hnswMismatch)
759
+
760
+ // Determine the text to embed
761
+ // If HyDE is enabled, generate a hypothetical document first
762
+ let textToEmbed: string
763
+ let hydeResult: HydeResult | undefined
764
+
765
+ if (options.hyde) {
766
+ // Generate hypothetical document using LLM
767
+ hydeResult = yield* generateHypotheticalDocument(query, {
768
+ model: options.hydeOptions?.model,
769
+ maxTokens: options.hydeOptions?.maxTokens,
770
+ temperature: options.hydeOptions?.temperature,
771
+ })
772
+ textToEmbed = hydeResult.hypotheticalDocument
773
+ yield* Effect.logDebug(
774
+ `HyDE generated ${hydeResult.tokensUsed} tokens ($${hydeResult.cost.toFixed(6)})`,
775
+ )
776
+ } else {
777
+ // Preprocess query for better recall (unless disabled)
778
+ textToEmbed = options.skipPreprocessing ? query : preprocessQuery(query)
779
+ }
780
+
781
+ // Embed the query (or hypothetical document)
782
+ const queryResult = yield* wrapEmbedding(
783
+ provider.embed([textToEmbed]),
784
+ currentProviderName,
785
+ )
448
786
 
449
787
  const queryVector = queryResult.embeddings[0]
450
788
  if (!queryVector) {
451
- return yield* Effect.fail(new Error('Failed to generate query embedding'))
789
+ return yield* Effect.fail(
790
+ new EmbeddingError({
791
+ reason: 'Unknown',
792
+ message: 'Failed to generate query embedding',
793
+ provider: currentProviderName,
794
+ }),
795
+ )
452
796
  }
453
797
 
454
798
  // Search
455
799
  const limit = options.limit ?? 10
456
800
  const threshold = options.threshold ?? 0
457
801
 
802
+ // Convert quality mode to efSearch value
803
+ const efSearch = options.quality
804
+ ? QUALITY_EF_SEARCH[options.quality]
805
+ : undefined
806
+
458
807
  const searchResults = yield* vectorStore.search(
459
808
  queryVector,
460
809
  limit * 2,
461
810
  threshold,
811
+ { efSearch },
462
812
  )
463
813
 
464
814
  // Apply path filter if specified
@@ -471,28 +821,313 @@ export const semanticSearch = (
471
821
  filteredResults = searchResults.filter((r) => regex.test(r.documentPath))
472
822
  }
473
823
 
474
- // Convert to SemanticSearchResult
475
- const results: SemanticSearchResult[] = filteredResults
476
- .slice(0, limit)
477
- .map((r) => ({
824
+ // Apply ranking boost (heading + file importance, enabled by default)
825
+ const applyBoost = options.headingBoost !== false
826
+ const boostedResults = applyBoost
827
+ ? filteredResults.map((r) => ({
828
+ ...r,
829
+ similarity: Math.min(
830
+ 1,
831
+ r.similarity +
832
+ calculateHeadingBoost(r.heading, query) +
833
+ calculateFileImportanceBoost(r.documentPath),
834
+ ),
835
+ }))
836
+ : filteredResults
837
+
838
+ // Re-sort by boosted similarity
839
+ const sortedResults = boostedResults.sort(
840
+ (a, b) => b.similarity - a.similarity,
841
+ )
842
+ const limitedResults = sortedResults.slice(0, limit)
843
+
844
+ // If context lines are requested, load section content
845
+ let results: readonly SemanticSearchResult[]
846
+ if (
847
+ options.contextBefore !== undefined ||
848
+ options.contextAfter !== undefined
849
+ ) {
850
+ const storage = createStorage(resolvedRoot)
851
+ const sectionIndex = yield* loadSectionIndex(storage)
852
+
853
+ if (sectionIndex) {
854
+ results = yield* addContextLinesToResults(
855
+ limitedResults,
856
+ sectionIndex,
857
+ resolvedRoot,
858
+ options,
859
+ )
860
+ } else {
861
+ results = limitedResults.map((r) => ({
862
+ sectionId: r.sectionId,
863
+ documentPath: r.documentPath,
864
+ heading: r.heading,
865
+ similarity: r.similarity,
866
+ }))
867
+ }
868
+ } else {
869
+ results = limitedResults.map((r) => ({
478
870
  sectionId: r.sectionId,
479
871
  documentPath: r.documentPath,
480
872
  heading: r.heading,
481
873
  similarity: r.similarity,
482
874
  }))
875
+ }
483
876
 
484
877
  return results
485
878
  })
486
879
 
880
/**
 * Perform semantic search with stats about below-threshold results.
 * Use this when you want to provide feedback to users about results that
 * didn't meet the threshold.
 *
 * The vector index is taken from the active namespace under `rootPath`; the
 * query is embedded with `options.providerConfig` (default
 * `{ provider: 'openai' }`), whose dimensions must equal the active
 * namespace's. `totalAvailable` is the number of hits left after the path
 * filter and before `limit` is applied.
 *
 * @param rootPath - Root directory containing embeddings
 * @param query - Natural language search query
 * @param options - Search options (limit, threshold, path filter)
 * @returns Results with optional below-threshold stats
 *
 * @throws EmbeddingsNotFoundError - No embeddings exist (run index --embed first)
 * @throws FileReadError - Cannot read index files
 * @throws IndexCorruptedError - Index files are corrupted
 * @throws ApiKeyMissingError - API key not set (check provider config)
 * @throws ApiKeyInvalidError - API key rejected by provider
 * @throws EmbeddingError - Embedding API failure (rate limit, quota, network)
 * @throws VectorStoreError - Cannot load or search vector index
 * @throws DimensionMismatchError - Corpus has different dimensions than current provider
 */
export const semanticSearchWithStats = (
  rootPath: string,
  query: string,
  options: SemanticSearchOptions = {},
): Effect.Effect<
  SemanticSearchResultWithStats,
  | EmbeddingsNotFoundError
  | FileReadError
  | IndexCorruptedError
  | ApiKeyMissingError
  | ApiKeyInvalidError
  | EmbeddingError
  | VectorStoreError
  | DimensionMismatchError
> =>
  Effect.gen(function* () {
    const resolvedRoot = path.resolve(rootPath)

    // Get active namespace to determine which embedding index to use.
    // Any failure to read it is treated the same as "no embeddings yet".
    const activeProvider = yield* getActiveNamespace(resolvedRoot).pipe(
      Effect.catchAll(() => Effect.succeed(null as ActiveProvider | null)),
    )

    if (!activeProvider) {
      return yield* Effect.fail(
        new EmbeddingsNotFoundError({ path: resolvedRoot }),
      )
    }

    // Create provider for query embedding
    const provider = yield* createEmbeddingProviderDirect(
      options.providerConfig ?? { provider: 'openai' },
    )
    const dimensions = provider.dimensions

    // Get current provider name for error messages
    const currentProviderName = options.providerConfig?.provider ?? 'openai'

    // Verify dimensions match the active namespace before touching the index:
    // a query vector of a different size cannot be compared with the corpus.
    if (dimensions !== activeProvider.dimensions) {
      return yield* Effect.fail(
        new DimensionMismatchError({
          corpusDimensions: activeProvider.dimensions,
          providerDimensions: dimensions,
          corpusProvider: `${activeProvider.provider}:${activeProvider.model}`,
          currentProvider: currentProviderName,
          path: resolvedRoot,
        }),
      )
    }

    // Load vector store from the active namespace
    const vectorStore = createNamespacedVectorStore(
      resolvedRoot,
      activeProvider.provider,
      activeProvider.model,
      activeProvider.dimensions,
    )
    const loadResult = yield* vectorStore.load()

    if (!loadResult.loaded) {
      return yield* Effect.fail(
        new EmbeddingsNotFoundError({ path: resolvedRoot }),
      )
    }

    // Check for HNSW parameter mismatch
    yield* checkHnswMismatch(loadResult.hnswMismatch)

    // Determine the text to embed
    // If HyDE is enabled, generate a hypothetical document first
    let textToEmbed: string

    if (options.hyde) {
      // Generate hypothetical document using LLM
      const hydeResult: HydeResult = yield* generateHypotheticalDocument(
        query,
        {
          model: options.hydeOptions?.model,
          maxTokens: options.hydeOptions?.maxTokens,
          temperature: options.hydeOptions?.temperature,
        },
      )
      textToEmbed = hydeResult.hypotheticalDocument
      yield* Effect.logDebug(
        `HyDE generated ${hydeResult.tokensUsed} tokens ($${hydeResult.cost.toFixed(6)})`,
      )
    } else {
      // Preprocess query for better recall (unless disabled)
      textToEmbed = options.skipPreprocessing ? query : preprocessQuery(query)
    }

    // Embed the query (or hypothetical document)
    const queryResult = yield* wrapEmbedding(
      provider.embed([textToEmbed]),
      currentProviderName,
    )

    const queryVector = queryResult.embeddings[0]
    if (!queryVector) {
      return yield* Effect.fail(
        new EmbeddingError({
          reason: 'Unknown',
          message: 'Failed to generate query embedding',
          provider: currentProviderName,
        }),
      )
    }

    // Search with stats
    const limit = options.limit ?? 10
    const threshold = options.threshold ?? 0

    // Convert quality mode to efSearch value
    const efSearch = options.quality
      ? QUALITY_EF_SEARCH[options.quality]
      : undefined

    // Over-fetch (2x limit) so the path filter and re-ranking below still
    // have enough candidates to fill `limit` slots.
    const searchResultWithStats = yield* vectorStore.searchWithStats(
      queryVector,
      limit * 2,
      threshold,
      { efSearch },
    )

    // Apply path filter if specified
    let filteredResults = searchResultWithStats.results
    if (options.pathPattern) {
      // Glob-to-regex: `.` is escaped and `*` becomes `.*`.
      // NOTE(review): other regex metacharacters in the pattern are passed to
      // RegExp unescaped - confirm whether that is intended.
      const pattern = options.pathPattern
        .replace(/\./g, '\\.')
        .replace(/\*/g, '.*')
      const regex = new RegExp(`^${pattern}$`, 'i')
      filteredResults = searchResultWithStats.results.filter((r) =>
        regex.test(r.documentPath),
      )
    }

    // Apply ranking boost (heading + file importance, enabled by default)
    const applyBoost = options.headingBoost !== false
    const boostedResults = applyBoost
      ? filteredResults.map((r) => ({
          ...r,
          // Clamp so a boosted score never exceeds 1.
          similarity: Math.min(
            1,
            r.similarity +
              calculateHeadingBoost(r.heading, query) +
              calculateFileImportanceBoost(r.documentPath),
          ),
        }))
      : filteredResults

    // Re-sort by boosted similarity. Sort a copy: with boosting disabled and
    // no path filter, `boostedResults` is the very array returned by the
    // vector store, and an in-place sort would reorder the caller's data.
    const sortedResults = [...boostedResults].sort(
      (a, b) => b.similarity - a.similarity,
    )
    const totalAvailable = sortedResults.length
    const limitedResults = sortedResults.slice(0, limit)

    // Strip a vector hit down to the public result shape (no context lines).
    const toPlainResult = (r: VectorSearchResult): SemanticSearchResult => ({
      sectionId: r.sectionId,
      documentPath: r.documentPath,
      heading: r.heading,
      similarity: r.similarity,
    })

    // If context lines are requested, load section content. The section index
    // is only loaded in that case; when it is unavailable the plain results
    // are returned instead.
    let results: readonly SemanticSearchResult[] | undefined
    if (
      options.contextBefore !== undefined ||
      options.contextAfter !== undefined
    ) {
      const storage = createStorage(resolvedRoot)
      const sectionIndex = yield* loadSectionIndex(storage)

      if (sectionIndex) {
        results = yield* addContextLinesToResults(
          limitedResults,
          sectionIndex,
          resolvedRoot,
          options,
        )
      }
    }

    return {
      results: results ?? limitedResults.map((r) => toPlainResult(r)),
      belowThresholdCount: searchResultWithStats.belowThresholdCount,
      belowThresholdHighest:
        searchResultWithStats.belowThresholdHighest ?? undefined,
      totalAvailable,
    }
  })
1094
+
487
1095
  // ============================================================================
488
1096
  // Search with Content
489
1097
  // ============================================================================
490
1098
 
1099
+ /**
1100
+ * Perform semantic search and include section content in results.
1101
+ *
1102
+ * @param rootPath - Root directory containing embeddings
1103
+ * @param query - Natural language search query
1104
+ * @param options - Search options (limit, threshold, path filter)
1105
+ * @returns Ranked list of matching sections with content
1106
+ *
1107
+ * @throws EmbeddingsNotFoundError - No embeddings exist (run index --embed first)
1108
+ * @throws FileReadError - Cannot read index files
1109
+ * @throws IndexCorruptedError - Index files are corrupted
1110
+ * @throws ApiKeyMissingError - API key not set (check provider config)
1111
+ * @throws ApiKeyInvalidError - API key rejected by provider
1112
+ * @throws EmbeddingError - Embedding API failure (rate limit, quota, network)
1113
+ * @throws VectorStoreError - Cannot load or search vector index
1114
+ * @throws DimensionMismatchError - Corpus has different dimensions than current provider
1115
+ */
491
1116
  export const semanticSearchWithContent = (
492
1117
  rootPath: string,
493
1118
  query: string,
494
1119
  options: SemanticSearchOptions = {},
495
- ): Effect.Effect<readonly SemanticSearchResult[], Error> =>
1120
+ ): Effect.Effect<
1121
+ readonly SemanticSearchResult[],
1122
+ | EmbeddingsNotFoundError
1123
+ | FileReadError
1124
+ | IndexCorruptedError
1125
+ | ApiKeyMissingError
1126
+ | ApiKeyInvalidError
1127
+ | EmbeddingError
1128
+ | VectorStoreError
1129
+ | DimensionMismatchError
1130
+ > =>
496
1131
  Effect.gen(function* () {
497
1132
  const resolvedRoot = path.resolve(rootPath)
498
1133
  const results = yield* semanticSearch(resolvedRoot, query, options)
@@ -515,23 +1150,35 @@ export const semanticSearchWithContent = (
515
1150
 
516
1151
  const filePath = path.join(resolvedRoot, result.documentPath)
517
1152
 
518
- try {
519
- const fileContent = yield* Effect.promise(() =>
520
- fs.readFile(filePath, 'utf-8'),
521
- )
522
-
523
- const lines = fileContent.split('\n')
524
- const content = lines
525
- .slice(section.startLine - 1, section.endLine)
526
- .join('\n')
1153
+ // Note: catchAll is intentional - file read failures during search result
1154
+ // enrichment should skip content loading with a warning, not fail the search.
1155
+ // Results are still returned without content when files can't be read.
1156
+ const fileContentResult = yield* Effect.promise(() =>
1157
+ fs.readFile(filePath, 'utf-8'),
1158
+ ).pipe(
1159
+ Effect.map((content) => ({ ok: true as const, content })),
1160
+ Effect.catchAll(() =>
1161
+ Effect.succeed({ ok: false as const, content: '' }),
1162
+ ),
1163
+ )
527
1164
 
528
- resultsWithContent.push({
529
- ...result,
530
- content,
531
- })
532
- } catch {
1165
+ if (!fileContentResult.ok) {
1166
+ yield* Effect.logWarning(
1167
+ `Skipping content load (cannot read): ${result.documentPath}`,
1168
+ )
533
1169
  resultsWithContent.push(result)
1170
+ continue
534
1171
  }
1172
+
1173
+ const lines = fileContentResult.content.split('\n')
1174
+ const content = lines
1175
+ .slice(section.startLine - 1, section.endLine)
1176
+ .join('\n')
1177
+
1178
+ resultsWithContent.push({
1179
+ ...result,
1180
+ content,
1181
+ })
535
1182
  }
536
1183
 
537
1184
  return resultsWithContent
@@ -545,22 +1192,60 @@ export interface EmbeddingStats {
545
1192
  readonly hasEmbeddings: boolean
546
1193
  readonly count: number
547
1194
  readonly provider: string
1195
+ readonly model?: string | undefined
548
1196
  readonly dimensions: number
549
1197
  readonly totalCost: number
550
1198
  readonly totalTokens: number
551
1199
  }
552
1200
 
1201
+ /**
1202
+ * Get statistics about stored embeddings.
1203
+ * Uses the active namespace to find the current embedding index.
1204
+ *
1205
+ * @param rootPath - Root directory containing embeddings
1206
+ * @returns Embedding statistics (count, provider, costs)
1207
+ *
1208
+ * @throws VectorStoreError - Cannot load vector index metadata
1209
+ */
553
1210
  export const getEmbeddingStats = (
554
1211
  rootPath: string,
555
- ): Effect.Effect<EmbeddingStats, Error> =>
1212
+ ): Effect.Effect<EmbeddingStats, VectorStoreError> =>
556
1213
  Effect.gen(function* () {
557
1214
  const resolvedRoot = path.resolve(rootPath)
558
1215
 
559
- // Try to load with default dimensions
560
- const vectorStore = createVectorStore(resolvedRoot, 1536)
561
- const loaded = yield* vectorStore.load()
1216
+ // Get the active namespace to find where embeddings are stored
1217
+ const activeProvider = yield* getActiveNamespace(resolvedRoot).pipe(
1218
+ Effect.catchAll(() => Effect.succeed(null as ActiveProvider | null)),
1219
+ )
562
1220
 
563
- if (!loaded) {
1221
+ if (!activeProvider) {
1222
+ return {
1223
+ hasEmbeddings: false,
1224
+ count: 0,
1225
+ provider: 'none',
1226
+ dimensions: 0,
1227
+ totalCost: 0,
1228
+ totalTokens: 0,
1229
+ }
1230
+ }
1231
+
1232
+ // Load the namespaced vector store to get stats
1233
+ const vectorStore = createNamespacedVectorStore(
1234
+ resolvedRoot,
1235
+ activeProvider.provider,
1236
+ activeProvider.model,
1237
+ activeProvider.dimensions,
1238
+ )
1239
+
1240
+ const loadResult = yield* vectorStore
1241
+ .load()
1242
+ .pipe(
1243
+ Effect.catchAll(() =>
1244
+ Effect.succeed({ loaded: false } as VectorStoreLoadResult),
1245
+ ),
1246
+ )
1247
+
1248
+ if (!loadResult.loaded) {
564
1249
  return {
565
1250
  hasEmbeddings: false,
566
1251
  count: 0,
@@ -572,12 +1257,14 @@ export const getEmbeddingStats = (
572
1257
  }
573
1258
 
574
1259
  const stats = vectorStore.getStats()
1260
+
575
1261
  return {
576
1262
  hasEmbeddings: true,
577
1263
  count: stats.count,
578
- provider: stats.provider,
1264
+ provider: stats.provider || 'openai',
1265
+ model: stats.providerModel,
579
1266
  dimensions: stats.dimensions,
580
- totalCost: stats.totalCost,
581
- totalTokens: stats.totalTokens,
1267
+ totalCost: stats.totalCost || 0,
1268
+ totalTokens: stats.totalTokens || 0,
582
1269
  }
583
1270
  })