mdcontext 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (251) hide show
  1. package/.changeset/config.json +9 -9
  2. package/.claude/settings.local.json +25 -0
  3. package/.github/workflows/claude-code-review.yml +44 -0
  4. package/.github/workflows/claude.yml +85 -0
  5. package/CONTRIBUTING.md +186 -0
  6. package/NOTES/NOTES +44 -0
  7. package/README.md +206 -3
  8. package/biome.json +1 -1
  9. package/dist/chunk-23UPXDNL.js +3044 -0
  10. package/dist/chunk-2W7MO2DL.js +1366 -0
  11. package/dist/chunk-3NUAZGMA.js +1689 -0
  12. package/dist/chunk-7TOWB2XB.js +366 -0
  13. package/dist/chunk-7XOTOADQ.js +3065 -0
  14. package/dist/chunk-AH2PDM2K.js +3042 -0
  15. package/dist/chunk-BNXWSZ63.js +3742 -0
  16. package/dist/chunk-BTL5DJVU.js +3222 -0
  17. package/dist/chunk-HDHYG7E4.js +104 -0
  18. package/dist/chunk-HLR4KZBP.js +3234 -0
  19. package/dist/chunk-IP3FRFEB.js +1045 -0
  20. package/dist/chunk-KHU56VDO.js +3042 -0
  21. package/dist/chunk-KRYIFLQR.js +85 -89
  22. package/dist/chunk-LBSDNLEM.js +287 -0
  23. package/dist/chunk-MNTQ7HCP.js +2643 -0
  24. package/dist/chunk-MUJELQQ6.js +1387 -0
  25. package/dist/chunk-MXJGMSLV.js +2199 -0
  26. package/dist/chunk-N6QJGC3Z.js +2636 -0
  27. package/dist/chunk-OBELGBPM.js +1713 -0
  28. package/dist/chunk-OT7R5XTA.js +3192 -0
  29. package/dist/chunk-P7X4RA2T.js +106 -0
  30. package/dist/chunk-PIDUQNC2.js +3185 -0
  31. package/dist/chunk-POGCDIH4.js +3187 -0
  32. package/dist/chunk-PSIEOQGZ.js +3043 -0
  33. package/dist/chunk-PVRT3IHA.js +3238 -0
  34. package/dist/chunk-QNN4TT23.js +1430 -0
  35. package/dist/chunk-RE3R45RJ.js +3042 -0
  36. package/dist/chunk-S7E6TFX6.js +718 -657
  37. package/dist/chunk-SG6GLU4U.js +1378 -0
  38. package/dist/chunk-SJCDV2ST.js +274 -0
  39. package/dist/chunk-SYE5XLF3.js +104 -0
  40. package/dist/chunk-T5VLYBZD.js +103 -0
  41. package/dist/chunk-TOQB7VWU.js +3238 -0
  42. package/dist/chunk-VFNMZ4ZQ.js +3228 -0
  43. package/dist/chunk-VVTGZNBT.js +1533 -1423
  44. package/dist/chunk-W7Q4RFEV.js +104 -0
  45. package/dist/chunk-XTYYVRLO.js +3190 -0
  46. package/dist/chunk-Y6MDYVJD.js +3063 -0
  47. package/dist/cli/main.js +4072 -629
  48. package/dist/index.d.ts +420 -33
  49. package/dist/index.js +8 -15
  50. package/dist/mcp/server.js +103 -7
  51. package/dist/schema-BAWSG7KY.js +22 -0
  52. package/dist/schema-E3QUPL26.js +20 -0
  53. package/dist/schema-EHL7WUT6.js +20 -0
  54. package/docs/019-USAGE.md +44 -5
  55. package/docs/020-current-implementation.md +8 -8
  56. package/docs/021-DOGFOODING-FINDINGS.md +1 -1
  57. package/docs/CONFIG.md +1123 -0
  58. package/docs/ERRORS.md +383 -0
  59. package/docs/summarization.md +320 -0
  60. package/justfile +40 -0
  61. package/package.json +39 -33
  62. package/research/INDEX.md +315 -0
  63. package/research/code-review/README.md +90 -0
  64. package/research/code-review/cli-error-handling-review.md +979 -0
  65. package/research/code-review/code-review-validation-report.md +464 -0
  66. package/research/code-review/main-ts-review.md +1128 -0
  67. package/research/config-docs/SUMMARY.md +357 -0
  68. package/research/config-docs/TEST-RESULTS.md +776 -0
  69. package/research/config-docs/TODO.md +542 -0
  70. package/research/config-docs/analysis.md +744 -0
  71. package/research/config-docs/fix-validation.md +502 -0
  72. package/research/config-docs/help-audit.md +264 -0
  73. package/research/config-docs/help-system-analysis.md +890 -0
  74. package/research/frontmatter/COMMENTS-ARE-SKIPPED.md +149 -0
  75. package/research/frontmatter/LLM-CODE-NAVIGATION.md +276 -0
  76. package/research/issue-review.md +603 -0
  77. package/research/llm-summarization/agent-cli-tools-2026.md +1082 -0
  78. package/research/llm-summarization/alternative-providers-2026.md +1428 -0
  79. package/research/llm-summarization/anthropic-2026.md +367 -0
  80. package/research/llm-summarization/claude-cli-integration.md +1706 -0
  81. package/research/llm-summarization/cli-integration-patterns.md +3155 -0
  82. package/research/llm-summarization/openai-2026.md +473 -0
  83. package/research/llm-summarization/openai-compatible-providers-2026.md +1022 -0
  84. package/research/llm-summarization/opencode-cli-integration.md +1552 -0
  85. package/research/llm-summarization/prompt-engineering-2026.md +1426 -0
  86. package/research/llm-summarization/prototype-results.md +56 -0
  87. package/research/llm-summarization/provider-switching-patterns-2026.md +2153 -0
  88. package/research/llm-summarization/typescript-llm-libraries-2026.md +2436 -0
  89. package/research/mdcontext-pudding/00-EXECUTIVE-SUMMARY.md +282 -0
  90. package/research/mdcontext-pudding/01-index-embed.md +956 -0
  91. package/research/mdcontext-pudding/02-search-COMMANDS.md +142 -0
  92. package/research/mdcontext-pudding/02-search-SUMMARY.md +146 -0
  93. package/research/mdcontext-pudding/02-search.md +970 -0
  94. package/research/mdcontext-pudding/03-context.md +779 -0
  95. package/research/mdcontext-pudding/04-navigation-and-analytics.md +803 -0
  96. package/research/mdcontext-pudding/04-tree.md +704 -0
  97. package/research/mdcontext-pudding/05-config.md +1038 -0
  98. package/research/mdcontext-pudding/06-links-summary.txt +87 -0
  99. package/research/mdcontext-pudding/06-links.md +679 -0
  100. package/research/mdcontext-pudding/07-stats.md +693 -0
  101. package/research/mdcontext-pudding/BUG-FIX-PLAN.md +388 -0
  102. package/research/mdcontext-pudding/P0-BUG-VALIDATION.md +167 -0
  103. package/research/mdcontext-pudding/README.md +168 -0
  104. package/research/mdcontext-pudding/TESTING-SUMMARY.md +128 -0
  105. package/research/research-quality-review.md +834 -0
  106. package/research/semantic-search/embedding-text-analysis.md +156 -0
  107. package/research/semantic-search/multi-word-failure-reproduction.md +171 -0
  108. package/research/semantic-search/query-processing-analysis.md +207 -0
  109. package/research/semantic-search/root-cause-and-solution.md +114 -0
  110. package/research/semantic-search/threshold-validation-report.md +69 -0
  111. package/research/semantic-search/vector-search-analysis.md +63 -0
  112. package/research/test-path-issues.md +276 -0
  113. package/review/ALP-76/1-error-type-design.md +962 -0
  114. package/review/ALP-76/2-error-handling-patterns.md +906 -0
  115. package/review/ALP-76/3-error-presentation.md +624 -0
  116. package/review/ALP-76/4-test-coverage.md +625 -0
  117. package/review/ALP-76/5-migration-completeness.md +440 -0
  118. package/review/ALP-76/6-effect-best-practices.md +755 -0
  119. package/scripts/apply-branch-protection.sh +47 -0
  120. package/scripts/branch-protection-templates.json +79 -0
  121. package/scripts/prototype-summarization.ts +346 -0
  122. package/scripts/rebuild-hnswlib.js +32 -37
  123. package/scripts/setup-branch-protection.sh +64 -0
  124. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/active-provider.json +7 -0
  125. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.json +541 -0
  126. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.meta.json +5 -0
  127. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/config.json +8 -0
  128. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
  129. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
  130. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/documents.json +60 -0
  131. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/links.json +13 -0
  132. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/sections.json +1197 -0
  133. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/configuration-management.md +99 -0
  134. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/distributed-systems.md +92 -0
  135. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/error-handling.md +78 -0
  136. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/failure-automation.md +55 -0
  137. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/job-context.md +69 -0
  138. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/process-orchestration.md +99 -0
  139. package/src/cli/argv-preprocessor.test.ts +2 -2
  140. package/src/cli/cli.test.ts +230 -33
  141. package/src/cli/commands/config-cmd.ts +642 -0
  142. package/src/cli/commands/context.ts +97 -9
  143. package/src/cli/commands/duplicates.ts +122 -0
  144. package/src/cli/commands/embeddings.ts +529 -0
  145. package/src/cli/commands/index-cmd.ts +210 -30
  146. package/src/cli/commands/index.ts +3 -0
  147. package/src/cli/commands/search.ts +894 -64
  148. package/src/cli/commands/stats.ts +3 -0
  149. package/src/cli/commands/tree.ts +26 -5
  150. package/src/cli/config-layer.ts +176 -0
  151. package/src/cli/error-handler.test.ts +235 -0
  152. package/src/cli/error-handler.ts +655 -0
  153. package/src/cli/flag-schemas.ts +66 -0
  154. package/src/cli/help.ts +209 -7
  155. package/src/cli/main.ts +348 -58
  156. package/src/cli/options.ts +10 -0
  157. package/src/cli/shared-error-handling.ts +199 -0
  158. package/src/cli/utils.ts +150 -17
  159. package/src/config/file-provider.test.ts +320 -0
  160. package/src/config/file-provider.ts +273 -0
  161. package/src/config/index.ts +72 -0
  162. package/src/config/integration.test.ts +667 -0
  163. package/src/config/precedence.test.ts +277 -0
  164. package/src/config/precedence.ts +451 -0
  165. package/src/config/schema.test.ts +414 -0
  166. package/src/config/schema.ts +603 -0
  167. package/src/config/service.test.ts +320 -0
  168. package/src/config/service.ts +243 -0
  169. package/src/config/testing.test.ts +264 -0
  170. package/src/config/testing.ts +110 -0
  171. package/src/core/types.ts +6 -33
  172. package/src/duplicates/detector.test.ts +183 -0
  173. package/src/duplicates/detector.ts +414 -0
  174. package/src/duplicates/index.ts +18 -0
  175. package/src/embeddings/embedding-namespace.test.ts +300 -0
  176. package/src/embeddings/embedding-namespace.ts +947 -0
  177. package/src/embeddings/heading-boost.test.ts +222 -0
  178. package/src/embeddings/hnsw-build-options.test.ts +198 -0
  179. package/src/embeddings/hyde.test.ts +272 -0
  180. package/src/embeddings/hyde.ts +264 -0
  181. package/src/embeddings/index.ts +2 -0
  182. package/src/embeddings/openai-provider.ts +332 -83
  183. package/src/embeddings/pricing.json +22 -0
  184. package/src/embeddings/provider-constants.ts +204 -0
  185. package/src/embeddings/provider-errors.test.ts +967 -0
  186. package/src/embeddings/provider-errors.ts +565 -0
  187. package/src/embeddings/provider-factory.test.ts +240 -0
  188. package/src/embeddings/provider-factory.ts +225 -0
  189. package/src/embeddings/provider-integration.test.ts +788 -0
  190. package/src/embeddings/query-preprocessing.test.ts +187 -0
  191. package/src/embeddings/semantic-search-threshold.test.ts +508 -0
  192. package/src/embeddings/semantic-search.ts +780 -93
  193. package/src/embeddings/types.ts +293 -16
  194. package/src/embeddings/vector-store.ts +486 -77
  195. package/src/embeddings/voyage-provider.ts +313 -0
  196. package/src/errors/errors.test.ts +845 -0
  197. package/src/errors/index.ts +533 -0
  198. package/src/index/ignore-patterns.test.ts +354 -0
  199. package/src/index/ignore-patterns.ts +305 -0
  200. package/src/index/indexer.ts +286 -48
  201. package/src/index/storage.ts +94 -30
  202. package/src/index/types.ts +40 -2
  203. package/src/index/watcher.ts +67 -9
  204. package/src/index.ts +22 -0
  205. package/src/integration/search-keyword.test.ts +678 -0
  206. package/src/mcp/server.ts +135 -6
  207. package/src/parser/parser.ts +18 -19
  208. package/src/parser/section-filter.test.ts +277 -0
  209. package/src/parser/section-filter.ts +125 -3
  210. package/src/search/__tests__/hybrid-search.test.ts +650 -0
  211. package/src/search/bm25-store.ts +366 -0
  212. package/src/search/cross-encoder.test.ts +253 -0
  213. package/src/search/cross-encoder.ts +406 -0
  214. package/src/search/fuzzy-search.test.ts +419 -0
  215. package/src/search/fuzzy-search.ts +273 -0
  216. package/src/search/hybrid-search.ts +448 -0
  217. package/src/search/path-matcher.test.ts +276 -0
  218. package/src/search/path-matcher.ts +33 -0
  219. package/src/search/searcher.test.ts +99 -1
  220. package/src/search/searcher.ts +189 -67
  221. package/src/search/wink-bm25.d.ts +30 -0
  222. package/src/summarization/cli-providers/claude.ts +202 -0
  223. package/src/summarization/cli-providers/detection.test.ts +273 -0
  224. package/src/summarization/cli-providers/detection.ts +118 -0
  225. package/src/summarization/cli-providers/index.ts +8 -0
  226. package/src/summarization/cost.test.ts +139 -0
  227. package/src/summarization/cost.ts +102 -0
  228. package/src/summarization/error-handler.test.ts +127 -0
  229. package/src/summarization/error-handler.ts +111 -0
  230. package/src/summarization/index.ts +102 -0
  231. package/src/summarization/pipeline.test.ts +498 -0
  232. package/src/summarization/pipeline.ts +231 -0
  233. package/src/summarization/prompts.test.ts +269 -0
  234. package/src/summarization/prompts.ts +133 -0
  235. package/src/summarization/provider-factory.test.ts +396 -0
  236. package/src/summarization/provider-factory.ts +178 -0
  237. package/src/summarization/types.ts +184 -0
  238. package/src/summarize/summarizer.ts +104 -35
  239. package/src/types/huggingface-transformers.d.ts +66 -0
  240. package/tests/fixtures/cli/.mdcontext/active-provider.json +7 -0
  241. package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
  242. package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
  243. package/tests/fixtures/cli/.mdcontext/indexes/documents.json +4 -4
  244. package/tests/fixtures/cli/.mdcontext/indexes/sections.json +14 -0
  245. package/tests/integration/embed-index.test.ts +712 -0
  246. package/tests/integration/search-context.test.ts +469 -0
  247. package/tests/integration/search-semantic.test.ts +522 -0
  248. package/vitest.config.ts +1 -6
  249. package/AGENTS.md +0 -46
  250. package/tests/fixtures/cli/.mdcontext/vectors.bin +0 -0
  251. package/tests/fixtures/cli/.mdcontext/vectors.meta.json +0 -1264
@@ -1,12 +1,28 @@
1
1
  /**
2
2
  * Vector store using hnswlib-node
3
+ *
4
+ * Supports both legacy (flat) and namespaced storage layouts:
5
+ * - Legacy: .mdcontext/vectors.bin, .mdcontext/vectors.meta.bin
6
+ * - Namespaced: .mdcontext/embeddings/{namespace}/vectors.bin, vectors.meta.bin
7
+ *
8
+ * New indexes are written using namespaced storage. Existing legacy indexes
9
+ * continue to be loaded from their original flat locations; this module does
10
+ * not perform automatic migration between layouts.
3
11
  */
4
12
 
5
13
  import * as fs from 'node:fs/promises'
6
14
  import * as path from 'node:path'
15
+ import * as msgpack from '@msgpack/msgpack'
7
16
  import { Effect } from 'effect'
8
17
  import HierarchicalNSW from 'hnswlib-node'
18
+ import { DimensionMismatchError, VectorStoreError } from '../errors/index.js'
9
19
  import { INDEX_DIR } from '../index/types.js'
20
+ import {
21
+ generateNamespace,
22
+ getNamespaceDir,
23
+ getMetaPath as getNamespacedMetaPath,
24
+ getVectorPath as getNamespacedVectorPath,
25
+ } from './embedding-namespace.js'
10
26
  import type { VectorEntry, VectorIndex } from './types.js'
11
27
 
12
28
  // ============================================================================
@@ -14,24 +30,49 @@ import type { VectorEntry, VectorIndex } from './types.js'
14
30
  // ============================================================================
15
31
 
16
32
  const VECTOR_INDEX_FILE = 'vectors.bin'
17
- const VECTOR_META_FILE = 'vectors.meta.json'
33
+ const VECTOR_META_FILE = 'vectors.meta.bin'
18
34
  const INDEX_VERSION = 1
19
35
 
20
36
  // ============================================================================
21
37
  // Vector Store
22
38
  // ============================================================================
23
39
 
40
+ export interface VectorSearchOptions {
41
+ /** efSearch parameter for HNSW (controls recall/speed tradeoff, default: 100). When omitted, setEf is not called and the index keeps its current ef value. */
42
+ readonly efSearch?: number | undefined
43
+ }
44
+
24
45
  export interface VectorStore {
25
46
  readonly rootPath: string
26
47
  readonly dimensions: number
27
- add(entries: VectorEntry[]): Effect.Effect<void, Error>
48
+ add(entries: VectorEntry[]): Effect.Effect<void, VectorStoreError>
28
49
  search(
29
50
  vector: number[],
30
51
  limit: number,
31
52
  threshold?: number,
32
- ): Effect.Effect<VectorSearchResult[], Error>
33
- save(): Effect.Effect<void, Error>
34
- load(): Effect.Effect<boolean, Error>
53
+ options?: VectorSearchOptions,
54
+ ): Effect.Effect<VectorSearchResult[], VectorStoreError>
55
+ /**
56
+ * Search with additional stats about below-threshold results.
57
+ * Used to provide feedback when 0 results pass the threshold.
58
+ */
59
+ searchWithStats(
60
+ vector: number[],
61
+ limit: number,
62
+ threshold?: number,
63
+ options?: VectorSearchOptions,
64
+ ): Effect.Effect<VectorSearchResultWithStats, VectorStoreError>
65
+ save(): Effect.Effect<void, VectorStoreError>
66
+ /**
67
+ * Load the vector store from disk.
68
+ *
69
+ * @returns VectorStoreLoadResult with loaded status and any warnings
70
+ * @throws DimensionMismatchError (surfaced through the Effect error channel) if the stored dimensions don't match the current provider
71
+ */
72
+ load(): Effect.Effect<
73
+ VectorStoreLoadResult,
74
+ VectorStoreError | DimensionMismatchError
75
+ >
35
76
  getStats(): VectorStoreStats
36
77
  }
37
78
 
@@ -43,14 +84,48 @@ export interface VectorSearchResult {
43
84
  readonly similarity: number
44
85
  }
45
86
 
87
+ /**
88
+ * Extended search result with metadata about below-threshold results.
89
+ * Used to provide user feedback when 0 results pass the threshold.
90
+ */
91
+ export interface VectorSearchResultWithStats {
92
+ readonly results: VectorSearchResult[]
93
+ /** Number of results that were found but below threshold */
94
+ readonly belowThresholdCount: number
95
+ /** Highest similarity score among below-threshold results (if any) */
96
+ readonly belowThresholdHighest: number | null
97
+ }
98
+
46
99
  export interface VectorStoreStats {
47
100
  readonly count: number
48
101
  readonly dimensions: number
49
102
  readonly provider: string
103
+ readonly providerModel?: string | undefined
50
104
  readonly totalCost: number
51
105
  readonly totalTokens: number
52
106
  }
53
107
 
108
+ /**
109
+ * Result of loading a vector store, including any warnings about config mismatches.
110
+ */
111
+ export interface VectorStoreLoadResult {
112
+ /** Whether the index was loaded successfully */
113
+ readonly loaded: boolean
114
+ /** Warning about HNSW parameter mismatch (if any) */
115
+ readonly hnswMismatch?: HnswMismatchWarning | undefined
116
+ }
117
+
118
+ /**
119
+ * Warning when HNSW parameters in config differ from stored index parameters.
120
+ * The index was built with different parameters than currently configured.
121
+ */
122
+ export interface HnswMismatchWarning {
123
+ /** Current config values */
124
+ readonly configParams: { m: number; efConstruction: number }
125
+ /** Values stored in the index */
126
+ readonly indexParams: { m: number; efConstruction: number }
127
+ }
128
+
54
129
  // ============================================================================
55
130
  // Implementation
56
131
  // ============================================================================
@@ -64,24 +139,73 @@ class HnswVectorStore implements VectorStore {
64
139
  private idToIndex: Map<string, number> = new Map()
65
140
  private nextIndex = 0
66
141
  private provider = 'unknown'
142
+ private providerModel: string | undefined = undefined
143
+ private providerBaseURL: string | undefined = undefined
67
144
  private totalCost = 0
68
145
  private totalTokens = 0
69
146
 
70
- constructor(rootPath: string, dimensions: number) {
147
+ // HNSW build parameters
148
+ private readonly hnswM: number
149
+ private readonly hnswEfConstruction: number
150
+
151
+ // Namespace support - when set, uses namespaced storage paths
152
+ private namespace: string | undefined = undefined
153
+
154
+ constructor(
155
+ rootPath: string,
156
+ dimensions: number,
157
+ hnswOptions?: HnswBuildOptions,
158
+ ) {
71
159
  this.rootPath = path.resolve(rootPath)
72
160
  this.dimensions = dimensions
161
+ this.hnswM = hnswOptions?.m ?? 16
162
+ this.hnswEfConstruction = hnswOptions?.efConstruction ?? 200
163
+ }
164
+
165
+ /**
166
+ * Set the namespace for this vector store.
167
+ * When set, all storage operations use the namespaced path.
168
+ */
169
+ setNamespace(namespace: string): void {
170
+ this.namespace = namespace
171
+ }
172
+
173
+ /**
174
+ * Get the current namespace (if any).
175
+ */
176
+ getNamespace(): string | undefined {
177
+ return this.namespace
73
178
  }
74
179
 
180
+ /**
181
+ * Get the index directory path.
182
+ * Returns namespaced path if namespace is set, otherwise legacy path.
183
+ */
75
184
  private getIndexDir(): string {
185
+ if (this.namespace) {
186
+ return getNamespaceDir(this.rootPath, this.namespace)
187
+ }
76
188
  return path.join(this.rootPath, INDEX_DIR)
77
189
  }
78
190
 
191
+ /**
192
+ * Get the vector index file path.
193
+ */
79
194
  private getVectorPath(): string {
80
- return path.join(this.getIndexDir(), VECTOR_INDEX_FILE)
195
+ if (this.namespace) {
196
+ return getNamespacedVectorPath(this.rootPath, this.namespace)
197
+ }
198
+ return path.join(this.rootPath, INDEX_DIR, VECTOR_INDEX_FILE)
81
199
  }
82
200
 
201
+ /**
202
+ * Get the metadata file path.
203
+ */
83
204
  private getMetaPath(): string {
84
- return path.join(this.getIndexDir(), VECTOR_META_FILE)
205
+ if (this.namespace) {
206
+ return getNamespacedMetaPath(this.rootPath, this.namespace)
207
+ }
208
+ return path.join(this.rootPath, INDEX_DIR, VECTOR_META_FILE)
85
209
  }
86
210
 
87
211
  private ensureIndex(): HierarchicalNSW.HierarchicalNSW {
@@ -91,32 +215,41 @@ class HnswVectorStore implements VectorStore {
91
215
  'cosine',
92
216
  this.dimensions,
93
217
  )
94
- this.index.initIndex(10000, 16, 200, 100)
218
+ // Use configured HNSW parameters (M, efConstruction); the random seed is fixed at 100
219
+ this.index.initIndex(10000, this.hnswM, this.hnswEfConstruction, 100)
95
220
  }
96
221
  return this.index
97
222
  }
98
223
 
99
- add(entries: VectorEntry[]): Effect.Effect<void, Error> {
100
- return Effect.sync(() => {
101
- const index = this.ensureIndex()
224
+ add(entries: VectorEntry[]): Effect.Effect<void, VectorStoreError> {
225
+ return Effect.try({
226
+ try: () => {
227
+ const index = this.ensureIndex()
102
228
 
103
- for (const entry of entries) {
104
- // Skip if already exists
105
- if (this.idToIndex.has(entry.id)) {
106
- continue
107
- }
229
+ for (const entry of entries) {
230
+ // Skip if already exists
231
+ if (this.idToIndex.has(entry.id)) {
232
+ continue
233
+ }
108
234
 
109
- const idx = this.nextIndex++
235
+ const idx = this.nextIndex++
110
236
 
111
- // Resize if needed
112
- if (idx >= index.getMaxElements()) {
113
- index.resizeIndex(index.getMaxElements() * 2)
114
- }
237
+ // Resize if needed
238
+ if (idx >= index.getMaxElements()) {
239
+ index.resizeIndex(index.getMaxElements() * 2)
240
+ }
115
241
 
116
- index.addPoint(entry.embedding as number[], idx)
117
- this.entries.set(idx, entry)
118
- this.idToIndex.set(entry.id, idx)
119
- }
242
+ index.addPoint(entry.embedding as number[], idx)
243
+ this.entries.set(idx, entry)
244
+ this.idToIndex.set(entry.id, idx)
245
+ }
246
+ },
247
+ catch: (e) =>
248
+ new VectorStoreError({
249
+ operation: 'add',
250
+ message: e instanceof Error ? e.message : String(e),
251
+ cause: e,
252
+ }),
120
253
  })
121
254
  }
122
255
 
@@ -124,36 +257,120 @@ class HnswVectorStore implements VectorStore {
124
257
  vector: number[],
125
258
  limit: number,
126
259
  threshold = 0,
127
- ): Effect.Effect<VectorSearchResult[], Error> {
128
- return Effect.sync(() => {
129
- if (!this.index || this.entries.size === 0) {
130
- return []
131
- }
132
-
133
- const result = this.index.searchKnn(
134
- vector,
135
- Math.min(limit, this.entries.size),
136
- )
137
- const results: VectorSearchResult[] = []
260
+ options?: VectorSearchOptions,
261
+ ): Effect.Effect<VectorSearchResult[], VectorStoreError> {
262
+ return Effect.try({
263
+ try: () => {
264
+ if (!this.index || this.entries.size === 0) {
265
+ return []
266
+ }
138
267
 
139
- for (let i = 0; i < result.neighbors.length; i++) {
140
- const idx = result.neighbors[i]
141
- const distance = result.distances[i]
268
+ // Set efSearch if provided (controls recall/speed tradeoff)
269
+ if (options?.efSearch !== undefined) {
270
+ this.index.setEf(options.efSearch)
271
+ }
142
272
 
143
- if (idx === undefined || distance === undefined) {
144
- continue
273
+ const result = this.index.searchKnn(
274
+ vector,
275
+ Math.min(limit, this.entries.size),
276
+ )
277
+ const results: VectorSearchResult[] = []
278
+
279
+ for (let i = 0; i < result.neighbors.length; i++) {
280
+ const idx = result.neighbors[i]
281
+ const distance = result.distances[i]
282
+
283
+ if (idx === undefined || distance === undefined) {
284
+ continue
285
+ }
286
+
287
+ // Convert distance to similarity (cosine distance to cosine similarity)
288
+ // hnswlib returns 1 - cosine_similarity for cosine space
289
+ const similarity = 1 - distance
290
+
291
+ if (similarity < threshold) {
292
+ continue
293
+ }
294
+
295
+ const entry = this.entries.get(idx)
296
+ if (entry) {
297
+ results.push({
298
+ id: entry.id,
299
+ sectionId: entry.sectionId,
300
+ documentPath: entry.documentPath,
301
+ heading: entry.heading,
302
+ similarity,
303
+ })
304
+ }
145
305
  }
146
306
 
147
- // Convert distance to similarity (cosine distance to cosine similarity)
148
- // hnswlib returns 1 - cosine_similarity for cosine space
149
- const similarity = 1 - distance
307
+ return results
308
+ },
309
+ catch: (e) =>
310
+ new VectorStoreError({
311
+ operation: 'search',
312
+ message: e instanceof Error ? e.message : String(e),
313
+ cause: e,
314
+ }),
315
+ })
316
+ }
150
317
 
151
- if (similarity < threshold) {
152
- continue
318
+ searchWithStats(
319
+ vector: number[],
320
+ limit: number,
321
+ threshold = 0,
322
+ options?: VectorSearchOptions,
323
+ ): Effect.Effect<VectorSearchResultWithStats, VectorStoreError> {
324
+ return Effect.try({
325
+ try: () => {
326
+ if (!this.index || this.entries.size === 0) {
327
+ return {
328
+ results: [],
329
+ belowThresholdCount: 0,
330
+ belowThresholdHighest: null,
331
+ }
153
332
  }
154
333
 
155
- const entry = this.entries.get(idx)
156
- if (entry) {
334
+ // Set efSearch if provided (controls recall/speed tradeoff)
335
+ if (options?.efSearch !== undefined) {
336
+ this.index.setEf(options.efSearch)
337
+ }
338
+
339
+ const result = this.index.searchKnn(
340
+ vector,
341
+ Math.min(limit, this.entries.size),
342
+ )
343
+ const results: VectorSearchResult[] = []
344
+ let belowThresholdCount = 0
345
+ let belowThresholdHighest: number | null = null
346
+
347
+ for (let i = 0; i < result.neighbors.length; i++) {
348
+ const idx = result.neighbors[i]
349
+ const distance = result.distances[i]
350
+
351
+ if (idx === undefined || distance === undefined) {
352
+ continue
353
+ }
354
+
355
+ // Convert distance to similarity (cosine distance to cosine similarity)
356
+ // hnswlib returns 1 - cosine_similarity for cosine space
357
+ const similarity = 1 - distance
358
+
359
+ const entry = this.entries.get(idx)
360
+ if (!entry) continue
361
+
362
+ if (similarity < threshold) {
363
+ // Track below-threshold stats
364
+ belowThresholdCount++
365
+ if (
366
+ belowThresholdHighest === null ||
367
+ similarity > belowThresholdHighest
368
+ ) {
369
+ belowThresholdHighest = similarity
370
+ }
371
+ continue
372
+ }
373
+
157
374
  results.push({
158
375
  id: entry.id,
159
376
  sectionId: entry.sectionId,
@@ -162,13 +379,23 @@ class HnswVectorStore implements VectorStore {
162
379
  similarity,
163
380
  })
164
381
  }
165
- }
166
382
 
167
- return results
383
+ return {
384
+ results,
385
+ belowThresholdCount,
386
+ belowThresholdHighest,
387
+ }
388
+ },
389
+ catch: (e) =>
390
+ new VectorStoreError({
391
+ operation: 'search',
392
+ message: e instanceof Error ? e.message : String(e),
393
+ cause: e,
394
+ }),
168
395
  })
169
396
  }
170
397
 
171
- save(): Effect.Effect<void, Error> {
398
+ save(): Effect.Effect<void, VectorStoreError> {
172
399
  return Effect.gen(
173
400
  function* (this: HnswVectorStore) {
174
401
  if (!this.index) {
@@ -176,17 +403,33 @@ class HnswVectorStore implements VectorStore {
176
403
  }
177
404
 
178
405
  const indexDir = this.getIndexDir()
179
- yield* Effect.promise(() => fs.mkdir(indexDir, { recursive: true }))
406
+ yield* Effect.tryPromise({
407
+ try: () => fs.mkdir(indexDir, { recursive: true }),
408
+ catch: (e) =>
409
+ new VectorStoreError({
410
+ operation: 'save',
411
+ message: `Failed to create directory: ${e instanceof Error ? e.message : String(e)}`,
412
+ cause: e,
413
+ }),
414
+ })
180
415
 
181
416
  // Save the hnswlib index
182
- yield* Effect.promise(() =>
183
- this.index!.writeIndex(this.getVectorPath()),
184
- )
417
+ yield* Effect.tryPromise({
418
+ try: () => this.index!.writeIndex(this.getVectorPath()),
419
+ catch: (e) =>
420
+ new VectorStoreError({
421
+ operation: 'save',
422
+ message: `Failed to write index: ${e instanceof Error ? e.message : String(e)}`,
423
+ cause: e,
424
+ }),
425
+ })
185
426
 
186
427
  // Save metadata
187
428
  const meta: VectorIndex = {
188
429
  version: INDEX_VERSION,
189
430
  provider: this.provider,
431
+ providerModel: this.providerModel,
432
+ providerBaseURL: this.providerBaseURL,
190
433
  dimensions: this.dimensions,
191
434
  entries: Object.fromEntries(
192
435
  Array.from(this.entries.entries()).map(([idx, entry]) => [
@@ -198,44 +441,135 @@ class HnswVectorStore implements VectorStore {
198
441
  totalTokens: this.totalTokens,
199
442
  createdAt: new Date().toISOString(),
200
443
  updatedAt: new Date().toISOString(),
444
+ // Store HNSW build parameters for validation on load
445
+ hnswParams: {
446
+ m: this.hnswM,
447
+ efConstruction: this.hnswEfConstruction,
448
+ },
201
449
  }
202
450
 
203
- yield* Effect.promise(() =>
204
- fs.writeFile(this.getMetaPath(), JSON.stringify(meta, null, 2)),
205
- )
451
+ yield* Effect.tryPromise({
452
+ try: async () => {
453
+ // Size validation
454
+ const estimatedSize = this.entries.size * 15000
455
+ if (estimatedSize > 100_000_000) {
456
+ console.warn(
457
+ `Large metadata detected: ~${(estimatedSize / 1e6).toFixed(0)}MB. ` +
458
+ `Consider indexing subdirectories separately.`,
459
+ )
460
+ }
461
+
462
+ // Encode with MessagePack and write
463
+ const encoded = msgpack.encode(meta)
464
+ await fs.writeFile(this.getMetaPath(), encoded)
465
+ },
466
+ catch: (e) =>
467
+ new VectorStoreError({
468
+ operation: 'save',
469
+ message: `Failed to write metadata: ${e instanceof Error ? e.message : String(e)}`,
470
+ cause: e,
471
+ }),
472
+ })
206
473
  }.bind(this),
207
474
  )
208
475
  }
209
476
 
210
- load(): Effect.Effect<boolean, Error> {
477
+ load(): Effect.Effect<
478
+ VectorStoreLoadResult,
479
+ VectorStoreError | DimensionMismatchError
480
+ > {
211
481
  return Effect.gen(
212
482
  function* (this: HnswVectorStore) {
213
483
  const vectorPath = this.getVectorPath()
214
484
  const metaPath = this.getMetaPath()
215
485
 
216
- // Check if files exist
486
+ // Check if files exist - catch file not found gracefully
487
+ // For metadata, check both binary (.bin) and JSON (.json) for migration
217
488
  const filesExist = yield* Effect.tryPromise({
218
489
  try: async () => {
219
490
  await fs.access(vectorPath)
220
- await fs.access(metaPath)
221
- return true
491
+ // Check if either binary or JSON metadata exists
492
+ try {
493
+ await fs.access(metaPath)
494
+ return true
495
+ } catch {
496
+ const jsonPath = metaPath.replace('.bin', '.json')
497
+ await fs.access(jsonPath)
498
+ return true
499
+ }
222
500
  },
223
- catch: () => false as const,
224
- }).pipe(Effect.catchAll(() => Effect.succeed(false)))
501
+ catch: () =>
502
+ new VectorStoreError({
503
+ operation: 'load',
504
+ message: 'Files not found',
505
+ }),
506
+ }).pipe(
507
+ Effect.catchTag('VectorStoreError', () => Effect.succeed(false)),
508
+ )
225
509
 
226
510
  if (!filesExist) {
227
- return false
511
+ return { loaded: false }
228
512
  }
229
513
 
230
- // Load metadata first
231
- const metaContent = yield* Effect.promise(() =>
232
- fs.readFile(metaPath, 'utf-8'),
233
- )
234
- const meta = JSON.parse(metaContent) as VectorIndex
514
+ // Load metadata - try binary first, fall back to JSON for migration
515
+ const loadedMeta = yield* Effect.tryPromise({
516
+ try: async () => {
517
+ // Try binary format first (new)
518
+ try {
519
+ await fs.access(metaPath)
520
+ const buffer = await fs.readFile(metaPath)
521
+ return msgpack.decode(buffer) as VectorIndex
522
+ } catch {
523
+ // Fall back to JSON for migration (old)
524
+ const jsonPath = metaPath.replace('.bin', '.json')
525
+ try {
526
+ await fs.access(jsonPath)
527
+ const json = await fs.readFile(jsonPath, 'utf-8')
528
+ const meta = JSON.parse(json) as VectorIndex
529
+
530
+ // Auto-migrate to binary format (safe for concurrent access)
531
+ try {
532
+ const encoded = msgpack.encode(meta)
533
+ await fs.writeFile(metaPath, encoded)
534
+
535
+ // Remove old JSON file (ignore errors if already deleted by another process)
536
+ await fs.unlink(jsonPath).catch(() => {})
537
+ } catch {
538
+ // Migration failed, but we have the data - continue
539
+ }
540
+
541
+ return meta
542
+ } catch {
543
+ throw new Error('Metadata file not found')
544
+ }
545
+ }
546
+ },
547
+ catch: (e) =>
548
+ new VectorStoreError({
549
+ operation: 'load',
550
+ message: `Failed to read metadata: ${e instanceof Error ? e.message : String(e)}`,
551
+ cause: e,
552
+ }),
553
+ })
554
+
555
+ // Apply legacy index migration: default to 'openai' if provider is missing
556
+ const meta: VectorIndex = {
557
+ ...loadedMeta,
558
+ provider: loadedMeta.provider || 'openai',
559
+ }
235
560
 
236
- // Verify dimensions match
561
+ // Verify dimensions match - fail with clear error if mismatch
237
562
  if (meta.dimensions !== this.dimensions) {
238
- return false
563
+ return yield* Effect.fail(
564
+ new DimensionMismatchError({
565
+ corpusDimensions: meta.dimensions,
566
+ providerDimensions: this.dimensions,
567
+ corpusProvider: meta.providerModel
568
+ ? `${meta.provider}:${meta.providerModel}`
569
+ : meta.provider,
570
+ path: this.rootPath,
571
+ }),
572
+ )
239
573
  }
240
574
 
241
575
  // Load the hnswlib index
@@ -243,7 +577,15 @@ class HnswVectorStore implements VectorStore {
243
577
  'cosine',
244
578
  this.dimensions,
245
579
  )
246
- yield* Effect.promise(() => this.index!.readIndex(vectorPath))
580
+ yield* Effect.tryPromise({
581
+ try: () => this.index!.readIndex(vectorPath),
582
+ catch: (e) =>
583
+ new VectorStoreError({
584
+ operation: 'load',
585
+ message: `Failed to read index: ${e instanceof Error ? e.message : String(e)}`,
586
+ cause: e,
587
+ }),
588
+ })
247
589
 
248
590
  // Restore entries
249
591
  this.entries.clear()
@@ -258,10 +600,28 @@ class HnswVectorStore implements VectorStore {
258
600
  }
259
601
 
260
602
  this.provider = meta.provider
603
+ this.providerModel = meta.providerModel
604
+ this.providerBaseURL = meta.providerBaseURL
261
605
  this.totalCost = meta.totalCost
262
606
  this.totalTokens = meta.totalTokens
263
607
 
264
- return true
608
+ // Check for HNSW parameter mismatch
609
+ let hnswMismatch: HnswMismatchWarning | undefined
610
+ if (meta.hnswParams) {
611
+ const indexM = meta.hnswParams.m
612
+ const indexEf = meta.hnswParams.efConstruction
613
+ if (indexM !== this.hnswM || indexEf !== this.hnswEfConstruction) {
614
+ hnswMismatch = {
615
+ configParams: {
616
+ m: this.hnswM,
617
+ efConstruction: this.hnswEfConstruction,
618
+ },
619
+ indexParams: { m: indexM, efConstruction: indexEf },
620
+ }
621
+ }
622
+ }
623
+
624
+ return { loaded: true, hnswMismatch }
265
625
  }.bind(this),
266
626
  )
267
627
  }
@@ -271,13 +631,16 @@ class HnswVectorStore implements VectorStore {
271
631
  count: this.entries.size,
272
632
  dimensions: this.dimensions,
273
633
  provider: this.provider,
634
+ providerModel: this.providerModel,
274
635
  totalCost: this.totalCost,
275
636
  totalTokens: this.totalTokens,
276
637
  }
277
638
  }
278
639
 
279
- setProvider(name: string): void {
640
+ setProvider(name: string, model?: string, baseURL?: string): void {
280
641
  this.provider = name
642
+ this.providerModel = model
643
+ this.providerBaseURL = baseURL
281
644
  }
282
645
 
283
646
  addCost(cost: number, tokens: number): void {
@@ -290,10 +653,56 @@ class HnswVectorStore implements VectorStore {
290
653
  // Factory
291
654
  // ============================================================================
292
655
 
656
+ /**
657
+ * HNSW build parameters for index construction.
658
+ * These affect index quality and build time - changes require index rebuild.
659
+ */
660
+ export interface HnswBuildOptions {
661
+ /** Max connections per node (default: 16). Higher = better recall, larger index. */
662
+ readonly m?: number | undefined
663
+ /** Construction-time search width (default: 200). Higher = better quality, slower builds. */
664
+ readonly efConstruction?: number | undefined
665
+ }
666
+
667
+ /**
668
+ * Create a vector store for the given root path.
669
+ *
670
+ * @param rootPath - Root directory containing the index
671
+ * @param dimensions - Embedding dimensions
672
+ * @param hnswOptions - Optional HNSW build parameters
673
+ * @returns A new VectorStore instance
674
+ */
293
675
  export const createVectorStore = (
294
676
  rootPath: string,
295
677
  dimensions: number,
296
- ): VectorStore => new HnswVectorStore(rootPath, dimensions)
678
+ hnswOptions?: HnswBuildOptions,
679
+ ): VectorStore => new HnswVectorStore(rootPath, dimensions, hnswOptions)
680
+
681
+ /**
682
+ * Create a namespaced vector store for a specific provider/model.
683
+ *
684
+ * Uses the new namespaced storage structure:
685
+ * .mdcontext/embeddings/{provider}_{model}_{dimensions}/vectors.bin
686
+ *
687
+ * @param rootPath - Root directory containing the index
688
+ * @param provider - Provider name (e.g., "openai", "voyage")
689
+ * @param model - Model name (e.g., "text-embedding-3-small")
690
+ * @param dimensions - Embedding dimensions
691
+ * @param hnswOptions - Optional HNSW build parameters
692
+ * @returns A new VectorStore instance with namespace set
693
+ */
694
+ export const createNamespacedVectorStore = (
695
+ rootPath: string,
696
+ provider: string,
697
+ model: string,
698
+ dimensions: number,
699
+ hnswOptions?: HnswBuildOptions,
700
+ ): VectorStore => {
701
+ const namespace = generateNamespace(provider, model, dimensions)
702
+ const store = new HnswVectorStore(rootPath, dimensions, hnswOptions)
703
+ store.setNamespace(namespace)
704
+ return store
705
+ }
297
706
 
298
707
  // Export the class for type access
299
708
  export { HnswVectorStore }