mdcontext 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (251) hide show
  1. package/.changeset/config.json +9 -9
  2. package/.claude/settings.local.json +25 -0
  3. package/.github/workflows/claude-code-review.yml +44 -0
  4. package/.github/workflows/claude.yml +85 -0
  5. package/CONTRIBUTING.md +186 -0
  6. package/NOTES/NOTES +44 -0
  7. package/README.md +206 -3
  8. package/biome.json +1 -1
  9. package/dist/chunk-23UPXDNL.js +3044 -0
  10. package/dist/chunk-2W7MO2DL.js +1366 -0
  11. package/dist/chunk-3NUAZGMA.js +1689 -0
  12. package/dist/chunk-7TOWB2XB.js +366 -0
  13. package/dist/chunk-7XOTOADQ.js +3065 -0
  14. package/dist/chunk-AH2PDM2K.js +3042 -0
  15. package/dist/chunk-BNXWSZ63.js +3742 -0
  16. package/dist/chunk-BTL5DJVU.js +3222 -0
  17. package/dist/chunk-HDHYG7E4.js +104 -0
  18. package/dist/chunk-HLR4KZBP.js +3234 -0
  19. package/dist/chunk-IP3FRFEB.js +1045 -0
  20. package/dist/chunk-KHU56VDO.js +3042 -0
  21. package/dist/chunk-KRYIFLQR.js +85 -89
  22. package/dist/chunk-LBSDNLEM.js +287 -0
  23. package/dist/chunk-MNTQ7HCP.js +2643 -0
  24. package/dist/chunk-MUJELQQ6.js +1387 -0
  25. package/dist/chunk-MXJGMSLV.js +2199 -0
  26. package/dist/chunk-N6QJGC3Z.js +2636 -0
  27. package/dist/chunk-OBELGBPM.js +1713 -0
  28. package/dist/chunk-OT7R5XTA.js +3192 -0
  29. package/dist/chunk-P7X4RA2T.js +106 -0
  30. package/dist/chunk-PIDUQNC2.js +3185 -0
  31. package/dist/chunk-POGCDIH4.js +3187 -0
  32. package/dist/chunk-PSIEOQGZ.js +3043 -0
  33. package/dist/chunk-PVRT3IHA.js +3238 -0
  34. package/dist/chunk-QNN4TT23.js +1430 -0
  35. package/dist/chunk-RE3R45RJ.js +3042 -0
  36. package/dist/chunk-S7E6TFX6.js +718 -657
  37. package/dist/chunk-SG6GLU4U.js +1378 -0
  38. package/dist/chunk-SJCDV2ST.js +274 -0
  39. package/dist/chunk-SYE5XLF3.js +104 -0
  40. package/dist/chunk-T5VLYBZD.js +103 -0
  41. package/dist/chunk-TOQB7VWU.js +3238 -0
  42. package/dist/chunk-VFNMZ4ZQ.js +3228 -0
  43. package/dist/chunk-VVTGZNBT.js +1533 -1423
  44. package/dist/chunk-W7Q4RFEV.js +104 -0
  45. package/dist/chunk-XTYYVRLO.js +3190 -0
  46. package/dist/chunk-Y6MDYVJD.js +3063 -0
  47. package/dist/cli/main.js +4072 -629
  48. package/dist/index.d.ts +420 -33
  49. package/dist/index.js +8 -15
  50. package/dist/mcp/server.js +103 -7
  51. package/dist/schema-BAWSG7KY.js +22 -0
  52. package/dist/schema-E3QUPL26.js +20 -0
  53. package/dist/schema-EHL7WUT6.js +20 -0
  54. package/docs/019-USAGE.md +44 -5
  55. package/docs/020-current-implementation.md +8 -8
  56. package/docs/021-DOGFOODING-FINDINGS.md +1 -1
  57. package/docs/CONFIG.md +1123 -0
  58. package/docs/ERRORS.md +383 -0
  59. package/docs/summarization.md +320 -0
  60. package/justfile +40 -0
  61. package/package.json +39 -33
  62. package/research/INDEX.md +315 -0
  63. package/research/code-review/README.md +90 -0
  64. package/research/code-review/cli-error-handling-review.md +979 -0
  65. package/research/code-review/code-review-validation-report.md +464 -0
  66. package/research/code-review/main-ts-review.md +1128 -0
  67. package/research/config-docs/SUMMARY.md +357 -0
  68. package/research/config-docs/TEST-RESULTS.md +776 -0
  69. package/research/config-docs/TODO.md +542 -0
  70. package/research/config-docs/analysis.md +744 -0
  71. package/research/config-docs/fix-validation.md +502 -0
  72. package/research/config-docs/help-audit.md +264 -0
  73. package/research/config-docs/help-system-analysis.md +890 -0
  74. package/research/frontmatter/COMMENTS-ARE-SKIPPED.md +149 -0
  75. package/research/frontmatter/LLM-CODE-NAVIGATION.md +276 -0
  76. package/research/issue-review.md +603 -0
  77. package/research/llm-summarization/agent-cli-tools-2026.md +1082 -0
  78. package/research/llm-summarization/alternative-providers-2026.md +1428 -0
  79. package/research/llm-summarization/anthropic-2026.md +367 -0
  80. package/research/llm-summarization/claude-cli-integration.md +1706 -0
  81. package/research/llm-summarization/cli-integration-patterns.md +3155 -0
  82. package/research/llm-summarization/openai-2026.md +473 -0
  83. package/research/llm-summarization/openai-compatible-providers-2026.md +1022 -0
  84. package/research/llm-summarization/opencode-cli-integration.md +1552 -0
  85. package/research/llm-summarization/prompt-engineering-2026.md +1426 -0
  86. package/research/llm-summarization/prototype-results.md +56 -0
  87. package/research/llm-summarization/provider-switching-patterns-2026.md +2153 -0
  88. package/research/llm-summarization/typescript-llm-libraries-2026.md +2436 -0
  89. package/research/mdcontext-pudding/00-EXECUTIVE-SUMMARY.md +282 -0
  90. package/research/mdcontext-pudding/01-index-embed.md +956 -0
  91. package/research/mdcontext-pudding/02-search-COMMANDS.md +142 -0
  92. package/research/mdcontext-pudding/02-search-SUMMARY.md +146 -0
  93. package/research/mdcontext-pudding/02-search.md +970 -0
  94. package/research/mdcontext-pudding/03-context.md +779 -0
  95. package/research/mdcontext-pudding/04-navigation-and-analytics.md +803 -0
  96. package/research/mdcontext-pudding/04-tree.md +704 -0
  97. package/research/mdcontext-pudding/05-config.md +1038 -0
  98. package/research/mdcontext-pudding/06-links-summary.txt +87 -0
  99. package/research/mdcontext-pudding/06-links.md +679 -0
  100. package/research/mdcontext-pudding/07-stats.md +693 -0
  101. package/research/mdcontext-pudding/BUG-FIX-PLAN.md +388 -0
  102. package/research/mdcontext-pudding/P0-BUG-VALIDATION.md +167 -0
  103. package/research/mdcontext-pudding/README.md +168 -0
  104. package/research/mdcontext-pudding/TESTING-SUMMARY.md +128 -0
  105. package/research/research-quality-review.md +834 -0
  106. package/research/semantic-search/embedding-text-analysis.md +156 -0
  107. package/research/semantic-search/multi-word-failure-reproduction.md +171 -0
  108. package/research/semantic-search/query-processing-analysis.md +207 -0
  109. package/research/semantic-search/root-cause-and-solution.md +114 -0
  110. package/research/semantic-search/threshold-validation-report.md +69 -0
  111. package/research/semantic-search/vector-search-analysis.md +63 -0
  112. package/research/test-path-issues.md +276 -0
  113. package/review/ALP-76/1-error-type-design.md +962 -0
  114. package/review/ALP-76/2-error-handling-patterns.md +906 -0
  115. package/review/ALP-76/3-error-presentation.md +624 -0
  116. package/review/ALP-76/4-test-coverage.md +625 -0
  117. package/review/ALP-76/5-migration-completeness.md +440 -0
  118. package/review/ALP-76/6-effect-best-practices.md +755 -0
  119. package/scripts/apply-branch-protection.sh +47 -0
  120. package/scripts/branch-protection-templates.json +79 -0
  121. package/scripts/prototype-summarization.ts +346 -0
  122. package/scripts/rebuild-hnswlib.js +32 -37
  123. package/scripts/setup-branch-protection.sh +64 -0
  124. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/active-provider.json +7 -0
  125. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.json +541 -0
  126. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.meta.json +5 -0
  127. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/config.json +8 -0
  128. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
  129. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
  130. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/documents.json +60 -0
  131. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/links.json +13 -0
  132. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/sections.json +1197 -0
  133. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/configuration-management.md +99 -0
  134. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/distributed-systems.md +92 -0
  135. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/error-handling.md +78 -0
  136. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/failure-automation.md +55 -0
  137. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/job-context.md +69 -0
  138. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/process-orchestration.md +99 -0
  139. package/src/cli/argv-preprocessor.test.ts +2 -2
  140. package/src/cli/cli.test.ts +230 -33
  141. package/src/cli/commands/config-cmd.ts +642 -0
  142. package/src/cli/commands/context.ts +97 -9
  143. package/src/cli/commands/duplicates.ts +122 -0
  144. package/src/cli/commands/embeddings.ts +529 -0
  145. package/src/cli/commands/index-cmd.ts +210 -30
  146. package/src/cli/commands/index.ts +3 -0
  147. package/src/cli/commands/search.ts +894 -64
  148. package/src/cli/commands/stats.ts +3 -0
  149. package/src/cli/commands/tree.ts +26 -5
  150. package/src/cli/config-layer.ts +176 -0
  151. package/src/cli/error-handler.test.ts +235 -0
  152. package/src/cli/error-handler.ts +655 -0
  153. package/src/cli/flag-schemas.ts +66 -0
  154. package/src/cli/help.ts +209 -7
  155. package/src/cli/main.ts +348 -58
  156. package/src/cli/options.ts +10 -0
  157. package/src/cli/shared-error-handling.ts +199 -0
  158. package/src/cli/utils.ts +150 -17
  159. package/src/config/file-provider.test.ts +320 -0
  160. package/src/config/file-provider.ts +273 -0
  161. package/src/config/index.ts +72 -0
  162. package/src/config/integration.test.ts +667 -0
  163. package/src/config/precedence.test.ts +277 -0
  164. package/src/config/precedence.ts +451 -0
  165. package/src/config/schema.test.ts +414 -0
  166. package/src/config/schema.ts +603 -0
  167. package/src/config/service.test.ts +320 -0
  168. package/src/config/service.ts +243 -0
  169. package/src/config/testing.test.ts +264 -0
  170. package/src/config/testing.ts +110 -0
  171. package/src/core/types.ts +6 -33
  172. package/src/duplicates/detector.test.ts +183 -0
  173. package/src/duplicates/detector.ts +414 -0
  174. package/src/duplicates/index.ts +18 -0
  175. package/src/embeddings/embedding-namespace.test.ts +300 -0
  176. package/src/embeddings/embedding-namespace.ts +947 -0
  177. package/src/embeddings/heading-boost.test.ts +222 -0
  178. package/src/embeddings/hnsw-build-options.test.ts +198 -0
  179. package/src/embeddings/hyde.test.ts +272 -0
  180. package/src/embeddings/hyde.ts +264 -0
  181. package/src/embeddings/index.ts +2 -0
  182. package/src/embeddings/openai-provider.ts +332 -83
  183. package/src/embeddings/pricing.json +22 -0
  184. package/src/embeddings/provider-constants.ts +204 -0
  185. package/src/embeddings/provider-errors.test.ts +967 -0
  186. package/src/embeddings/provider-errors.ts +565 -0
  187. package/src/embeddings/provider-factory.test.ts +240 -0
  188. package/src/embeddings/provider-factory.ts +225 -0
  189. package/src/embeddings/provider-integration.test.ts +788 -0
  190. package/src/embeddings/query-preprocessing.test.ts +187 -0
  191. package/src/embeddings/semantic-search-threshold.test.ts +508 -0
  192. package/src/embeddings/semantic-search.ts +780 -93
  193. package/src/embeddings/types.ts +293 -16
  194. package/src/embeddings/vector-store.ts +486 -77
  195. package/src/embeddings/voyage-provider.ts +313 -0
  196. package/src/errors/errors.test.ts +845 -0
  197. package/src/errors/index.ts +533 -0
  198. package/src/index/ignore-patterns.test.ts +354 -0
  199. package/src/index/ignore-patterns.ts +305 -0
  200. package/src/index/indexer.ts +286 -48
  201. package/src/index/storage.ts +94 -30
  202. package/src/index/types.ts +40 -2
  203. package/src/index/watcher.ts +67 -9
  204. package/src/index.ts +22 -0
  205. package/src/integration/search-keyword.test.ts +678 -0
  206. package/src/mcp/server.ts +135 -6
  207. package/src/parser/parser.ts +18 -19
  208. package/src/parser/section-filter.test.ts +277 -0
  209. package/src/parser/section-filter.ts +125 -3
  210. package/src/search/__tests__/hybrid-search.test.ts +650 -0
  211. package/src/search/bm25-store.ts +366 -0
  212. package/src/search/cross-encoder.test.ts +253 -0
  213. package/src/search/cross-encoder.ts +406 -0
  214. package/src/search/fuzzy-search.test.ts +419 -0
  215. package/src/search/fuzzy-search.ts +273 -0
  216. package/src/search/hybrid-search.ts +448 -0
  217. package/src/search/path-matcher.test.ts +276 -0
  218. package/src/search/path-matcher.ts +33 -0
  219. package/src/search/searcher.test.ts +99 -1
  220. package/src/search/searcher.ts +189 -67
  221. package/src/search/wink-bm25.d.ts +30 -0
  222. package/src/summarization/cli-providers/claude.ts +202 -0
  223. package/src/summarization/cli-providers/detection.test.ts +273 -0
  224. package/src/summarization/cli-providers/detection.ts +118 -0
  225. package/src/summarization/cli-providers/index.ts +8 -0
  226. package/src/summarization/cost.test.ts +139 -0
  227. package/src/summarization/cost.ts +102 -0
  228. package/src/summarization/error-handler.test.ts +127 -0
  229. package/src/summarization/error-handler.ts +111 -0
  230. package/src/summarization/index.ts +102 -0
  231. package/src/summarization/pipeline.test.ts +498 -0
  232. package/src/summarization/pipeline.ts +231 -0
  233. package/src/summarization/prompts.test.ts +269 -0
  234. package/src/summarization/prompts.ts +133 -0
  235. package/src/summarization/provider-factory.test.ts +396 -0
  236. package/src/summarization/provider-factory.ts +178 -0
  237. package/src/summarization/types.ts +184 -0
  238. package/src/summarize/summarizer.ts +104 -35
  239. package/src/types/huggingface-transformers.d.ts +66 -0
  240. package/tests/fixtures/cli/.mdcontext/active-provider.json +7 -0
  241. package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
  242. package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
  243. package/tests/fixtures/cli/.mdcontext/indexes/documents.json +4 -4
  244. package/tests/fixtures/cli/.mdcontext/indexes/sections.json +14 -0
  245. package/tests/integration/embed-index.test.ts +712 -0
  246. package/tests/integration/search-context.test.ts +469 -0
  247. package/tests/integration/search-semantic.test.ts +522 -0
  248. package/vitest.config.ts +1 -6
  249. package/AGENTS.md +0 -46
  250. package/tests/fixtures/cli/.mdcontext/vectors.bin +0 -0
  251. package/tests/fixtures/cli/.mdcontext/vectors.meta.json +0 -1264
@@ -6,10 +6,43 @@
6
6
  // Embedding Provider
7
7
  // ============================================================================
8
8
 
9
/**
 * Progress snapshot handed to `EmbedOptions.onBatchProgress`.
 *
 * NOTE(review): whether `batchIndex` is zero- or one-based is not visible
 * in this module - confirm against the provider that emits it.
 */
export interface BatchProgress {
  /** Position of the batch this update refers to. */
  readonly batchIndex: number
  /** Number of batches in the whole embed call. */
  readonly totalBatches: number
  /** Count of texts handled so far. */
  readonly processedTexts: number
  /** Count of texts submitted in the whole embed call. */
  readonly totalTexts: number
}
15
+
16
/**
 * Optional settings accepted as the second argument of
 * `EmbeddingProvider.embed`.
 */
export interface EmbedOptions {
  /**
   * Callback that receives a `BatchProgress` snapshot.
   * May be omitted or explicitly set to `undefined`.
   */
  readonly onBatchProgress?: ((progress: BatchProgress) => void) | undefined
}
19
+
9
20
  export interface EmbeddingProvider {
10
21
  readonly name: string
11
22
  readonly dimensions: number
12
- embed(texts: string[]): Promise<EmbeddingResult>
23
+ embed(texts: string[], options?: EmbedOptions): Promise<EmbeddingResult>
24
+ }
25
+
26
/**
 * An `EmbeddingProvider` that additionally exposes which model it uses and,
 * when one is configured, the base URL of the service behind it.
 * Use `hasProviderMetadata` to narrow a plain provider to this shape.
 */
export interface EmbeddingProviderWithMetadata extends EmbeddingProvider {
  /** Model identifier reported by the provider. */
  readonly model: string
  /** Service base URL; `undefined` when none is set. */
  readonly baseURL: string | undefined
}
34
+
35
/**
 * Type guard: does this provider also expose `model` / `baseURL` metadata?
 *
 * Use this instead of casting when reading `model` or `baseURL` from a
 * provider typed as the base `EmbeddingProvider`.
 *
 * Both fields promised by `EmbeddingProviderWithMetadata` are verified at
 * runtime so the predicate never claims more than it checked:
 * - `model` must be a string
 * - `baseURL` must be a string, or absent / `undefined`
 *
 * @param provider - Provider to inspect
 * @returns `true` when the provider satisfies `EmbeddingProviderWithMetadata`
 */
export const hasProviderMetadata = (
  provider: EmbeddingProvider,
): provider is EmbeddingProviderWithMetadata => {
  // A Partial view lets us read the optional fields without an unsafe cast.
  const candidate: Partial<EmbeddingProviderWithMetadata> = provider

  if (typeof candidate.model !== 'string') return false

  // A missing property reads as undefined, which the target type allows.
  return (
    candidate.baseURL === undefined || typeof candidate.baseURL === 'string'
  )
}
14
47
 
15
48
  export interface EmbeddingResult {
@@ -33,14 +66,56 @@ export interface VectorEntry {
33
66
  export interface VectorIndex {
34
67
  readonly version: number
35
68
  readonly provider: string
69
+ readonly providerModel?: string | undefined
70
+ readonly providerBaseURL?: string | undefined
36
71
  readonly dimensions: number
37
72
  readonly entries: Record<string, VectorEntry>
38
73
  readonly totalCost: number
39
74
  readonly totalTokens: number
40
75
  readonly createdAt: string
41
76
  readonly updatedAt: string
77
+ /**
78
+ * HNSW index build parameters (stored for validation on load).
79
+ * These affect index quality and build time - changes require rebuild.
80
+ */
81
+ readonly hnswParams?: HnswIndexParams | undefined
82
+ }
83
+
84
/**
 * HNSW build parameters persisted alongside the index metadata.
 * They make it possible to spot a mismatch between the current config and
 * the stored index, and to recommend a rebuild.
 */
export interface HnswIndexParams {
  /** M parameter: maximum connections per node. Default: 16 */
  readonly m: number
  /** Search width used while constructing the index. Default: 200 */
  readonly efConstruction: number
}
43
94
 
95
+ // ============================================================================
96
+ // Quality Modes
97
+ // ============================================================================
98
+
99
/**
 * Named quality levels that select the HNSW `efSearch` parameter.
 * A larger efSearch improves recall and costs speed.
 *
 * - 'fast': efSearch=64, ~40% faster, slight recall reduction
 * - 'balanced': efSearch=100 (default), good balance
 * - 'thorough': efSearch=256, ~30% slower, best recall
 */
export type SearchQuality = 'fast' | 'balanced' | 'thorough'

/**
 * Lookup table from quality level to its efSearch value, i.e. the size of
 * the dynamic candidate list used during search.
 */
export const QUALITY_EF_SEARCH: Record<SearchQuality, number> = {
  fast: 64,
  balanced: 100,
  thorough: 256,
}
118
+
44
119
  // ============================================================================
45
120
  // Semantic Search
46
121
  // ============================================================================
@@ -52,6 +127,183 @@ export interface SemanticSearchOptions {
52
127
  readonly threshold?: number | undefined
53
128
  /** Filter by document path pattern */
54
129
  readonly pathPattern?: string | undefined
130
+ /** Search quality mode: fast, balanced (default), or thorough */
131
+ readonly quality?: SearchQuality | undefined
132
+ /** Provider configuration override */
133
+ readonly providerConfig?:
134
+ | {
135
+ readonly provider:
136
+ | 'openai'
137
+ | 'ollama'
138
+ | 'lm-studio'
139
+ | 'openrouter'
140
+ | 'voyage'
141
+ readonly baseURL?: string | undefined
142
+ readonly model?: string | undefined
143
+ }
144
+ | undefined
145
+ /**
146
+ * Skip query preprocessing (normalize, lowercase, strip punctuation).
147
+ * Default: false (preprocessing enabled for better recall).
148
+ * Set to true for exact query matching.
149
+ */
150
+ readonly skipPreprocessing?: boolean | undefined
151
+ /**
152
+ * Boost results where query terms appear in section headings.
153
+ * Improves navigation queries like "installation guide" or "API reference".
154
+ * Default: true (heading boost enabled).
155
+ */
156
+ readonly headingBoost?: boolean | undefined
157
+ /**
158
+ * Use HyDE (Hypothetical Document Embeddings) for query expansion.
159
+ * Generates a hypothetical document answering the query using an LLM,
160
+ * then searches using that document's embedding.
161
+ *
162
+ * Best for: complex questions, "how to" queries, ambiguous searches
163
+ * Adds: ~1-2s latency, LLM API cost
164
+ * Improvement: 10-30% better recall on complex queries
165
+ *
166
+ * Default: false (disabled)
167
+ */
168
+ readonly hyde?: boolean | undefined
169
+ /**
170
+ * HyDE configuration options (only used when hyde: true).
171
+ */
172
+ readonly hydeOptions?:
173
+ | {
174
+ /** Model for hypothetical document generation. Default: gpt-4o-mini */
175
+ readonly model?: string | undefined
176
+ /** Max tokens for generation. Default: 256 */
177
+ readonly maxTokens?: number | undefined
178
+ /** Generation temperature (0-1). Default: 0.3 */
179
+ readonly temperature?: number | undefined
180
+ }
181
+ | undefined
182
+ /** Lines of context before matches */
183
+ readonly contextBefore?: number | undefined
184
+ /** Lines of context after matches */
185
+ readonly contextAfter?: number | undefined
186
+ }
187
+
188
+ // ============================================================================
189
+ // Query Preprocessing
190
+ // ============================================================================
191
+
192
+ // ============================================================================
193
+ // Heading Boost
194
+ // ============================================================================
195
+
196
/** Score added for every query term found in a heading (0.05 = 5% per term). */
const HEADING_BOOST_FACTOR = 0.05

/** Score added for documents matching an important-file pattern (0.03 = 3%). */
const FILE_IMPORTANCE_BOOST = 0.03

/**
 * Path patterns for documents that receive the file importance boost.
 * These are typically entry points or high-value documentation. Each name is
 * listed twice: once anchored at the start of the path and once after a `/`.
 */
const IMPORTANT_FILE_PATTERNS: readonly RegExp[] = [
  // README files (root, then nested)
  /^readme\.md$/i,
  /\/readme\.md$/i,
  // Index files
  /^index\.md$/i,
  /\/index\.md$/i,
  // Getting started guides
  /^getting-?started/i,
  /\/getting-?started/i,
  // Introductions
  /^introduction/i,
  /\/introduction/i,
  // Overviews
  /^overview/i,
  /\/overview/i,
  // Quickstart guides
  /^quickstart/i,
  /\/quickstart/i,
  // Changelogs (useful for "what changed" queries)
  /^changelog\.md$/i,
  /\/changelog\.md$/i,
]
222
+
223
/**
 * Compute the ranking bonus a result earns for living in an important file
 * (README, index, getting-started, and the other IMPORTANT_FILE_PATTERNS).
 *
 * @param documentPath - Path to the document
 * @returns FILE_IMPORTANCE_BOOST when any pattern matches the path, else 0
 */
export const calculateFileImportanceBoost = (documentPath: string): number => {
  for (const importantPattern of IMPORTANT_FILE_PATTERNS) {
    // First hit is enough: the boost is flat, not cumulative.
    if (importantPattern.test(documentPath)) {
      return FILE_IMPORTANCE_BOOST
    }
  }
  return 0
}
236
+
237
/**
 * Calculate heading match boost for a search result.
 * Boosts results where query terms appear in the section heading.
 *
 * The query is lowercased and split on whitespace; punctuation is NOT
 * stripped here, so pass an already-normalized query if that matters.
 * Each distinct term is counted at most once, so repeating a word in the
 * query ("api api api") cannot inflate the boost. A term matches when it
 * occurs anywhere in the lowercased heading (substring match).
 *
 * @param heading - Section heading to check
 * @param query - Search query
 * @returns HEADING_BOOST_FACTOR times the number of distinct matching terms
 *          (0 when the query has no terms)
 */
export const calculateHeadingBoost = (
  heading: string,
  query: string,
): number => {
  // A Set collapses repeated terms so each one contributes a single boost.
  const distinctTerms = new Set(
    query.toLowerCase().split(/\s+/).filter(Boolean),
  )
  if (distinctTerms.size === 0) return 0

  const headingLower = heading.toLowerCase()
  let matchCount = 0
  for (const term of distinctTerms) {
    if (headingLower.includes(term)) matchCount += 1
  }

  return matchCount * HEADING_BOOST_FACTOR
}
259
+
260
/**
 * Total ranking bonus for a search result: the heading match boost plus the
 * file importance boost.
 *
 * @param heading - Section heading
 * @param query - Search query
 * @param documentPath - Path to the document
 * @returns Sum of both boosts (0.0 to ~0.18 typically)
 */
export const calculateRankingBoost = (
  heading: string,
  query: string,
  documentPath: string,
): number =>
  calculateHeadingBoost(heading, query) +
  calculateFileImportanceBoost(documentPath)
278
+
279
+ // ============================================================================
280
+ // Query Preprocessing
281
+ // ============================================================================
282
+
283
/**
 * Preprocess a search query before embedding to reduce noise and improve recall.
 *
 * Transformations applied, in order:
 * - Convert to lowercase
 * - Replace punctuation and symbols with spaces (preserves word boundaries)
 * - Collapse runs of whitespace to a single space
 * - Trim leading/trailing whitespace
 *
 * Letters, combining marks and digits from any script are kept, as is `_`.
 * The previous ASCII-only `\w` class turned accented, CJK, Cyrillic, etc.
 * characters into spaces, which mangled or emptied non-English queries.
 * Output for pure-ASCII input is unchanged.
 *
 * @param query - Raw search query
 * @returns Normalized query string (may be empty)
 */
export const preprocessQuery = (query: string): string => {
  return (
    query
      .toLowerCase()
      // Unicode-aware: keep letters (L), marks (M), numbers (N), `_`, whitespace
      .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, ' ')
      // Collapse multiple spaces
      .replace(/\s+/g, ' ')
      .trim()
  )
}
56
308
 
57
309
  export interface SemanticSearchResult {
@@ -60,23 +312,48 @@ export interface SemanticSearchResult {
60
312
  readonly heading: string
61
313
  readonly similarity: number
62
314
  readonly content?: string | undefined
315
+ /** Context lines with their line numbers (when context is requested) */
316
+ readonly contextLines?: readonly ContextLine[] | undefined
63
317
  }
64
318
 
65
- // ============================================================================
66
- // Errors
67
- // ============================================================================
319
/**
 * One line of surrounding context attached to a search result.
 */
export interface ContextLine {
  /** 1-based line number */
  readonly lineNumber: number
  /** Text of the line */
  readonly line: string
  /**
   * Marks lines that belong to the matched result.
   *
   * - Keyword search: true when the line itself matches the query.
   * - Semantic/hybrid search: true when the line falls inside the
   *   selected/matched section span, even without a direct text match.
   */
  readonly isMatch: boolean
}
68
333
 
69
- export interface EmbedError {
70
- readonly _tag: 'EmbedError'
71
- readonly cause: 'RateLimit' | 'ApiKey' | 'Network' | 'Unknown'
72
- readonly message: string
334
/**
 * Semantic search results bundled with statistics about hits that fell below
 * the similarity threshold. The statistics exist so the user can be given
 * feedback when 0 results pass the threshold.
 */
export interface SemanticSearchResultWithStats {
  readonly results: readonly SemanticSearchResult[]
  /** How many results fell below the threshold (only set when includeBelowThresholdStats is true) */
  readonly belowThresholdCount?: number | undefined
  /** Best similarity seen among the below-threshold results */
  readonly belowThresholdHighest?: number | undefined
  /** Count of above-threshold results available before the limit was applied */
  readonly totalAvailable?: number | undefined
}
74
347
 
75
- export const embedError = (
76
- cause: EmbedError['cause'],
77
- message: string,
78
- ): EmbedError => ({
79
- _tag: 'EmbedError',
80
- cause,
81
- message,
82
- })
348
+ // ============================================================================
349
+ // Errors
350
+ // ============================================================================
351
+ // NOTE: Embedding-related errors are defined in src/errors/index.ts:
352
+ // - EmbeddingError: For embedding operation failures (rate limits, quota, network)
353
+ // - ApiKeyMissingError: For missing API keys
354
+ // - ApiKeyInvalidError: For invalid/rejected API keys
355
+ //
356
+ // Use these centralized error types instead of defining errors here.
357
+ // Example:
358
+ // import { EmbeddingError } from '../errors/index.js'
359
+ // new EmbeddingError({ reason: 'RateLimit', message: 'Rate limited' })