mdcontext 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (251)
  1. package/.changeset/config.json +9 -9
  2. package/.claude/settings.local.json +25 -0
  3. package/.github/workflows/claude-code-review.yml +44 -0
  4. package/.github/workflows/claude.yml +85 -0
  5. package/CONTRIBUTING.md +186 -0
  6. package/NOTES/NOTES +44 -0
  7. package/README.md +206 -3
  8. package/biome.json +1 -1
  9. package/dist/chunk-23UPXDNL.js +3044 -0
  10. package/dist/chunk-2W7MO2DL.js +1366 -0
  11. package/dist/chunk-3NUAZGMA.js +1689 -0
  12. package/dist/chunk-7TOWB2XB.js +366 -0
  13. package/dist/chunk-7XOTOADQ.js +3065 -0
  14. package/dist/chunk-AH2PDM2K.js +3042 -0
  15. package/dist/chunk-BNXWSZ63.js +3742 -0
  16. package/dist/chunk-BTL5DJVU.js +3222 -0
  17. package/dist/chunk-HDHYG7E4.js +104 -0
  18. package/dist/chunk-HLR4KZBP.js +3234 -0
  19. package/dist/chunk-IP3FRFEB.js +1045 -0
  20. package/dist/chunk-KHU56VDO.js +3042 -0
  21. package/dist/chunk-KRYIFLQR.js +85 -89
  22. package/dist/chunk-LBSDNLEM.js +287 -0
  23. package/dist/chunk-MNTQ7HCP.js +2643 -0
  24. package/dist/chunk-MUJELQQ6.js +1387 -0
  25. package/dist/chunk-MXJGMSLV.js +2199 -0
  26. package/dist/chunk-N6QJGC3Z.js +2636 -0
  27. package/dist/chunk-OBELGBPM.js +1713 -0
  28. package/dist/chunk-OT7R5XTA.js +3192 -0
  29. package/dist/chunk-P7X4RA2T.js +106 -0
  30. package/dist/chunk-PIDUQNC2.js +3185 -0
  31. package/dist/chunk-POGCDIH4.js +3187 -0
  32. package/dist/chunk-PSIEOQGZ.js +3043 -0
  33. package/dist/chunk-PVRT3IHA.js +3238 -0
  34. package/dist/chunk-QNN4TT23.js +1430 -0
  35. package/dist/chunk-RE3R45RJ.js +3042 -0
  36. package/dist/chunk-S7E6TFX6.js +718 -657
  37. package/dist/chunk-SG6GLU4U.js +1378 -0
  38. package/dist/chunk-SJCDV2ST.js +274 -0
  39. package/dist/chunk-SYE5XLF3.js +104 -0
  40. package/dist/chunk-T5VLYBZD.js +103 -0
  41. package/dist/chunk-TOQB7VWU.js +3238 -0
  42. package/dist/chunk-VFNMZ4ZQ.js +3228 -0
  43. package/dist/chunk-VVTGZNBT.js +1533 -1423
  44. package/dist/chunk-W7Q4RFEV.js +104 -0
  45. package/dist/chunk-XTYYVRLO.js +3190 -0
  46. package/dist/chunk-Y6MDYVJD.js +3063 -0
  47. package/dist/cli/main.js +4072 -629
  48. package/dist/index.d.ts +420 -33
  49. package/dist/index.js +8 -15
  50. package/dist/mcp/server.js +103 -7
  51. package/dist/schema-BAWSG7KY.js +22 -0
  52. package/dist/schema-E3QUPL26.js +20 -0
  53. package/dist/schema-EHL7WUT6.js +20 -0
  54. package/docs/019-USAGE.md +44 -5
  55. package/docs/020-current-implementation.md +8 -8
  56. package/docs/021-DOGFOODING-FINDINGS.md +1 -1
  57. package/docs/CONFIG.md +1123 -0
  58. package/docs/ERRORS.md +383 -0
  59. package/docs/summarization.md +320 -0
  60. package/justfile +40 -0
  61. package/package.json +39 -33
  62. package/research/INDEX.md +315 -0
  63. package/research/code-review/README.md +90 -0
  64. package/research/code-review/cli-error-handling-review.md +979 -0
  65. package/research/code-review/code-review-validation-report.md +464 -0
  66. package/research/code-review/main-ts-review.md +1128 -0
  67. package/research/config-docs/SUMMARY.md +357 -0
  68. package/research/config-docs/TEST-RESULTS.md +776 -0
  69. package/research/config-docs/TODO.md +542 -0
  70. package/research/config-docs/analysis.md +744 -0
  71. package/research/config-docs/fix-validation.md +502 -0
  72. package/research/config-docs/help-audit.md +264 -0
  73. package/research/config-docs/help-system-analysis.md +890 -0
  74. package/research/frontmatter/COMMENTS-ARE-SKIPPED.md +149 -0
  75. package/research/frontmatter/LLM-CODE-NAVIGATION.md +276 -0
  76. package/research/issue-review.md +603 -0
  77. package/research/llm-summarization/agent-cli-tools-2026.md +1082 -0
  78. package/research/llm-summarization/alternative-providers-2026.md +1428 -0
  79. package/research/llm-summarization/anthropic-2026.md +367 -0
  80. package/research/llm-summarization/claude-cli-integration.md +1706 -0
  81. package/research/llm-summarization/cli-integration-patterns.md +3155 -0
  82. package/research/llm-summarization/openai-2026.md +473 -0
  83. package/research/llm-summarization/openai-compatible-providers-2026.md +1022 -0
  84. package/research/llm-summarization/opencode-cli-integration.md +1552 -0
  85. package/research/llm-summarization/prompt-engineering-2026.md +1426 -0
  86. package/research/llm-summarization/prototype-results.md +56 -0
  87. package/research/llm-summarization/provider-switching-patterns-2026.md +2153 -0
  88. package/research/llm-summarization/typescript-llm-libraries-2026.md +2436 -0
  89. package/research/mdcontext-pudding/00-EXECUTIVE-SUMMARY.md +282 -0
  90. package/research/mdcontext-pudding/01-index-embed.md +956 -0
  91. package/research/mdcontext-pudding/02-search-COMMANDS.md +142 -0
  92. package/research/mdcontext-pudding/02-search-SUMMARY.md +146 -0
  93. package/research/mdcontext-pudding/02-search.md +970 -0
  94. package/research/mdcontext-pudding/03-context.md +779 -0
  95. package/research/mdcontext-pudding/04-navigation-and-analytics.md +803 -0
  96. package/research/mdcontext-pudding/04-tree.md +704 -0
  97. package/research/mdcontext-pudding/05-config.md +1038 -0
  98. package/research/mdcontext-pudding/06-links-summary.txt +87 -0
  99. package/research/mdcontext-pudding/06-links.md +679 -0
  100. package/research/mdcontext-pudding/07-stats.md +693 -0
  101. package/research/mdcontext-pudding/BUG-FIX-PLAN.md +388 -0
  102. package/research/mdcontext-pudding/P0-BUG-VALIDATION.md +167 -0
  103. package/research/mdcontext-pudding/README.md +168 -0
  104. package/research/mdcontext-pudding/TESTING-SUMMARY.md +128 -0
  105. package/research/research-quality-review.md +834 -0
  106. package/research/semantic-search/embedding-text-analysis.md +156 -0
  107. package/research/semantic-search/multi-word-failure-reproduction.md +171 -0
  108. package/research/semantic-search/query-processing-analysis.md +207 -0
  109. package/research/semantic-search/root-cause-and-solution.md +114 -0
  110. package/research/semantic-search/threshold-validation-report.md +69 -0
  111. package/research/semantic-search/vector-search-analysis.md +63 -0
  112. package/research/test-path-issues.md +276 -0
  113. package/review/ALP-76/1-error-type-design.md +962 -0
  114. package/review/ALP-76/2-error-handling-patterns.md +906 -0
  115. package/review/ALP-76/3-error-presentation.md +624 -0
  116. package/review/ALP-76/4-test-coverage.md +625 -0
  117. package/review/ALP-76/5-migration-completeness.md +440 -0
  118. package/review/ALP-76/6-effect-best-practices.md +755 -0
  119. package/scripts/apply-branch-protection.sh +47 -0
  120. package/scripts/branch-protection-templates.json +79 -0
  121. package/scripts/prototype-summarization.ts +346 -0
  122. package/scripts/rebuild-hnswlib.js +32 -37
  123. package/scripts/setup-branch-protection.sh +64 -0
  124. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/active-provider.json +7 -0
  125. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.json +541 -0
  126. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/bm25.meta.json +5 -0
  127. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/config.json +8 -0
  128. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
  129. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
  130. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/documents.json +60 -0
  131. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/links.json +13 -0
  132. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/.mdcontext/indexes/sections.json +1197 -0
  133. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/configuration-management.md +99 -0
  134. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/distributed-systems.md +92 -0
  135. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/error-handling.md +78 -0
  136. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/failure-automation.md +55 -0
  137. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/job-context.md +69 -0
  138. package/src/__tests__/fixtures/semantic-search/multi-word-corpus/process-orchestration.md +99 -0
  139. package/src/cli/argv-preprocessor.test.ts +2 -2
  140. package/src/cli/cli.test.ts +230 -33
  141. package/src/cli/commands/config-cmd.ts +642 -0
  142. package/src/cli/commands/context.ts +97 -9
  143. package/src/cli/commands/duplicates.ts +122 -0
  144. package/src/cli/commands/embeddings.ts +529 -0
  145. package/src/cli/commands/index-cmd.ts +210 -30
  146. package/src/cli/commands/index.ts +3 -0
  147. package/src/cli/commands/search.ts +894 -64
  148. package/src/cli/commands/stats.ts +3 -0
  149. package/src/cli/commands/tree.ts +26 -5
  150. package/src/cli/config-layer.ts +176 -0
  151. package/src/cli/error-handler.test.ts +235 -0
  152. package/src/cli/error-handler.ts +655 -0
  153. package/src/cli/flag-schemas.ts +66 -0
  154. package/src/cli/help.ts +209 -7
  155. package/src/cli/main.ts +348 -58
  156. package/src/cli/options.ts +10 -0
  157. package/src/cli/shared-error-handling.ts +199 -0
  158. package/src/cli/utils.ts +150 -17
  159. package/src/config/file-provider.test.ts +320 -0
  160. package/src/config/file-provider.ts +273 -0
  161. package/src/config/index.ts +72 -0
  162. package/src/config/integration.test.ts +667 -0
  163. package/src/config/precedence.test.ts +277 -0
  164. package/src/config/precedence.ts +451 -0
  165. package/src/config/schema.test.ts +414 -0
  166. package/src/config/schema.ts +603 -0
  167. package/src/config/service.test.ts +320 -0
  168. package/src/config/service.ts +243 -0
  169. package/src/config/testing.test.ts +264 -0
  170. package/src/config/testing.ts +110 -0
  171. package/src/core/types.ts +6 -33
  172. package/src/duplicates/detector.test.ts +183 -0
  173. package/src/duplicates/detector.ts +414 -0
  174. package/src/duplicates/index.ts +18 -0
  175. package/src/embeddings/embedding-namespace.test.ts +300 -0
  176. package/src/embeddings/embedding-namespace.ts +947 -0
  177. package/src/embeddings/heading-boost.test.ts +222 -0
  178. package/src/embeddings/hnsw-build-options.test.ts +198 -0
  179. package/src/embeddings/hyde.test.ts +272 -0
  180. package/src/embeddings/hyde.ts +264 -0
  181. package/src/embeddings/index.ts +2 -0
  182. package/src/embeddings/openai-provider.ts +332 -83
  183. package/src/embeddings/pricing.json +22 -0
  184. package/src/embeddings/provider-constants.ts +204 -0
  185. package/src/embeddings/provider-errors.test.ts +967 -0
  186. package/src/embeddings/provider-errors.ts +565 -0
  187. package/src/embeddings/provider-factory.test.ts +240 -0
  188. package/src/embeddings/provider-factory.ts +225 -0
  189. package/src/embeddings/provider-integration.test.ts +788 -0
  190. package/src/embeddings/query-preprocessing.test.ts +187 -0
  191. package/src/embeddings/semantic-search-threshold.test.ts +508 -0
  192. package/src/embeddings/semantic-search.ts +780 -93
  193. package/src/embeddings/types.ts +293 -16
  194. package/src/embeddings/vector-store.ts +486 -77
  195. package/src/embeddings/voyage-provider.ts +313 -0
  196. package/src/errors/errors.test.ts +845 -0
  197. package/src/errors/index.ts +533 -0
  198. package/src/index/ignore-patterns.test.ts +354 -0
  199. package/src/index/ignore-patterns.ts +305 -0
  200. package/src/index/indexer.ts +286 -48
  201. package/src/index/storage.ts +94 -30
  202. package/src/index/types.ts +40 -2
  203. package/src/index/watcher.ts +67 -9
  204. package/src/index.ts +22 -0
  205. package/src/integration/search-keyword.test.ts +678 -0
  206. package/src/mcp/server.ts +135 -6
  207. package/src/parser/parser.ts +18 -19
  208. package/src/parser/section-filter.test.ts +277 -0
  209. package/src/parser/section-filter.ts +125 -3
  210. package/src/search/__tests__/hybrid-search.test.ts +650 -0
  211. package/src/search/bm25-store.ts +366 -0
  212. package/src/search/cross-encoder.test.ts +253 -0
  213. package/src/search/cross-encoder.ts +406 -0
  214. package/src/search/fuzzy-search.test.ts +419 -0
  215. package/src/search/fuzzy-search.ts +273 -0
  216. package/src/search/hybrid-search.ts +448 -0
  217. package/src/search/path-matcher.test.ts +276 -0
  218. package/src/search/path-matcher.ts +33 -0
  219. package/src/search/searcher.test.ts +99 -1
  220. package/src/search/searcher.ts +189 -67
  221. package/src/search/wink-bm25.d.ts +30 -0
  222. package/src/summarization/cli-providers/claude.ts +202 -0
  223. package/src/summarization/cli-providers/detection.test.ts +273 -0
  224. package/src/summarization/cli-providers/detection.ts +118 -0
  225. package/src/summarization/cli-providers/index.ts +8 -0
  226. package/src/summarization/cost.test.ts +139 -0
  227. package/src/summarization/cost.ts +102 -0
  228. package/src/summarization/error-handler.test.ts +127 -0
  229. package/src/summarization/error-handler.ts +111 -0
  230. package/src/summarization/index.ts +102 -0
  231. package/src/summarization/pipeline.test.ts +498 -0
  232. package/src/summarization/pipeline.ts +231 -0
  233. package/src/summarization/prompts.test.ts +269 -0
  234. package/src/summarization/prompts.ts +133 -0
  235. package/src/summarization/provider-factory.test.ts +396 -0
  236. package/src/summarization/provider-factory.ts +178 -0
  237. package/src/summarization/types.ts +184 -0
  238. package/src/summarize/summarizer.ts +104 -35
  239. package/src/types/huggingface-transformers.d.ts +66 -0
  240. package/tests/fixtures/cli/.mdcontext/active-provider.json +7 -0
  241. package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.bin +0 -0
  242. package/tests/fixtures/cli/.mdcontext/embeddings/openai_text-embedding-3-small_512/vectors.meta.bin +0 -0
  243. package/tests/fixtures/cli/.mdcontext/indexes/documents.json +4 -4
  244. package/tests/fixtures/cli/.mdcontext/indexes/sections.json +14 -0
  245. package/tests/integration/embed-index.test.ts +712 -0
  246. package/tests/integration/search-context.test.ts +469 -0
  247. package/tests/integration/search-semantic.test.ts +522 -0
  248. package/vitest.config.ts +1 -6
  249. package/AGENTS.md +0 -46
  250. package/tests/fixtures/cli/.mdcontext/vectors.bin +0 -0
  251. package/tests/fixtures/cli/.mdcontext/vectors.meta.json +0 -1264
@@ -0,0 +1,448 @@
1
+ /**
2
+ * Hybrid Search with Reciprocal Rank Fusion (RRF)
3
+ *
4
+ * Combines BM25 keyword search with semantic vector search for improved
5
+ * recall (15-30% improvement over single-method retrieval).
6
+ *
7
+ * RRF Formula: score(doc) = Σ weight / (k + rank)
8
+ * k = 60 (standard smoothing constant from research)
9
+ */
10
+
11
+ import * as path from 'node:path'
12
+ import { Effect } from 'effect'
13
+ import { listNamespaces } from '../embeddings/embedding-namespace.js'
14
+ import { semanticSearch } from '../embeddings/semantic-search.js'
15
+ import type {
16
+ SearchQuality,
17
+ SemanticSearchResult,
18
+ } from '../embeddings/types.js'
19
+ import type {
20
+ ApiKeyInvalidError,
21
+ ApiKeyMissingError,
22
+ EmbeddingError,
23
+ FileReadError,
24
+ VectorStoreError,
25
+ } from '../errors/index.js'
26
+ import {
27
+ type BM25SearchResult,
28
+ bm25IndexExists,
29
+ bm25Search,
30
+ } from './bm25-store.js'
31
+ import {
32
+ isRerankerAvailable,
33
+ type RerankerError,
34
+ rerankResults,
35
+ } from './cross-encoder.js'
36
+ import { matchPath } from './path-matcher.js'
37
+
38
+ // ============================================================================
39
+ // Types
40
+ // ============================================================================
41
+
/**
 * Retrieval strategy for a search:
 * - 'hybrid': fuse the semantic and keyword rankings
 * - 'semantic': embedding-based results only
 * - 'keyword': BM25 results only
 */
export type SearchMode =
  | 'hybrid'
  | 'semantic'
  | 'keyword'
43
+
/**
 * Tuning knobs accepted by `hybridSearch`. Every field is optional; the
 * defaults quoted below are the fallbacks `hybridSearch` applies itself.
 */
export interface HybridSearchOptions {
  /** Cap on the number of results returned (falls back to 10) */
  readonly limit?: number
  /** Similarity cutoff (0-1) forwarded to the semantic search (falls back to 0.35) */
  readonly threshold?: number
  /** Keep only results whose document path matches this pattern */
  readonly pathPattern?: string
  /** Pin the search mode instead of deriving it from the available indexes */
  readonly mode?: SearchMode
  /** Weight of the BM25 ranking in the RRF sum (falls back to 1.0) */
  readonly bm25Weight?: number
  /** Weight of the semantic ranking in the RRF sum (falls back to 1.0) */
  readonly semanticWeight?: number
  /** Smoothing constant k of the RRF formula (falls back to 60) */
  readonly rrfK?: number
  /** Request cross-encoder re-ranking of the results for better precision */
  readonly rerank?: boolean
  /** Quality preset forwarded to the semantic search: fast, balanced (default), or thorough */
  readonly quality?: SearchQuality | undefined
  /** Number of context lines to include before a match */
  readonly contextBefore?: number | undefined
  /** Number of context lines to include after a match */
  readonly contextAfter?: number | undefined
}
68
+
/**
 * One ranked section produced by a search, in the common shape shared by the
 * hybrid, semantic-only and keyword-only modes.
 */
export interface HybridSearchResult {
  /** Identifier of the matched section */
  readonly sectionId: string
  /** Path of the document that contains the section */
  readonly documentPath: string
  /** Heading text of the section */
  readonly heading: string
  /** Fused RRF score; larger means more relevant */
  readonly score: number
  /** Semantic similarity (0-1), present when the semantic search returned this section */
  readonly similarity?: number
  /** Raw BM25 score, present when the keyword search returned this section */
  readonly bm25Score?: number
  /** The search methods that returned this section */
  readonly sources: readonly ('semantic' | 'keyword')[]
  /** Score assigned by the cross-encoder, present only after re-ranking */
  readonly rerankerScore?: number
  /** Surrounding lines with their line numbers, present when context was requested */
  readonly contextLines?: readonly ContextLine[] | undefined
}
86
+
/** A single line of text shown as context around a search hit. */
export interface ContextLine {
  /** Position of the line, counted from 1 */
  readonly lineNumber: number
  /** Text content of the line */
  readonly line: string
  /** True when the line itself is a match (used by keyword search) */
  readonly isMatch: boolean
}
95
+
/** Diagnostics describing how a `hybridSearch` call was carried out. */
export interface HybridSearchStats {
  /** The mode that was actually used */
  readonly mode: SearchMode
  /** Short explanation of why that mode was selected */
  readonly modeReason: string
  /** Number of hits obtained from the semantic search */
  readonly semanticResults: number
  /** Number of hits obtained from the keyword search (after path filtering) */
  readonly keywordResults: number
  /** Number of results returned to the caller */
  readonly combinedResults: number
  /** True when a BM25 index was found */
  readonly bm25Available: boolean
  /** True when a semantic search was run and completed successfully */
  readonly embeddingsAvailable: boolean
  /** True when cross-encoder re-ranking was applied */
  readonly reranked?: boolean
  /** Count of unique results that existed before the limit was applied */
  readonly totalAvailable?: number
}
109
+
110
+ // ============================================================================
111
+ // RRF Fusion
112
+ // ============================================================================
113
+
/**
 * Merge a semantic ranking and a BM25 ranking with Reciprocal Rank Fusion (RRF).
 *
 * Each section receives `weight / (rrfK + rank)` from every list it appears
 * in (rank is 1-based) and the contributions are summed. Because only ranks
 * are used, the two methods' raw scores never need to be normalized, sections
 * ranked well by both methods rise to the top, and a section missing from one
 * list simply gets no contribution from it.
 *
 * @param semanticResults - Semantic hits; the rank is the array position + 1
 * @param keywordResults - BM25 hits; the rank is read from each hit's `rank`
 * @param options - Per-method weights, the smoothing constant `rrfK`, and the
 *   maximum number of results to return (`limit`)
 * @returns The top `limit` fused results in descending score order, and the
 *   number of unique sections that existed before truncation
 */
const fusionRRF = (
  semanticResults: readonly SemanticSearchResult[],
  keywordResults: readonly BM25SearchResult[],
  options: {
    bm25Weight: number
    semanticWeight: number
    rrfK: number
    limit: number
  },
): { results: HybridSearchResult[]; totalAvailable: number } => {
  const { bm25Weight, semanticWeight, rrfK, limit } = options

  // Mutable per-section accumulator, keyed by sectionId in the map below
  type FusionEntry = {
    documentPath: string
    heading: string
    rrfScore: number
    similarity?: number
    bm25Score?: number
    sources: Set<'semantic' | 'keyword'>
    contextLines?: readonly ContextLine[]
  }
  const bySection = new Map<string, FusionEntry>()

  // Semantic hits: array index is 0-based, so the RRF rank is index + 1
  semanticResults.forEach((hit, index) => {
    if (!hit) return

    const contribution = semanticWeight / (rrfK + index + 1)
    const seen = bySection.get(hit.sectionId)
    if (seen) {
      seen.rrfScore += contribution
      seen.similarity = hit.similarity
      seen.sources.add('semantic')
      // Keep the context lines from the first hit that provided any
      if (hit.contextLines && !seen.contextLines) {
        seen.contextLines = hit.contextLines
      }
      return
    }

    bySection.set(hit.sectionId, {
      documentPath: hit.documentPath,
      heading: hit.heading,
      rrfScore: contribution,
      similarity: hit.similarity,
      sources: new Set(['semantic']),
      ...(hit.contextLines ? { contextLines: hit.contextLines } : {}),
    })
  })

  // Keyword (BM25) hits carry their own rank
  for (const hit of keywordResults) {
    const contribution = bm25Weight / (rrfK + hit.rank)
    const seen = bySection.get(hit.sectionId)
    if (seen) {
      seen.rrfScore += contribution
      seen.bm25Score = hit.score
      seen.sources.add('keyword')
    } else {
      bySection.set(hit.sectionId, {
        documentPath: hit.documentPath,
        heading: hit.heading,
        rrfScore: contribution,
        bm25Score: hit.score,
        sources: new Set(['keyword']),
      })
    }
  }

  // Materialize immutable results; optional fields are only added when set
  const ranked = Array.from(
    bySection,
    ([sectionId, entry]): HybridSearchResult => ({
      sectionId,
      documentPath: entry.documentPath,
      heading: entry.heading,
      score: entry.rrfScore,
      sources: Array.from(entry.sources),
      ...(entry.similarity !== undefined
        ? { similarity: entry.similarity }
        : {}),
      ...(entry.bm25Score !== undefined ? { bm25Score: entry.bm25Score } : {}),
      ...(entry.contextLines !== undefined
        ? { contextLines: entry.contextLines }
        : {}),
    }),
  ).sort((a, b) => b.score - a.score)

  // A negative limit must yield no results; slice(0, -n) would instead
  // return everything except the last n entries.
  const cappedLimit = Math.max(0, limit)

  return {
    results: ranked.slice(0, cappedLimit),
    totalAvailable: ranked.length,
  }
}
241
+
242
+ // ============================================================================
243
+ // Hybrid Search
244
+ // ============================================================================
245
+
/**
 * Perform hybrid search combining semantic and keyword (BM25) search.
 *
 * Mode selection priority:
 * 1. Explicit `options.mode`
 * 2. 'hybrid' if the semantic search succeeded and a BM25 index exists
 * 3. 'semantic' if only the semantic search succeeded
 * 4. 'keyword' if only the BM25 index exists
 * 5. 'keyword' with an empty result list if neither is available
 *    (no error is raised; `stats.modeReason` is 'no indexes available')
 *
 * Any failure of the semantic search is absorbed and treated as
 * "embeddings unavailable"; the search then continues with whatever BM25
 * provides.
 * NOTE(review): this also applies when `mode: 'semantic'` is requested
 * explicitly - the caller then gets an empty result list instead of the
 * underlying error. Confirm that this is intended.
 *
 * @param rootPath - Root directory containing indexes
 * @param query - Search query text
 * @param options - Search options (see HybridSearchOptions for defaults)
 * @returns Ranked results plus statistics describing how the search ran
 */
export const hybridSearch = (
  rootPath: string,
  query: string,
  options: HybridSearchOptions = {},
): Effect.Effect<
  { results: readonly HybridSearchResult[]; stats: HybridSearchStats },
  | FileReadError
  | ApiKeyMissingError
  | ApiKeyInvalidError
  | EmbeddingError
  | VectorStoreError
  | RerankerError
> =>
  Effect.gen(function* () {
    const resolvedRoot = path.resolve(rootPath)
    const limit = options.limit ?? 10
    const threshold = options.threshold ?? 0.35
    const bm25Weight = options.bm25Weight ?? 1.0
    const semanticWeight = options.semanticWeight ?? 1.0
    const rrfK = options.rrfK ?? 60
    const { pathPattern } = options

    // Check index availability
    const hasBM25 = yield* bm25IndexExists(resolvedRoot)

    // Embedding availability is detected by running the semantic search
    // itself: success means embeddings exist (and yields the results),
    // any failure means they are treated as unavailable.
    let hasEmbeddings = false
    let semanticResults: readonly SemanticSearchResult[] = []

    if (options.mode !== 'keyword') {
      const semanticEffect = semanticSearch(resolvedRoot, query, {
        limit: limit * 2, // Over-fetch so fusion has more candidates
        threshold,
        pathPattern,
        quality: options.quality,
        contextBefore: options.contextBefore,
        contextAfter: options.contextAfter,
      })

      const semanticTry = yield* Effect.either(semanticEffect)
      if (semanticTry._tag === 'Right') {
        hasEmbeddings = true
        semanticResults = semanticTry.right
      }
    }

    // Get BM25 results if available
    let keywordResults: readonly BM25SearchResult[] = []
    if (hasBM25 && options.mode !== 'semantic') {
      const rawResults = yield* bm25Search(resolvedRoot, query, limit * 2)
      // Apply the path pattern filter if specified (hits keep their
      // original `rank` from the unfiltered BM25 ranking)
      keywordResults = pathPattern
        ? rawResults.filter((r) => matchPath(r.documentPath, pathPattern))
        : rawResults
    }

    // Determine effective mode and reason
    let effectiveMode: SearchMode
    let modeReason: string

    if (options.mode) {
      effectiveMode = options.mode
      modeReason = `--mode ${options.mode}`
    } else if (hasEmbeddings && hasBM25) {
      effectiveMode = 'hybrid'
      modeReason = 'both indexes available'
    } else if (hasEmbeddings) {
      effectiveMode = 'semantic'
      modeReason = 'embeddings available, no BM25 index'
    } else if (hasBM25) {
      effectiveMode = 'keyword'
      modeReason = 'BM25 available, no embeddings'
    } else {
      effectiveMode = 'keyword'
      modeReason = 'no indexes available'
    }

    // Perform fusion based on mode
    let results: HybridSearchResult[]
    let totalAvailable: number | undefined

    if (effectiveMode === 'hybrid') {
      const fusionResult = fusionRRF(semanticResults, keywordResults, {
        bm25Weight,
        semanticWeight,
        rrfK,
        limit,
      })
      results = fusionResult.results
      totalAvailable = fusionResult.totalAvailable
    } else if (effectiveMode === 'semantic') {
      // Convert semantic results to hybrid format
      totalAvailable = semanticResults.length
      results = semanticResults.slice(0, limit).map((r, idx) => ({
        sectionId: r.sectionId,
        documentPath: r.documentPath,
        heading: r.heading,
        score: semanticWeight / (rrfK + idx + 1), // RRF-style score for consistency
        similarity: r.similarity,
        sources: ['semantic'] as const,
        // Carry requested context lines through, as hybrid mode does
        ...(r.contextLines ? { contextLines: r.contextLines } : {}),
      }))
    } else {
      // Convert keyword results to hybrid format
      totalAvailable = keywordResults.length
      results = keywordResults.slice(0, limit).map((r) => ({
        sectionId: r.sectionId,
        documentPath: r.documentPath,
        heading: r.heading,
        score: bm25Weight / (rrfK + r.rank),
        bm25Score: r.score,
        sources: ['keyword'] as const,
      }))
    }

    // Apply cross-encoder re-ranking if enabled
    let reranked = false
    if (options.rerank && results.length > 0) {
      // Silently skip re-ranking when the reranker is not available
      const rerankerAvailable = yield* isRerankerAvailable()
      if (rerankerAvailable) {
        // The candidates are the already-limited results; each one is
        // presented to the cross-encoder as "heading (documentPath)".
        const rerankedResults = yield* rerankResults(
          query,
          results,
          (r) => `${r.heading} (${r.documentPath})`,
          { topK: 20, returnTopN: limit },
        )

        // Update results with reranker scores
        results = rerankedResults.map((rr) => ({
          ...rr.item,
          rerankerScore: rr.rerankerScore,
        }))
        reranked = true
      }
    }

    const stats: HybridSearchStats = {
      mode: effectiveMode,
      modeReason,
      semanticResults: semanticResults.length,
      keywordResults: keywordResults.length,
      combinedResults: results.length,
      bm25Available: hasBM25,
      embeddingsAvailable: hasEmbeddings,
      reranked,
      totalAvailable,
    }

    return { results, stats }
  })
414
+
415
+ // ============================================================================
416
+ // Mode Detection Helper
417
+ // ============================================================================
418
+
/**
 * Report which search indexes exist under a directory and which mode suits them.
 *
 * Embeddings count as present when at least one embedding namespace is
 * listed; a failure while listing namespaces is treated as "no embeddings".
 * The recommendation is 'hybrid' when both indexes exist, 'semantic' when
 * only embeddings exist, and 'keyword' otherwise.
 *
 * @param rootPath - Directory to inspect (resolved to an absolute path)
 * @returns Availability flags and the recommended search mode; never fails
 */
export const detectSearchModes = (
  rootPath: string,
): Effect.Effect<
  { hasBM25: boolean; hasEmbeddings: boolean; recommendedMode: SearchMode },
  never
> =>
  Effect.gen(function* () {
    const absoluteRoot = path.resolve(rootPath)

    const hasBM25 = yield* bm25IndexExists(absoluteRoot)

    // Any namespaced vector store counts; listing errors mean "none"
    const hasEmbeddings = yield* listNamespaces(absoluteRoot).pipe(
      Effect.map((found) => found.length > 0),
      Effect.catchAll(() => Effect.succeed(false)),
    )

    const recommendedMode: SearchMode = hasEmbeddings
      ? hasBM25
        ? 'hybrid'
        : 'semantic'
      : 'keyword'

    return { hasBM25, hasEmbeddings, recommendedMode }
  })