grepmind-core 0.1.0-alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (370) hide show
  1. package/LICENSE +190 -0
  2. package/dist/config/types.d.ts +174 -0
  3. package/dist/config/types.d.ts.map +1 -0
  4. package/dist/config/types.js +137 -0
  5. package/dist/config/types.js.map +1 -0
  6. package/dist/git.d.ts +98 -0
  7. package/dist/git.d.ts.map +1 -0
  8. package/dist/git.js +298 -0
  9. package/dist/git.js.map +1 -0
  10. package/dist/git.test.d.ts +7 -0
  11. package/dist/git.test.d.ts.map +1 -0
  12. package/dist/git.test.js +242 -0
  13. package/dist/git.test.js.map +1 -0
  14. package/dist/index.d.ts +44 -0
  15. package/dist/index.d.ts.map +1 -0
  16. package/dist/index.js +67 -0
  17. package/dist/index.js.map +1 -0
  18. package/dist/indexer/branch.d.ts +121 -0
  19. package/dist/indexer/branch.d.ts.map +1 -0
  20. package/dist/indexer/branch.js +451 -0
  21. package/dist/indexer/branch.js.map +1 -0
  22. package/dist/indexer/chunker.d.ts +9 -0
  23. package/dist/indexer/chunker.d.ts.map +1 -0
  24. package/dist/indexer/chunker.js +70 -0
  25. package/dist/indexer/chunker.js.map +1 -0
  26. package/dist/indexer/chunker.test.d.ts +2 -0
  27. package/dist/indexer/chunker.test.d.ts.map +1 -0
  28. package/dist/indexer/chunker.test.js +180 -0
  29. package/dist/indexer/chunker.test.js.map +1 -0
  30. package/dist/indexer/code/branch.d.ts +155 -0
  31. package/dist/indexer/code/branch.d.ts.map +1 -0
  32. package/dist/indexer/code/branch.js +550 -0
  33. package/dist/indexer/code/branch.js.map +1 -0
  34. package/dist/indexer/code/branch.test.d.ts +7 -0
  35. package/dist/indexer/code/branch.test.d.ts.map +1 -0
  36. package/dist/indexer/code/branch.test.js +241 -0
  37. package/dist/indexer/code/branch.test.js.map +1 -0
  38. package/dist/indexer/code/chunker.d.ts +61 -0
  39. package/dist/indexer/code/chunker.d.ts.map +1 -0
  40. package/dist/indexer/code/chunker.js +311 -0
  41. package/dist/indexer/code/chunker.js.map +1 -0
  42. package/dist/indexer/code/chunker.test.d.ts +2 -0
  43. package/dist/indexer/code/chunker.test.d.ts.map +1 -0
  44. package/dist/indexer/code/chunker.test.js +552 -0
  45. package/dist/indexer/code/chunker.test.js.map +1 -0
  46. package/dist/indexer/code/fts.test.d.ts +2 -0
  47. package/dist/indexer/code/fts.test.d.ts.map +1 -0
  48. package/dist/indexer/code/fts.test.js +14 -0
  49. package/dist/indexer/code/fts.test.js.map +1 -0
  50. package/dist/indexer/code/graph/embedded.d.ts +11 -0
  51. package/dist/indexer/code/graph/embedded.d.ts.map +1 -0
  52. package/dist/indexer/code/graph/embedded.js +152 -0
  53. package/dist/indexer/code/graph/embedded.js.map +1 -0
  54. package/dist/indexer/code/graph/embedded.test.d.ts +2 -0
  55. package/dist/indexer/code/graph/embedded.test.d.ts.map +1 -0
  56. package/dist/indexer/code/graph/embedded.test.js +105 -0
  57. package/dist/indexer/code/graph/embedded.test.js.map +1 -0
  58. package/dist/indexer/code/graph/facts.d.ts +11 -0
  59. package/dist/indexer/code/graph/facts.d.ts.map +1 -0
  60. package/dist/indexer/code/graph/facts.js +456 -0
  61. package/dist/indexer/code/graph/facts.js.map +1 -0
  62. package/dist/indexer/code/graph/facts.test.d.ts +2 -0
  63. package/dist/indexer/code/graph/facts.test.d.ts.map +1 -0
  64. package/dist/indexer/code/graph/facts.test.js +181 -0
  65. package/dist/indexer/code/graph/facts.test.js.map +1 -0
  66. package/dist/indexer/code/graph/id.d.ts +14 -0
  67. package/dist/indexer/code/graph/id.d.ts.map +1 -0
  68. package/dist/indexer/code/graph/id.js +40 -0
  69. package/dist/indexer/code/graph/id.js.map +1 -0
  70. package/dist/indexer/code/graph/id.test.d.ts +2 -0
  71. package/dist/indexer/code/graph/id.test.d.ts.map +1 -0
  72. package/dist/indexer/code/graph/id.test.js +86 -0
  73. package/dist/indexer/code/graph/id.test.js.map +1 -0
  74. package/dist/indexer/code/graph/index.d.ts +133 -0
  75. package/dist/indexer/code/graph/index.d.ts.map +1 -0
  76. package/dist/indexer/code/graph/index.js +1876 -0
  77. package/dist/indexer/code/graph/index.js.map +1 -0
  78. package/dist/indexer/code/graph/index.test.d.ts +2 -0
  79. package/dist/indexer/code/graph/index.test.d.ts.map +1 -0
  80. package/dist/indexer/code/graph/index.test.js +210 -0
  81. package/dist/indexer/code/graph/index.test.js.map +1 -0
  82. package/dist/indexer/code/graph/queries.d.ts +22 -0
  83. package/dist/indexer/code/graph/queries.d.ts.map +1 -0
  84. package/dist/indexer/code/graph/queries.js +79 -0
  85. package/dist/indexer/code/graph/queries.js.map +1 -0
  86. package/dist/indexer/code/graph/queries.test.d.ts +2 -0
  87. package/dist/indexer/code/graph/queries.test.d.ts.map +1 -0
  88. package/dist/indexer/code/graph/queries.test.js +108 -0
  89. package/dist/indexer/code/graph/queries.test.js.map +1 -0
  90. package/dist/indexer/code/graph/resolver.d.ts +136 -0
  91. package/dist/indexer/code/graph/resolver.d.ts.map +1 -0
  92. package/dist/indexer/code/graph/resolver.js +839 -0
  93. package/dist/indexer/code/graph/resolver.js.map +1 -0
  94. package/dist/indexer/code/graph/resolver.test.d.ts +2 -0
  95. package/dist/indexer/code/graph/resolver.test.d.ts.map +1 -0
  96. package/dist/indexer/code/graph/resolver.test.js +482 -0
  97. package/dist/indexer/code/graph/resolver.test.js.map +1 -0
  98. package/dist/indexer/code/graph/semantic.d.ts +33 -0
  99. package/dist/indexer/code/graph/semantic.d.ts.map +1 -0
  100. package/dist/indexer/code/graph/semantic.js +279 -0
  101. package/dist/indexer/code/graph/semantic.js.map +1 -0
  102. package/dist/indexer/code/graph/semantic.test.d.ts +2 -0
  103. package/dist/indexer/code/graph/semantic.test.d.ts.map +1 -0
  104. package/dist/indexer/code/graph/semantic.test.js +127 -0
  105. package/dist/indexer/code/graph/semantic.test.js.map +1 -0
  106. package/dist/indexer/code/index.d.ts +404 -0
  107. package/dist/indexer/code/index.d.ts.map +1 -0
  108. package/dist/indexer/code/index.js +2070 -0
  109. package/dist/indexer/code/index.js.map +1 -0
  110. package/dist/indexer/code/languages/bash.d.ts +14 -0
  111. package/dist/indexer/code/languages/bash.d.ts.map +1 -0
  112. package/dist/indexer/code/languages/bash.js +125 -0
  113. package/dist/indexer/code/languages/bash.js.map +1 -0
  114. package/dist/indexer/code/languages/css.d.ts +16 -0
  115. package/dist/indexer/code/languages/css.d.ts.map +1 -0
  116. package/dist/indexer/code/languages/css.js +204 -0
  117. package/dist/indexer/code/languages/css.js.map +1 -0
  118. package/dist/indexer/code/languages/generic.d.ts +61 -0
  119. package/dist/indexer/code/languages/generic.d.ts.map +1 -0
  120. package/dist/indexer/code/languages/generic.js +150 -0
  121. package/dist/indexer/code/languages/generic.js.map +1 -0
  122. package/dist/indexer/code/languages/graphql.d.ts +13 -0
  123. package/dist/indexer/code/languages/graphql.d.ts.map +1 -0
  124. package/dist/indexer/code/languages/graphql.js +180 -0
  125. package/dist/indexer/code/languages/graphql.js.map +1 -0
  126. package/dist/indexer/code/languages/html.d.ts +16 -0
  127. package/dist/indexer/code/languages/html.d.ts.map +1 -0
  128. package/dist/indexer/code/languages/html.js +138 -0
  129. package/dist/indexer/code/languages/html.js.map +1 -0
  130. package/dist/indexer/code/languages/index.d.ts +9 -0
  131. package/dist/indexer/code/languages/index.d.ts.map +1 -0
  132. package/dist/indexer/code/languages/index.js +12 -0
  133. package/dist/indexer/code/languages/index.js.map +1 -0
  134. package/dist/indexer/code/languages/json.d.ts +12 -0
  135. package/dist/indexer/code/languages/json.d.ts.map +1 -0
  136. package/dist/indexer/code/languages/json.js +66 -0
  137. package/dist/indexer/code/languages/json.js.map +1 -0
  138. package/dist/indexer/code/languages/registry.d.ts +78 -0
  139. package/dist/indexer/code/languages/registry.d.ts.map +1 -0
  140. package/dist/indexer/code/languages/registry.js +72 -0
  141. package/dist/indexer/code/languages/registry.js.map +1 -0
  142. package/dist/indexer/code/languages/typescript.d.ts +39 -0
  143. package/dist/indexer/code/languages/typescript.d.ts.map +1 -0
  144. package/dist/indexer/code/languages/typescript.js +300 -0
  145. package/dist/indexer/code/languages/typescript.js.map +1 -0
  146. package/dist/indexer/code/languages/yaml.d.ts +13 -0
  147. package/dist/indexer/code/languages/yaml.d.ts.map +1 -0
  148. package/dist/indexer/code/languages/yaml.js +90 -0
  149. package/dist/indexer/code/languages/yaml.js.map +1 -0
  150. package/dist/indexer/code/parser.d.ts +26 -0
  151. package/dist/indexer/code/parser.d.ts.map +1 -0
  152. package/dist/indexer/code/parser.js +332 -0
  153. package/dist/indexer/code/parser.js.map +1 -0
  154. package/dist/indexer/code/retry.d.ts +58 -0
  155. package/dist/indexer/code/retry.d.ts.map +1 -0
  156. package/dist/indexer/code/retry.js +192 -0
  157. package/dist/indexer/code/retry.js.map +1 -0
  158. package/dist/indexer/code/tree/builder.d.ts +30 -0
  159. package/dist/indexer/code/tree/builder.d.ts.map +1 -0
  160. package/dist/indexer/code/tree/builder.js +132 -0
  161. package/dist/indexer/code/tree/builder.js.map +1 -0
  162. package/dist/indexer/code/tree/builder.test.d.ts +2 -0
  163. package/dist/indexer/code/tree/builder.test.d.ts.map +1 -0
  164. package/dist/indexer/code/tree/builder.test.js +31 -0
  165. package/dist/indexer/code/tree/builder.test.js.map +1 -0
  166. package/dist/indexer/code/tree/cache.d.ts +22 -0
  167. package/dist/indexer/code/tree/cache.d.ts.map +1 -0
  168. package/dist/indexer/code/tree/cache.js +85 -0
  169. package/dist/indexer/code/tree/cache.js.map +1 -0
  170. package/dist/indexer/code/tree/context.d.ts +32 -0
  171. package/dist/indexer/code/tree/context.d.ts.map +1 -0
  172. package/dist/indexer/code/tree/context.js +78 -0
  173. package/dist/indexer/code/tree/context.js.map +1 -0
  174. package/dist/indexer/code/tree/embedding.d.ts +9 -0
  175. package/dist/indexer/code/tree/embedding.d.ts.map +1 -0
  176. package/dist/indexer/code/tree/embedding.js +53 -0
  177. package/dist/indexer/code/tree/embedding.js.map +1 -0
  178. package/dist/indexer/code/tree/embedding.test.d.ts +2 -0
  179. package/dist/indexer/code/tree/embedding.test.d.ts.map +1 -0
  180. package/dist/indexer/code/tree/embedding.test.js +57 -0
  181. package/dist/indexer/code/tree/embedding.test.js.map +1 -0
  182. package/dist/indexer/code/tree/id.d.ts +3 -0
  183. package/dist/indexer/code/tree/id.d.ts.map +1 -0
  184. package/dist/indexer/code/tree/id.js +8 -0
  185. package/dist/indexer/code/tree/id.js.map +1 -0
  186. package/dist/indexer/code/tree/index.d.ts +113 -0
  187. package/dist/indexer/code/tree/index.d.ts.map +1 -0
  188. package/dist/indexer/code/tree/index.js +1146 -0
  189. package/dist/indexer/code/tree/index.js.map +1 -0
  190. package/dist/indexer/code/tree/rename.d.ts +13 -0
  191. package/dist/indexer/code/tree/rename.d.ts.map +1 -0
  192. package/dist/indexer/code/tree/rename.js +46 -0
  193. package/dist/indexer/code/tree/rename.js.map +1 -0
  194. package/dist/indexer/code/tree/repomap.d.ts +29 -0
  195. package/dist/indexer/code/tree/repomap.d.ts.map +1 -0
  196. package/dist/indexer/code/tree/repomap.js +95 -0
  197. package/dist/indexer/code/tree/repomap.js.map +1 -0
  198. package/dist/indexer/code/tree/repomap.test.d.ts +2 -0
  199. package/dist/indexer/code/tree/repomap.test.d.ts.map +1 -0
  200. package/dist/indexer/code/tree/repomap.test.js +93 -0
  201. package/dist/indexer/code/tree/repomap.test.js.map +1 -0
  202. package/dist/indexer/code/tree/stats.d.ts +26 -0
  203. package/dist/indexer/code/tree/stats.d.ts.map +1 -0
  204. package/dist/indexer/code/tree/stats.js +49 -0
  205. package/dist/indexer/code/tree/stats.js.map +1 -0
  206. package/dist/indexer/code/tree/types.d.ts +186 -0
  207. package/dist/indexer/code/tree/types.d.ts.map +1 -0
  208. package/dist/indexer/code/tree/types.js +10 -0
  209. package/dist/indexer/code/tree/types.js.map +1 -0
  210. package/dist/indexer/code/wal.d.ts +144 -0
  211. package/dist/indexer/code/wal.d.ts.map +1 -0
  212. package/dist/indexer/code/wal.js +283 -0
  213. package/dist/indexer/code/wal.js.map +1 -0
  214. package/dist/indexer/embeddings.d.ts +113 -0
  215. package/dist/indexer/embeddings.d.ts.map +1 -0
  216. package/dist/indexer/embeddings.js +477 -0
  217. package/dist/indexer/embeddings.js.map +1 -0
  218. package/dist/indexer/git-sync.d.ts +117 -0
  219. package/dist/indexer/git-sync.d.ts.map +1 -0
  220. package/dist/indexer/git-sync.js +398 -0
  221. package/dist/indexer/git-sync.js.map +1 -0
  222. package/dist/indexer/index.d.ts +175 -0
  223. package/dist/indexer/index.d.ts.map +1 -0
  224. package/dist/indexer/index.js +1096 -0
  225. package/dist/indexer/index.js.map +1 -0
  226. package/dist/indexer/mocks/mock-reranker.d.ts +12 -0
  227. package/dist/indexer/mocks/mock-reranker.d.ts.map +1 -0
  228. package/dist/indexer/mocks/mock-reranker.js +26 -0
  229. package/dist/indexer/mocks/mock-reranker.js.map +1 -0
  230. package/dist/indexer/parser.d.ts +8 -0
  231. package/dist/indexer/parser.d.ts.map +1 -0
  232. package/dist/indexer/parser.js +44 -0
  233. package/dist/indexer/parser.js.map +1 -0
  234. package/dist/indexer/parser.test.d.ts +2 -0
  235. package/dist/indexer/parser.test.d.ts.map +1 -0
  236. package/dist/indexer/parser.test.js +197 -0
  237. package/dist/indexer/parser.test.js.map +1 -0
  238. package/dist/indexer/reranking.d.ts +71 -0
  239. package/dist/indexer/reranking.d.ts.map +1 -0
  240. package/dist/indexer/reranking.integration.test.d.ts +2 -0
  241. package/dist/indexer/reranking.integration.test.d.ts.map +1 -0
  242. package/dist/indexer/reranking.integration.test.js +104 -0
  243. package/dist/indexer/reranking.integration.test.js.map +1 -0
  244. package/dist/indexer/reranking.js +256 -0
  245. package/dist/indexer/reranking.js.map +1 -0
  246. package/dist/indexer/reranking.test.d.ts +2 -0
  247. package/dist/indexer/reranking.test.d.ts.map +1 -0
  248. package/dist/indexer/reranking.test.js +130 -0
  249. package/dist/indexer/reranking.test.js.map +1 -0
  250. package/dist/indexer/wal/file-storage.d.ts +60 -0
  251. package/dist/indexer/wal/file-storage.d.ts.map +1 -0
  252. package/dist/indexer/wal/file-storage.js +277 -0
  253. package/dist/indexer/wal/file-storage.js.map +1 -0
  254. package/dist/indexer/wal/file-storage.test.d.ts +8 -0
  255. package/dist/indexer/wal/file-storage.test.d.ts.map +1 -0
  256. package/dist/indexer/wal/file-storage.test.js +444 -0
  257. package/dist/indexer/wal/file-storage.test.js.map +1 -0
  258. package/dist/indexer/wal/index.d.ts +41 -0
  259. package/dist/indexer/wal/index.d.ts.map +1 -0
  260. package/dist/indexer/wal/index.js +61 -0
  261. package/dist/indexer/wal/index.js.map +1 -0
  262. package/dist/indexer/wal/integration.test.d.ts +11 -0
  263. package/dist/indexer/wal/integration.test.d.ts.map +1 -0
  264. package/dist/indexer/wal/integration.test.js +378 -0
  265. package/dist/indexer/wal/integration.test.js.map +1 -0
  266. package/dist/indexer/wal/lancedb-storage.d.ts +72 -0
  267. package/dist/indexer/wal/lancedb-storage.d.ts.map +1 -0
  268. package/dist/indexer/wal/lancedb-storage.js +462 -0
  269. package/dist/indexer/wal/lancedb-storage.js.map +1 -0
  270. package/dist/indexer/wal/lancedb-storage.test.d.ts +8 -0
  271. package/dist/indexer/wal/lancedb-storage.test.d.ts.map +1 -0
  272. package/dist/indexer/wal/lancedb-storage.test.js +415 -0
  273. package/dist/indexer/wal/lancedb-storage.test.js.map +1 -0
  274. package/dist/indexer/wal/sync-wal.d.ts +144 -0
  275. package/dist/indexer/wal/sync-wal.d.ts.map +1 -0
  276. package/dist/indexer/wal/sync-wal.js +863 -0
  277. package/dist/indexer/wal/sync-wal.js.map +1 -0
  278. package/dist/indexer/wal/sync-wal.test.d.ts +8 -0
  279. package/dist/indexer/wal/sync-wal.test.d.ts.map +1 -0
  280. package/dist/indexer/wal/sync-wal.test.js +752 -0
  281. package/dist/indexer/wal/sync-wal.test.js.map +1 -0
  282. package/dist/indexer/wal/types.d.ts +167 -0
  283. package/dist/indexer/wal/types.d.ts.map +1 -0
  284. package/dist/indexer/wal/types.js +12 -0
  285. package/dist/indexer/wal/types.js.map +1 -0
  286. package/dist/indexer/watcher.d.ts +36 -0
  287. package/dist/indexer/watcher.d.ts.map +1 -0
  288. package/dist/indexer/watcher.js +110 -0
  289. package/dist/indexer/watcher.js.map +1 -0
  290. package/dist/search/explore.d.ts +62 -0
  291. package/dist/search/explore.d.ts.map +1 -0
  292. package/dist/search/explore.js +111 -0
  293. package/dist/search/explore.js.map +1 -0
  294. package/dist/search/fts.d.ts +23 -0
  295. package/dist/search/fts.d.ts.map +1 -0
  296. package/dist/search/fts.js +64 -0
  297. package/dist/search/fts.js.map +1 -0
  298. package/dist/search/fts.test.d.ts +2 -0
  299. package/dist/search/fts.test.d.ts.map +1 -0
  300. package/dist/search/fts.test.js +27 -0
  301. package/dist/search/fts.test.js.map +1 -0
  302. package/dist/search/grep.d.ts +75 -0
  303. package/dist/search/grep.d.ts.map +1 -0
  304. package/dist/search/grep.js +96 -0
  305. package/dist/search/grep.js.map +1 -0
  306. package/dist/search/grep.test.d.ts +2 -0
  307. package/dist/search/grep.test.d.ts.map +1 -0
  308. package/dist/search/grep.test.js +178 -0
  309. package/dist/search/grep.test.js.map +1 -0
  310. package/dist/search/hybrid-grep.d.ts +43 -0
  311. package/dist/search/hybrid-grep.d.ts.map +1 -0
  312. package/dist/search/hybrid-grep.js +130 -0
  313. package/dist/search/hybrid-grep.js.map +1 -0
  314. package/dist/search/hybrid-grep.test.d.ts +2 -0
  315. package/dist/search/hybrid-grep.test.d.ts.map +1 -0
  316. package/dist/search/hybrid-grep.test.js +133 -0
  317. package/dist/search/hybrid-grep.test.js.map +1 -0
  318. package/dist/search/rg-executor.d.ts +63 -0
  319. package/dist/search/rg-executor.d.ts.map +1 -0
  320. package/dist/search/rg-executor.js +146 -0
  321. package/dist/search/rg-executor.js.map +1 -0
  322. package/dist/search/rg-executor.test.d.ts +2 -0
  323. package/dist/search/rg-executor.test.d.ts.map +1 -0
  324. package/dist/search/rg-executor.test.js +104 -0
  325. package/dist/search/rg-executor.test.js.map +1 -0
  326. package/dist/search/rg-parser/extractor.d.ts +14 -0
  327. package/dist/search/rg-parser/extractor.d.ts.map +1 -0
  328. package/dist/search/rg-parser/extractor.js +82 -0
  329. package/dist/search/rg-parser/extractor.js.map +1 -0
  330. package/dist/search/rg-parser/extractor.test.d.ts +2 -0
  331. package/dist/search/rg-parser/extractor.test.d.ts.map +1 -0
  332. package/dist/search/rg-parser/extractor.test.js +35 -0
  333. package/dist/search/rg-parser/extractor.test.js.map +1 -0
  334. package/dist/search/rg-parser/fts-builder.d.ts +7 -0
  335. package/dist/search/rg-parser/fts-builder.d.ts.map +1 -0
  336. package/dist/search/rg-parser/fts-builder.js +18 -0
  337. package/dist/search/rg-parser/fts-builder.js.map +1 -0
  338. package/dist/search/rg-parser/fts-builder.test.d.ts +2 -0
  339. package/dist/search/rg-parser/fts-builder.test.d.ts.map +1 -0
  340. package/dist/search/rg-parser/fts-builder.test.js +26 -0
  341. package/dist/search/rg-parser/fts-builder.test.js.map +1 -0
  342. package/dist/search/rg-parser/index.d.ts +36 -0
  343. package/dist/search/rg-parser/index.d.ts.map +1 -0
  344. package/dist/search/rg-parser/index.js +83 -0
  345. package/dist/search/rg-parser/index.js.map +1 -0
  346. package/dist/search/rg-parser/index.test.d.ts +2 -0
  347. package/dist/search/rg-parser/index.test.d.ts.map +1 -0
  348. package/dist/search/rg-parser/index.test.js +34 -0
  349. package/dist/search/rg-parser/index.test.js.map +1 -0
  350. package/dist/search/rg-parser/strategy.d.ts +14 -0
  351. package/dist/search/rg-parser/strategy.d.ts.map +1 -0
  352. package/dist/search/rg-parser/strategy.js +31 -0
  353. package/dist/search/rg-parser/strategy.js.map +1 -0
  354. package/dist/search/rg-parser/strategy.test.d.ts +2 -0
  355. package/dist/search/rg-parser/strategy.test.d.ts.map +1 -0
  356. package/dist/search/rg-parser/strategy.test.js +29 -0
  357. package/dist/search/rg-parser/strategy.test.js.map +1 -0
  358. package/dist/types.d.ts +345 -0
  359. package/dist/types.d.ts.map +1 -0
  360. package/dist/types.js +7 -0
  361. package/dist/types.js.map +1 -0
  362. package/dist/utils/vault.d.ts +84 -0
  363. package/dist/utils/vault.d.ts.map +1 -0
  364. package/dist/utils/vault.js +138 -0
  365. package/dist/utils/vault.js.map +1 -0
  366. package/dist/utils/vault.test.d.ts +2 -0
  367. package/dist/utils/vault.test.d.ts.map +1 -0
  368. package/dist/utils/vault.test.js +153 -0
  369. package/dist/utils/vault.test.js.map +1 -0
  370. package/package.json +69 -0
@@ -0,0 +1,1096 @@
1
+ // Main Indexer class with dependency injection
2
+ // No singletons - all dependencies passed via constructor
3
+ import * as lancedb from '@lancedb/lancedb';
4
+ import { BooleanQuery, MatchQuery, Occur, PhraseQuery } from '@lancedb/lancedb';
5
+ import fs from 'node:fs/promises';
6
+ import path from 'node:path';
7
+ import crypto from 'node:crypto';
8
+ import { DEFAULT_RERANKING_CONFIG } from '../config/types.js';
9
+ import { parseNote, generateContentHash } from './parser.js';
10
+ import { chunkNote } from './chunker.js';
11
+ import { createError, isError } from '../types.js';
12
+ import { buildJsonStringArrayContainsAnyFilter, buildPathPrefixFilter, combineFilters, } from '../search/fts.js';
13
+ /**
14
+ * Indexer class - manages the vector index for notes.
15
+ * All dependencies are passed via constructor (no singletons).
16
+ */
17
+ export class Indexer {
18
+ config;
19
+ embeddingProvider;
20
+ rerankerProvider;
21
+ rerankingConfig;
22
+ vaultUtils;
23
+ chunkingConfig;
24
+ db = null;
25
+ chunksTable = null;
26
+ fileIndexTable = null;
27
+ initialized = false;
28
+ metaPath;
29
+ hasVectorIndex = false;
30
+ // Unified SyncWal (optional - when provided, uses unified recovery)
31
+ syncWal = null;
32
+ constructor(deps) {
33
+ this.config = deps.config;
34
+ this.embeddingProvider = deps.embeddingProvider;
35
+ this.rerankerProvider = deps.rerankerProvider ?? null;
36
+ this.rerankingConfig = deps.rerankingConfig ?? deps.config.reranking ?? DEFAULT_RERANKING_CONFIG;
37
+ this.vaultUtils = deps.vaultUtils;
38
+ this.chunkingConfig = deps.config.chunking;
39
+ const isRemoteDbPath = /^[a-z][a-z0-9+.-]*:\/\//i.test(deps.config.lancedb.dbPath);
40
+ if (isRemoteDbPath) {
41
+ const dbHash = crypto.createHash('sha1').update(deps.config.lancedb.dbPath).digest('hex').slice(0, 12);
42
+ this.metaPath = path.join(process.cwd(), '.lancedb-state', dbHash, 'meta.json');
43
+ }
44
+ else {
45
+ this.metaPath = path.join(deps.config.lancedb.dbPath, 'meta.json');
46
+ }
47
+ this.syncWal = deps.syncWal || null;
48
+ }
49
+ async loadMeta() {
50
+ try {
51
+ const content = await fs.readFile(this.metaPath, 'utf-8');
52
+ return JSON.parse(content);
53
+ }
54
+ catch {
55
+ return null;
56
+ }
57
+ }
58
+ async saveMeta() {
59
+ const modelName = this.embeddingProvider.getModelName();
60
+ const meta = {
61
+ model: modelName,
62
+ createdAt: Date.now(),
63
+ provider: this.config.embedding.provider,
64
+ dimensions: this.embeddingProvider.getDimensions(),
65
+ };
66
+ await fs.mkdir(path.dirname(this.metaPath), { recursive: true });
67
+ await fs.writeFile(this.metaPath, JSON.stringify(meta, null, 2));
68
+ }
69
+ async cleanupLegacyIndex() {
70
+ for (const vaultPath of this.config.vaultPaths) {
71
+ const legacyPath = path.join(vaultPath, '.index');
72
+ try {
73
+ await fs.access(legacyPath);
74
+ console.warn('[indexer] Removing legacy Vectra index at', legacyPath);
75
+ await fs.rm(legacyPath, { recursive: true });
76
+ }
77
+ catch {
78
+ // Directory doesn't exist
79
+ }
80
+ }
81
+ }
82
+ async initialize() {
83
+ if (this.initialized)
84
+ return;
85
+ await this.cleanupLegacyIndex();
86
+ // Initialize embedding provider
87
+ await this.embeddingProvider.initialize();
88
+ // Ensure local LanceDB directory exists (skip for remote URIs)
89
+ const isRemoteDbPath = /^[a-z][a-z0-9+.-]*:\/\//i.test(this.config.lancedb.dbPath);
90
+ if (!isRemoteDbPath) {
91
+ await fs.mkdir(this.config.lancedb.dbPath, { recursive: true });
92
+ }
93
+ // Connect to LanceDB
94
+ this.db = await lancedb.connect(this.config.lancedb.dbPath, {
95
+ storageOptions: this.config.lancedb.storageOptions,
96
+ });
97
+ const tableNames = await this.db.tableNames();
98
+ // Check if embedding config changed
99
+ const meta = await this.loadMeta();
100
+ const currentModel = this.embeddingProvider.getModelName();
101
+ const currentDimensions = this.embeddingProvider.getDimensions();
102
+ const needsRecreate = meta && (meta.model !== currentModel ||
103
+ meta.provider !== this.config.embedding.provider ||
104
+ meta.dimensions !== currentDimensions);
105
+ if (needsRecreate) {
106
+ console.warn('[indexer] Embedding config changed, recreating index...');
107
+ console.warn(` Previous: model=${meta.model}, provider=${meta.provider}, dimensions=${meta.dimensions}`);
108
+ console.warn(` Current: model=${currentModel}, provider=${this.config.embedding.provider}, dimensions=${currentDimensions}`);
109
+ if (tableNames.includes(this.config.lancedb.chunksTable)) {
110
+ await this.db.dropTable(this.config.lancedb.chunksTable);
111
+ }
112
+ if (tableNames.includes(this.config.lancedb.fileIndexTable)) {
113
+ await this.db.dropTable(this.config.lancedb.fileIndexTable);
114
+ }
115
+ }
116
+ if (tableNames.includes(this.config.lancedb.chunksTable)) {
117
+ this.chunksTable = await this.db.openTable(this.config.lancedb.chunksTable);
118
+ }
119
+ if (tableNames.includes(this.config.lancedb.fileIndexTable)) {
120
+ this.fileIndexTable = await this.db.openTable(this.config.lancedb.fileIndexTable);
121
+ }
122
+ await this.saveMeta();
123
+ this.initialized = true;
124
+ // Auto-create all tables at startup
125
+ await this.ensureAllTables();
126
+ }
127
+ /**
128
+ * Ensure all notes index tables exist with proper schema.
129
+ * Safe to call multiple times (idempotent).
130
+ */
131
+ async ensureAllTables() {
132
+ if (!this.initialized) {
133
+ await this.initialize();
134
+ return; // initialize() already called ensureAllTables()
135
+ }
136
+ await this.ensureChunksTable();
137
+ await this.ensureFileIndexTable();
138
+ }
139
+ async ensureChunksTable() {
140
+ if (this.chunksTable)
141
+ return this.chunksTable;
142
+ const sampleData = [{
143
+ id: '__init__',
144
+ vector: new Array(this.embeddingProvider.getDimensions()).fill(0),
145
+ path: '',
146
+ section: '',
147
+ title: '',
148
+ tags: '[]',
149
+ links: '[]',
150
+ modified: 0,
151
+ content_hash: '',
152
+ content: '',
153
+ }];
154
+ this.chunksTable = await this.db.createTable(this.config.lancedb.chunksTable, sampleData, { mode: 'overwrite' });
155
+ await this.chunksTable.delete("id = '__init__'");
156
+ return this.chunksTable;
157
+ }
158
+ async ensureFileIndexTable() {
159
+ if (this.fileIndexTable)
160
+ return this.fileIndexTable;
161
+ const sampleData = [{
162
+ path: '__init__',
163
+ mtime: 0,
164
+ content_hash: '',
165
+ chunk_ids: '[]',
166
+ indexed_at: 0,
167
+ }];
168
+ this.fileIndexTable = await this.db.createTable(this.config.lancedb.fileIndexTable, sampleData, { mode: 'overwrite' });
169
+ await this.fileIndexTable.delete("path = '__init__'");
170
+ return this.fileIndexTable;
171
+ }
172
+ async getFileRecord(filePath) {
173
+ const table = await this.ensureFileIndexTable();
174
+ const results = await table.query()
175
+ .where(`path = '${filePath.replace(/'/g, "''")}'`)
176
+ .toArray();
177
+ if (results.length === 0)
178
+ return null;
179
+ const r = results[0];
180
+ return {
181
+ path: r.path,
182
+ mtime: r.mtime,
183
+ content_hash: r.content_hash,
184
+ chunk_ids: r.chunk_ids,
185
+ indexed_at: r.indexed_at,
186
+ };
187
+ }
188
+ async getAllIndexedPaths() {
189
+ const table = await this.ensureFileIndexTable();
190
+ const results = await table.query().select(['path']).toArray();
191
+ return new Set(results.map(r => r.path));
192
+ }
193
+ async updateFileIndex(filePath, mtime, contentHash, chunkIds) {
194
+ const table = await this.ensureFileIndexTable();
195
+ await table.delete(`path = '${filePath.replace(/'/g, "''")}'`);
196
+ await table.add([{
197
+ path: filePath,
198
+ mtime,
199
+ content_hash: contentHash,
200
+ chunk_ids: JSON.stringify(chunkIds),
201
+ indexed_at: Date.now(),
202
+ }]);
203
+ }
204
+ async removeFromFileIndex(filePath) {
205
+ const table = await this.ensureFileIndexTable();
206
+ await table.delete(`path = '${filePath.replace(/'/g, "''")}'`);
207
+ }
208
+ async indexChunk(chunk, modified) {
209
+ const contentHash = generateContentHash(chunk.content);
210
+ const table = await this.ensureChunksTable();
211
+ const existingResult = await table.query()
212
+ .where(`id = '${chunk.id}'`)
213
+ .toArray();
214
+ if (existingResult.length > 0 && existingResult[0].content_hash === contentHash) {
215
+ return chunk.id;
216
+ }
217
+ const embedding = await this.embeddingProvider.getEmbedding(chunk.content);
218
+ if (isError(embedding)) {
219
+ return embedding;
220
+ }
221
+ if (existingResult.length > 0) {
222
+ await table.delete(`id = '${chunk.id}'`);
223
+ }
224
+ await table.add([{
225
+ id: chunk.id,
226
+ vector: embedding,
227
+ path: chunk.path,
228
+ section: chunk.section,
229
+ title: chunk.title,
230
+ tags: JSON.stringify(chunk.tags),
231
+ links: JSON.stringify(chunk.links),
232
+ modified,
233
+ content_hash: contentHash,
234
+ content: chunk.content,
235
+ }]);
236
+ return chunk.id;
237
+ }
238
+ async deleteChunksForFile(chunkIds) {
239
+ if (chunkIds.length === 0)
240
+ return;
241
+ const table = await this.ensureChunksTable();
242
+ for (const id of chunkIds) {
243
+ await table.delete(`id = '${id}'`);
244
+ }
245
+ }
246
+ async removeChunksForPath(filePath) {
247
+ const table = await this.ensureChunksTable();
248
+ await table.delete(`path = '${filePath.replace(/'/g, "''")}'`);
249
+ }
250
+ async createOrUpdateVectorIndex() {
251
+ const table = await this.ensureChunksTable();
252
+ const count = await table.countRows();
253
+ if (count < 10) {
254
+ return;
255
+ }
256
+ console.log(`[indexer] Creating/updating IVF_PQ vector index (${count} chunks)...`);
257
+ try {
258
+ await table.createIndex('vector', {
259
+ config: lancedb.Index.ivfPq({
260
+ numPartitions: Math.max(4, Math.min(256, Math.floor(count / 10))),
261
+ numSubVectors: 16,
262
+ }),
263
+ });
264
+ this.hasVectorIndex = true;
265
+ console.log('[indexer] Vector index created successfully');
266
+ }
267
+ catch (err) {
268
+ const msg = err.message;
269
+ if (!msg.includes('already exists')) {
270
+ console.warn('[indexer] Could not create vector index:', msg);
271
+ }
272
+ }
273
+ }
274
+ async createOrUpdateFtsIndex() {
275
+ if (!this.config.notesFts.enabled) {
276
+ return;
277
+ }
278
+ const table = await this.ensureChunksTable();
279
+ const count = await table.countRows();
280
+ if (count === 0) {
281
+ return;
282
+ }
283
+ const indexName = 'content_idx';
284
+ console.log(`[indexer] Creating/updating FTS index (${count} chunks)...`);
285
+ try {
286
+ await table.createIndex('content', {
287
+ config: lancedb.Index.fts({
288
+ withPosition: this.config.notesFts.withPosition,
289
+ baseTokenizer: this.config.notesFts.baseTokenizer,
290
+ language: this.config.notesFts.language,
291
+ maxTokenLength: this.config.notesFts.maxTokenLength,
292
+ lowercase: this.config.notesFts.lowercase,
293
+ stem: this.config.notesFts.stem,
294
+ removeStopWords: this.config.notesFts.removeStopWords,
295
+ asciiFolding: this.config.notesFts.asciiFolding,
296
+ }),
297
+ replace: true,
298
+ });
299
+ await table.waitForIndex([indexName], 60);
300
+ console.log('[indexer] FTS index created successfully');
301
+ }
302
+ catch (err) {
303
+ console.warn('[indexer] Could not create FTS index:', err.message);
304
+ }
305
+ }
306
+ normalizeSearchScores(results) {
307
+ if (results.length === 0)
308
+ return [];
309
+ const values = results.map((r) => r.score);
310
+ const min = Math.min(...values);
311
+ const max = Math.max(...values);
312
+ if (max === min)
313
+ return values.map(() => 1);
314
+ return values.map((v) => (v - min) / (max - min));
315
+ }
316
+ buildHybridResultKey(result) {
317
+ return `${result.path}::${result.section ?? ''}::${result.content.slice(0, 120)}`;
318
+ }
319
+ async search(query, options = {}) {
320
+ const { limit = this.config.search.defaultLimit, threshold = this.config.search.defaultThreshold, tags, folder, rerank = true, } = options;
321
+ await this.initialize();
322
+ const table = await this.ensureChunksTable();
323
+ const count = await table.countRows();
324
+ if (count === 0) {
325
+ return [];
326
+ }
327
+ const queryEmbedding = await this.embeddingProvider.getEmbedding(query, 'query');
328
+ if (isError(queryEmbedding)) {
329
+ return queryEmbedding;
330
+ }
331
+ const useReranking = Boolean(rerank &&
332
+ this.rerankerProvider &&
333
+ this.rerankingConfig.enabled &&
334
+ this.rerankingConfig.provider !== 'none');
335
+ const fetchLimit = useReranking
336
+ ? Math.max(this.rerankingConfig.topK, limit)
337
+ : limit * 2;
338
+ const vectorThreshold = useReranking
339
+ ? this.rerankingConfig.prerankThreshold
340
+ : threshold;
341
+ let searchQuery = table.search(queryEmbedding).limit(fetchLimit);
342
+ const folderFilter = folder
343
+ ? buildPathPrefixFilter('path', folder, { ensureTrailingSlash: true })
344
+ : null;
345
+ if (folderFilter) {
346
+ searchQuery = searchQuery.where(folderFilter);
347
+ }
348
+ const results = await searchQuery.toArray();
349
+ const processed = [];
350
+ for (const r of results) {
351
+ const distance = r._distance;
352
+ const score = 1 - (distance / 2);
353
+ if (score < vectorThreshold)
354
+ continue;
355
+ const itemTags = JSON.parse(r.tags);
356
+ const itemLinks = JSON.parse(r.links);
357
+ if (tags && tags.length > 0) {
358
+ if (!tags.some((tag) => itemTags.includes(tag))) {
359
+ continue;
360
+ }
361
+ }
362
+ processed.push({
363
+ path: r.path,
364
+ section: r.section,
365
+ score,
366
+ content: r.content,
367
+ title: r.title,
368
+ tags: itemTags,
369
+ links: itemLinks,
370
+ });
371
+ if (processed.length >= fetchLimit)
372
+ break;
373
+ }
374
+ if (processed.length === 0) {
375
+ return [];
376
+ }
377
+ if (useReranking && processed.length >= 5 && this.rerankerProvider) {
378
+ try {
379
+ if (!this.rerankerProvider.isInitialized()) {
380
+ await this.rerankerProvider.initialize();
381
+ }
382
+ const documents = processed.map((item) => item.content);
383
+ const reranked = await this.rerankerProvider.rerank(query, documents, {
384
+ topK: limit,
385
+ });
386
+ if (!isError(reranked)) {
387
+ const rerankedResults = reranked
388
+ .filter((rr) => rr.index >= 0 && rr.index < processed.length)
389
+ .map((rr) => ({
390
+ ...processed[rr.index],
391
+ score: rr.score,
392
+ _originalScore: processed[rr.index].score,
393
+ }))
394
+ .filter((result) => result.score >= threshold);
395
+ return rerankedResults.slice(0, limit);
396
+ }
397
+ console.warn('[indexer] Reranking failed, using vector scores:', reranked.message);
398
+ }
399
+ catch (error) {
400
+ console.warn('[indexer] Reranking error, falling back to vector scores:', error);
401
+ }
402
+ }
403
+ return processed
404
+ .filter((result) => result.score >= threshold)
405
+ .slice(0, limit);
406
+ }
407
+ async searchFts(options) {
408
+ await this.initialize();
409
+ const query = options.query.trim();
410
+ if (!query) {
411
+ return createError('INVALID_QUERY', 'Query must not be empty');
412
+ }
413
+ const table = await this.ensureChunksTable();
414
+ const count = await table.countRows();
415
+ if (count === 0) {
416
+ return [];
417
+ }
418
+ const limit = options.limit ?? this.config.search.defaultLimit;
419
+ const hasFuzzyOptions = options.fuzziness !== undefined ||
420
+ options.maxExpansions !== undefined ||
421
+ options.prefixLength !== undefined;
422
+ const phraseQuery = options.phraseMatch
423
+ ? new PhraseQuery(query, 'content', { slop: options.phraseSlop ?? 0 })
424
+ : null;
425
+ const fuzzyQuery = hasFuzzyOptions
426
+ ? new MatchQuery(query, 'content', {
427
+ fuzziness: options.fuzziness ?? 1,
428
+ maxExpansions: options.maxExpansions,
429
+ prefixLength: options.prefixLength,
430
+ })
431
+ : null;
432
+ let ftsQuery = query;
433
+ if (phraseQuery && fuzzyQuery) {
434
+ ftsQuery = new BooleanQuery([
435
+ [Occur.Should, phraseQuery],
436
+ [Occur.Should, fuzzyQuery],
437
+ ]);
438
+ }
439
+ else if (phraseQuery) {
440
+ ftsQuery = phraseQuery;
441
+ }
442
+ else if (fuzzyQuery) {
443
+ ftsQuery = fuzzyQuery;
444
+ }
445
+ let searchQuery = table.search(ftsQuery, 'fts', 'content').limit(limit);
446
+ const whereClause = combineFilters([
447
+ options.folder
448
+ ? buildPathPrefixFilter('path', options.folder, { ensureTrailingSlash: true })
449
+ : null,
450
+ buildJsonStringArrayContainsAnyFilter('tags', options.tags),
451
+ ]);
452
+ if (whereClause) {
453
+ searchQuery = searchQuery.where(whereClause);
454
+ }
455
+ const rows = await searchQuery.toArray();
456
+ const results = rows.map((r) => {
457
+ let parsedTags = [];
458
+ let parsedLinks = [];
459
+ try {
460
+ parsedTags = JSON.parse(r.tags || '[]');
461
+ }
462
+ catch {
463
+ parsedTags = [];
464
+ }
465
+ try {
466
+ parsedLinks = JSON.parse(r.links || '[]');
467
+ }
468
+ catch {
469
+ parsedLinks = [];
470
+ }
471
+ const ftsScore = typeof r._score === 'number' ? r._score : undefined;
472
+ const fallbackScore = typeof r._distance === 'number'
473
+ ? 1 - (r._distance / 2)
474
+ : 0;
475
+ return {
476
+ path: r.path,
477
+ section: r.section,
478
+ score: ftsScore ?? fallbackScore,
479
+ content: r.content,
480
+ title: r.title,
481
+ tags: parsedTags,
482
+ links: parsedLinks,
483
+ };
484
+ });
485
+ return results;
486
+ }
487
+ async searchHybrid(options) {
488
+ const query = options.query.trim();
489
+ if (!query) {
490
+ return createError('INVALID_QUERY', 'Query must not be empty');
491
+ }
492
+ const limit = options.limit ?? this.config.search.defaultLimit;
493
+ const threshold = options.threshold ?? this.config.search.defaultThreshold;
494
+ const semanticLimit = options.semanticLimit ?? Math.max(limit * 3, 20);
495
+ const ftsLimit = options.ftsLimit ?? Math.max(limit * 3, 20);
496
+ const vectorWeight = options.vectorWeight ?? 0.55;
497
+ const ftsWeight = options.ftsWeight ?? 0.45;
498
+ const rerankEnabled = options.rerank ?? true;
499
+ const [semanticResults, ftsResults] = await Promise.all([
500
+ this.search(query, {
501
+ limit: semanticLimit,
502
+ threshold: 0,
503
+ tags: options.tags,
504
+ folder: options.folder,
505
+ rerank: false,
506
+ }),
507
+ this.searchFts({
508
+ query,
509
+ limit: ftsLimit,
510
+ fuzziness: options.fuzziness,
511
+ maxExpansions: options.maxExpansions,
512
+ prefixLength: options.prefixLength,
513
+ phraseMatch: options.phraseMatch,
514
+ phraseSlop: options.phraseSlop,
515
+ folder: options.folder,
516
+ tags: options.tags,
517
+ }),
518
+ ]);
519
+ if (isError(semanticResults))
520
+ return semanticResults;
521
+ if (isError(ftsResults))
522
+ return ftsResults;
523
+ if (semanticResults.length === 0 && ftsResults.length === 0)
524
+ return [];
525
+ const semanticNorm = this.normalizeSearchScores(semanticResults);
526
+ const ftsNorm = this.normalizeSearchScores(ftsResults);
527
+ const merged = new Map();
528
+ for (let i = 0; i < semanticResults.length; i++) {
529
+ const item = semanticResults[i];
530
+ const key = this.buildHybridResultKey(item);
531
+ const existing = merged.get(key);
532
+ if (existing) {
533
+ existing.vector = Math.max(existing.vector, semanticNorm[i] ?? 0);
534
+ }
535
+ else {
536
+ merged.set(key, { item, vector: semanticNorm[i] ?? 0, fts: 0 });
537
+ }
538
+ }
539
+ for (let i = 0; i < ftsResults.length; i++) {
540
+ const item = ftsResults[i];
541
+ const key = this.buildHybridResultKey(item);
542
+ const existing = merged.get(key);
543
+ if (existing) {
544
+ existing.fts = Math.max(existing.fts, ftsNorm[i] ?? 0);
545
+ }
546
+ else {
547
+ merged.set(key, { item, vector: 0, fts: ftsNorm[i] ?? 0 });
548
+ }
549
+ }
550
+ const fused = Array.from(merged.values())
551
+ .map(({ item, vector, fts }) => {
552
+ const overlapBoost = vector > 0 && fts > 0 ? 0.05 : 0;
553
+ const score = Math.min(1, vectorWeight * vector + ftsWeight * fts + overlapBoost);
554
+ return { ...item, score };
555
+ })
556
+ .sort((a, b) => b.score - a.score);
557
+ const useReranking = Boolean(rerankEnabled &&
558
+ this.rerankerProvider &&
559
+ this.rerankingConfig.enabled &&
560
+ this.rerankingConfig.provider !== 'none');
561
+ if (useReranking && fused.length >= 5 && this.rerankerProvider) {
562
+ try {
563
+ if (!this.rerankerProvider.isInitialized()) {
564
+ await this.rerankerProvider.initialize();
565
+ }
566
+ const candidateLimit = Math.max(this.rerankingConfig.topK, limit);
567
+ const candidates = fused.slice(0, candidateLimit);
568
+ const reranked = await this.rerankerProvider.rerank(query, candidates.map((c) => c.content), { topK: limit });
569
+ if (!isError(reranked)) {
570
+ return reranked
571
+ .filter((rr) => rr.index >= 0 && rr.index < candidates.length)
572
+ .map((rr) => ({
573
+ ...candidates[rr.index],
574
+ score: rr.score,
575
+ _originalScore: candidates[rr.index].score,
576
+ }))
577
+ .filter((result) => result.score >= threshold)
578
+ .slice(0, limit);
579
+ }
580
+ console.warn('[indexer] Hybrid reranking failed, using fused scores:', reranked.message);
581
+ }
582
+ catch (error) {
583
+ console.warn('[indexer] Hybrid reranking error, using fused scores:', error);
584
+ }
585
+ }
586
+ return fused
587
+ .filter((result) => result.score >= threshold)
588
+ .slice(0, limit);
589
+ }
590
+ async reindexAll() {
591
+ await this.initialize();
592
+ const files = await this.vaultUtils.globVaults('**/*.md');
593
+ let indexed = 0;
594
+ let skipped = 0;
595
+ let deleted = 0;
596
+ const errors = [];
597
+ const indexedPaths = await this.getAllIndexedPaths();
598
+ const currentPaths = new Set();
599
+ for (const file of files) {
600
+ const relativePath = this.vaultUtils.toRelativePath(file);
601
+ currentPaths.add(relativePath);
602
+ try {
603
+ const stat = await fs.stat(file);
604
+ const existingRecord = await this.getFileRecord(relativePath);
605
+ if (existingRecord && existingRecord.mtime === stat.mtimeMs) {
606
+ skipped++;
607
+ continue;
608
+ }
609
+ const content = await fs.readFile(file, 'utf-8');
610
+ const fileHash = generateContentHash(content);
611
+ if (existingRecord && existingRecord.content_hash === fileHash) {
612
+ await this.updateFileIndex(relativePath, stat.mtimeMs, fileHash, JSON.parse(existingRecord.chunk_ids));
613
+ skipped++;
614
+ continue;
615
+ }
616
+ const note = parseNote(relativePath, content);
617
+ const chunks = chunkNote(note, this.chunkingConfig);
618
+ if (existingRecord) {
619
+ const oldChunkIds = JSON.parse(existingRecord.chunk_ids);
620
+ await this.deleteChunksForFile(oldChunkIds);
621
+ }
622
+ const newChunkIds = [];
623
+ for (const chunk of chunks) {
624
+ const result = await this.indexChunk(chunk, stat.mtimeMs);
625
+ if (isError(result)) {
626
+ errors.push(result);
627
+ }
628
+ else {
629
+ newChunkIds.push(result);
630
+ indexed++;
631
+ }
632
+ }
633
+ await this.updateFileIndex(relativePath, stat.mtimeMs, fileHash, newChunkIds);
634
+ }
635
+ catch (err) {
636
+ errors.push(createError('INDEX_FILE_ERROR', `Failed to index ${relativePath}`, {
637
+ error: err.message,
638
+ }));
639
+ }
640
+ }
641
+ for (const indexedPath of indexedPaths) {
642
+ if (!currentPaths.has(indexedPath)) {
643
+ const record = await this.getFileRecord(indexedPath);
644
+ if (record) {
645
+ const chunkIds = JSON.parse(record.chunk_ids);
646
+ await this.deleteChunksForFile(chunkIds);
647
+ await this.removeFromFileIndex(indexedPath);
648
+ deleted++;
649
+ }
650
+ }
651
+ }
652
+ await this.createOrUpdateVectorIndex();
653
+ await this.createOrUpdateFtsIndex();
654
+ return { indexed, skipped, deleted, errors };
655
+ }
656
+ /**
657
+ * Compute manifest hash from sorted file paths for recovery validation.
658
+ * Both notes and code use the same hash algorithm.
659
+ */
660
+ computeManifestHash(files) {
661
+ const sortedPaths = files.map(f => this.vaultUtils.toRelativePath(f)).sort();
662
+ const content = sortedPaths.join('\n');
663
+ return crypto.createHash('sha256').update(content).digest('hex');
664
+ }
665
+ /**
666
+ * Reindex all notes using unified SyncWal for recovery.
667
+ * Uses offset-based recovery (deterministic file ordering).
668
+ *
669
+ * @param recovery Optional recovery plan from SyncWal
670
+ * @returns Result with counts
671
+ */
672
+ async reindexAllWithSyncWal(recovery) {
673
+ if (!this.syncWal) {
674
+ throw new Error('SyncWal not provided - use reindexAll() instead');
675
+ }
676
+ await this.initialize();
677
+ // Get all files sorted deterministically
678
+ let files = await this.vaultUtils.globVaults('**/*.md');
679
+ files = files.sort((a, b) => a.localeCompare(b));
680
+ // Compute manifest hash for recovery validation
681
+ const manifestHash = this.computeManifestHash(files);
682
+ // Validate recovery plan
683
+ let skipCount = 0;
684
+ if (recovery) {
685
+ if (recovery.manifestHash === manifestHash) {
686
+ skipCount = recovery.skipCount;
687
+ console.log(`[indexer] Resuming from file ${skipCount}/${files.length}`);
688
+ }
689
+ else {
690
+ console.warn('[indexer] Manifest hash mismatch, starting full reindex');
691
+ }
692
+ }
693
+ // Start task (acquires lock, starts heartbeat)
694
+ const taskId = await this.syncWal.startTask('notes', files.length, manifestHash);
695
+ let indexed = 0;
696
+ let skipped = 0;
697
+ let deleted = 0;
698
+ const errors = [];
699
+ const indexedPaths = await this.getAllIndexedPaths();
700
+ const currentPaths = new Set();
701
+ try {
702
+ for (let i = skipCount; i < files.length; i++) {
703
+ // Check cancellation
704
+ if (this.syncWal.isCancelled(taskId)) {
705
+ await this.syncWal.cancelTask(taskId);
706
+ return { indexed, skipped, deleted, errors };
707
+ }
708
+ // Check pause - wait until resumed or cancelled
709
+ const shouldContinue = await this.syncWal.waitWhilePaused(taskId);
710
+ if (!shouldContinue) {
711
+ // Cancelled while paused
712
+ await this.syncWal.cancelTask(taskId);
713
+ return { indexed, skipped, deleted, errors };
714
+ }
715
+ const file = files[i];
716
+ const relativePath = this.vaultUtils.toRelativePath(file);
717
+ currentPaths.add(relativePath);
718
+ try {
719
+ const stat = await fs.stat(file);
720
+ const existingRecord = await this.getFileRecord(relativePath);
721
+ if (existingRecord && existingRecord.mtime === stat.mtimeMs) {
722
+ skipped++;
723
+ await this.syncWal.fileProcessed(taskId, relativePath, 0, 'updated');
724
+ continue;
725
+ }
726
+ const content = await fs.readFile(file, 'utf-8');
727
+ const fileHash = generateContentHash(content);
728
+ if (existingRecord && existingRecord.content_hash === fileHash) {
729
+ await this.updateFileIndex(relativePath, stat.mtimeMs, fileHash, JSON.parse(existingRecord.chunk_ids));
730
+ skipped++;
731
+ await this.syncWal.fileProcessed(taskId, relativePath, 0, 'updated');
732
+ continue;
733
+ }
734
+ const note = parseNote(relativePath, content);
735
+ const chunks = chunkNote(note, this.chunkingConfig);
736
+ if (existingRecord) {
737
+ const oldChunkIds = JSON.parse(existingRecord.chunk_ids);
738
+ await this.deleteChunksForFile(oldChunkIds);
739
+ }
740
+ const newChunkIds = [];
741
+ for (const chunk of chunks) {
742
+ const result = await this.indexChunk(chunk, stat.mtimeMs);
743
+ if (isError(result)) {
744
+ errors.push(result);
745
+ }
746
+ else {
747
+ newChunkIds.push(result);
748
+ indexed++;
749
+ }
750
+ }
751
+ await this.updateFileIndex(relativePath, stat.mtimeMs, fileHash, newChunkIds);
752
+ await this.syncWal.fileProcessed(taskId, relativePath, newChunkIds.length, existingRecord ? 'updated' : 'added');
753
+ }
754
+ catch (err) {
755
+ const error = createError('INDEX_FILE_ERROR', `Failed to index ${relativePath}`, {
756
+ error: err.message,
757
+ });
758
+ errors.push(error);
759
+ await this.syncWal.fileFailed(taskId, relativePath, err.message, 0);
760
+ }
761
+ }
762
+ // Handle deleted files
763
+ this.syncWal.setPhase('deleting');
764
+ for (const indexedPath of indexedPaths) {
765
+ if (!currentPaths.has(indexedPath)) {
766
+ const record = await this.getFileRecord(indexedPath);
767
+ if (record) {
768
+ const chunkIds = JSON.parse(record.chunk_ids);
769
+ await this.deleteChunksForFile(chunkIds);
770
+ await this.removeFromFileIndex(indexedPath);
771
+ deleted++;
772
+ }
773
+ }
774
+ }
775
+ await this.createOrUpdateVectorIndex();
776
+ await this.createOrUpdateFtsIndex();
777
+ // Complete task
778
+ await this.syncWal.completeTask(taskId);
779
+ }
780
+ catch (err) {
781
+ await this.syncWal.failTask(taskId, err.message);
782
+ throw err;
783
+ }
784
+ return { indexed, skipped, deleted, errors };
785
+ }
786
+ async rebuildFtsIndex() {
787
+ await this.initialize();
788
+ await this.createOrUpdateFtsIndex();
789
+ }
790
+ /**
791
+ * Set the SyncWal instance for unified recovery.
792
+ * Should be called when SyncWal is created after Indexer construction.
793
+ */
794
+ setSyncWal(syncWal) {
795
+ this.syncWal = syncWal;
796
+ }
797
+ /**
798
+ * Get the SyncWal instance.
799
+ */
800
+ getSyncWal() {
801
+ return this.syncWal;
802
+ }
803
+ /**
804
+ * Check if SyncWal is configured.
805
+ */
806
+ hasSyncWal() {
807
+ return this.syncWal !== null;
808
+ }
809
+ /**
810
+ * Clear all indexed data (chunks and file index).
811
+ * Used for force reindex.
812
+ */
813
+ async clearIndex() {
814
+ await this.initialize();
815
+ if (!this.db)
816
+ return;
817
+ const tableNames = await this.db.tableNames();
818
+ if (tableNames.includes(this.config.lancedb.chunksTable)) {
819
+ await this.db.dropTable(this.config.lancedb.chunksTable);
820
+ this.chunksTable = null;
821
+ }
822
+ if (tableNames.includes(this.config.lancedb.fileIndexTable)) {
823
+ await this.db.dropTable(this.config.lancedb.fileIndexTable);
824
+ this.fileIndexTable = null;
825
+ }
826
+ }
827
+ async indexFile(filePath) {
828
+ await this.initialize();
829
+ let absolutePath;
830
+ if (path.isAbsolute(filePath)) {
831
+ absolutePath = filePath;
832
+ }
833
+ else {
834
+ const resolved = this.vaultUtils.resolveNotePath(filePath);
835
+ if (!resolved) {
836
+ return createError('FILE_NOT_FOUND', `File not found in any vault: ${filePath}`);
837
+ }
838
+ absolutePath = resolved;
839
+ }
840
+ const relativePath = this.vaultUtils.toRelativePath(absolutePath);
841
+ try {
842
+ const content = await fs.readFile(absolutePath, 'utf-8');
843
+ const stat = await fs.stat(absolutePath);
844
+ const note = parseNote(relativePath, content);
845
+ const chunks = chunkNote(note, this.chunkingConfig);
846
+ const fileHash = generateContentHash(content);
847
+ await this.removeChunksForPath(relativePath);
848
+ const chunkIds = [];
849
+ for (const chunk of chunks) {
850
+ const result = await this.indexChunk(chunk, stat.mtimeMs);
851
+ if (isError(result))
852
+ return result;
853
+ chunkIds.push(result);
854
+ }
855
+ await this.updateFileIndex(relativePath, stat.mtimeMs, fileHash, chunkIds);
856
+ }
857
+ catch (err) {
858
+ return createError('INDEX_FILE_ERROR', `Failed to index ${relativePath}`, {
859
+ error: err.message,
860
+ });
861
+ }
862
+ }
863
+ async removeFile(filePath) {
864
+ const relativePath = path.isAbsolute(filePath)
865
+ ? this.vaultUtils.toRelativePath(filePath)
866
+ : filePath;
867
+ const record = await this.getFileRecord(relativePath);
868
+ if (record) {
869
+ const chunkIds = JSON.parse(record.chunk_ids);
870
+ await this.deleteChunksForFile(chunkIds);
871
+ await this.removeFromFileIndex(relativePath);
872
+ }
873
+ else {
874
+ await this.removeChunksForPath(relativePath);
875
+ }
876
+ }
877
+ /**
878
+ * Reindex all notes with detailed profiling.
879
+ * Forces full reindex (ignores mtime/content_hash checks) to get accurate timing.
880
+ */
881
+ async reindexWithProfiling() {
882
+ const totalStart = performance.now();
883
+ await this.initialize();
884
+ const errors = [];
885
+ // ============================================================
886
+ // PHASE 1: PARSING
887
+ // ============================================================
888
+ const parsingStart = performance.now();
889
+ const files = await this.vaultUtils.globVaults('**/*.md');
890
+ const allChunks = [];
891
+ let filesProcessed = 0;
892
+ for (const file of files) {
893
+ const relativePath = this.vaultUtils.toRelativePath(file);
894
+ try {
895
+ const stat = await fs.stat(file);
896
+ const content = await fs.readFile(file, 'utf-8');
897
+ const fileHash = generateContentHash(content);
898
+ const note = parseNote(relativePath, content);
899
+ const chunks = chunkNote(note, this.chunkingConfig);
900
+ for (const chunk of chunks) {
901
+ allChunks.push({
902
+ chunk,
903
+ mtime: stat.mtimeMs,
904
+ filePath: relativePath,
905
+ fileHash,
906
+ });
907
+ }
908
+ filesProcessed++;
909
+ }
910
+ catch (err) {
911
+ errors.push(createError('INDEX_FILE_ERROR', `Failed to parse ${relativePath}`, {
912
+ error: err.message,
913
+ }));
914
+ }
915
+ }
916
+ const parsingTimeMs = performance.now() - parsingStart;
917
+ // ============================================================
918
+ // PHASE 2: EMBEDDING
919
+ // ============================================================
920
+ const embeddingStart = performance.now();
921
+ // Extract all texts for batch embedding
922
+ const texts = allChunks.map((item) => item.chunk.content);
923
+ const batchSize = this.config.embedding.provider === 'voyage'
924
+ ? this.config.embedding.voyageBatchSize
925
+ : this.config.embedding.provider === 'ollama' || this.config.embedding.provider === 'localai'
926
+ ? 128
927
+ : 32;
928
+ const batchCount = Math.ceil(texts.length / batchSize);
929
+ let embeddings = [];
930
+ if (texts.length > 0) {
931
+ const result = await this.embeddingProvider.getEmbeddingsBatch(texts, batchSize);
932
+ if (isError(result)) {
933
+ errors.push(result);
934
+ }
935
+ else {
936
+ embeddings = result;
937
+ }
938
+ }
939
+ const embeddingTimeMs = performance.now() - embeddingStart;
940
+ const chunksEmbedded = embeddings.length;
941
+ // ============================================================
942
+ // PHASE 3: DB WRITE
943
+ // ============================================================
944
+ const dbWriteStart = performance.now();
945
+ // Clear existing data for full reindex
946
+ const table = await this.ensureChunksTable();
947
+ const fileTable = await this.ensureFileIndexTable();
948
+ // Delete all existing records
949
+ try {
950
+ await table.delete('id IS NOT NULL');
951
+ }
952
+ catch {
953
+ // Table might be empty
954
+ }
955
+ try {
956
+ await fileTable.delete('path IS NOT NULL');
957
+ }
958
+ catch {
959
+ // Table might be empty
960
+ }
961
+ // Prepare records for bulk insert
962
+ const chunkRecords = [];
963
+ const fileIndexMap = new Map();
964
+ for (let i = 0; i < allChunks.length; i++) {
965
+ const { chunk, mtime, filePath, fileHash } = allChunks[i];
966
+ const embedding = embeddings[i];
967
+ if (!embedding)
968
+ continue; // Skip if embedding failed
969
+ const contentHash = generateContentHash(chunk.content);
970
+ chunkRecords.push({
971
+ id: chunk.id,
972
+ vector: embedding,
973
+ path: chunk.path,
974
+ section: chunk.section,
975
+ title: chunk.title,
976
+ tags: JSON.stringify(chunk.tags),
977
+ links: JSON.stringify(chunk.links),
978
+ modified: mtime,
979
+ content_hash: contentHash,
980
+ content: chunk.content,
981
+ });
982
+ // Track for file index
983
+ if (!fileIndexMap.has(filePath)) {
984
+ fileIndexMap.set(filePath, { mtime, fileHash, chunkIds: [] });
985
+ }
986
+ fileIndexMap.get(filePath).chunkIds.push(chunk.id);
987
+ }
988
+ // Bulk insert chunks
989
+ let chunksWritten = 0;
990
+ if (chunkRecords.length > 0) {
991
+ await table.add(chunkRecords);
992
+ chunksWritten = chunkRecords.length;
993
+ }
994
+ // Insert file index records
995
+ const fileRecords = [];
996
+ for (const [filePath, data] of fileIndexMap) {
997
+ fileRecords.push({
998
+ path: filePath,
999
+ mtime: data.mtime,
1000
+ content_hash: data.fileHash,
1001
+ chunk_ids: JSON.stringify(data.chunkIds),
1002
+ indexed_at: Date.now(),
1003
+ });
1004
+ }
1005
+ if (fileRecords.length > 0) {
1006
+ await fileTable.add(fileRecords);
1007
+ }
1008
+ const dbWriteTimeMs = performance.now() - dbWriteStart;
1009
+ // ============================================================
1010
+ // PHASE 4: VECTOR INDEX
1011
+ // ============================================================
1012
+ const vectorIndexStart = performance.now();
1013
+ await this.createOrUpdateVectorIndex();
1014
+ await this.createOrUpdateFtsIndex();
1015
+ const vectorIndexTimeMs = performance.now() - vectorIndexStart;
1016
+ const totalTimeMs = performance.now() - totalStart;
1017
+ const stats = {
1018
+ totalTimeMs,
1019
+ phases: {
1020
+ parsing: {
1021
+ timeMs: parsingTimeMs,
1022
+ filesProcessed,
1023
+ chunksGenerated: allChunks.length,
1024
+ },
1025
+ embedding: {
1026
+ timeMs: embeddingTimeMs,
1027
+ chunksEmbedded,
1028
+ batchCount,
1029
+ },
1030
+ dbWrite: {
1031
+ timeMs: dbWriteTimeMs,
1032
+ chunksWritten,
1033
+ },
1034
+ vectorIndex: {
1035
+ timeMs: vectorIndexTimeMs,
1036
+ },
1037
+ },
1038
+ };
1039
+ return { stats, indexed: chunksWritten, errors };
1040
+ }
1041
+ /**
1042
+ * Get index statistics.
1043
+ */
1044
+ async getStats() {
1045
+ await this.initialize();
1046
+ const chunksTable = await this.ensureChunksTable();
1047
+ const fileTable = await this.ensureFileIndexTable();
1048
+ const totalChunks = await chunksTable.countRows();
1049
+ const totalFiles = await fileTable.countRows();
1050
+ return { totalChunks, totalFiles };
1051
+ }
1052
+ /**
1053
+ * Get all indexed files with their metadata.
1054
+ * Returns a Map of relative file path to index info.
1055
+ */
1056
+ async getIndexedFiles() {
1057
+ await this.initialize();
1058
+ const table = await this.ensureFileIndexTable();
1059
+ const results = await table.query().toArray();
1060
+ const map = new Map();
1061
+ for (const r of results) {
1062
+ const path = r.path;
1063
+ const chunk_ids = r.chunk_ids;
1064
+ const chunks = chunk_ids ? JSON.parse(chunk_ids).length : 0;
1065
+ map.set(path, {
1066
+ chunks,
1067
+ indexed_at: r.indexed_at,
1068
+ content_hash: r.content_hash,
1069
+ });
1070
+ }
1071
+ return map;
1072
+ }
1073
+ /**
1074
+ * Get the config for this indexer.
1075
+ */
1076
+ getConfig() {
1077
+ return this.config;
1078
+ }
1079
+ /**
1080
+ * Get the embedding provider for this indexer.
1081
+ */
1082
+ getEmbeddingProvider() {
1083
+ return this.embeddingProvider;
1084
+ }
1085
+ /**
1086
+ * Get the vault utils for this indexer.
1087
+ */
1088
+ getVaultUtils() {
1089
+ return this.vaultUtils;
1090
+ }
1091
+ }
1092
+ // Re-export everything from submodules
1093
+ export { parseNote, extractLinks, extractHashtags, extractTitle, generateContentHash } from './parser.js';
1094
+ export { chunkNote } from './chunker.js';
1095
+ export { WasmEmbeddingProvider, VoyageEmbeddingProvider, OllamaEmbeddingProvider, createEmbeddingProvider, } from './embeddings.js';
1096
+ //# sourceMappingURL=index.js.map