grepmind-core 0.1.0-alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (370) hide show
  1. package/LICENSE +190 -0
  2. package/dist/config/types.d.ts +174 -0
  3. package/dist/config/types.d.ts.map +1 -0
  4. package/dist/config/types.js +137 -0
  5. package/dist/config/types.js.map +1 -0
  6. package/dist/git.d.ts +98 -0
  7. package/dist/git.d.ts.map +1 -0
  8. package/dist/git.js +298 -0
  9. package/dist/git.js.map +1 -0
  10. package/dist/git.test.d.ts +7 -0
  11. package/dist/git.test.d.ts.map +1 -0
  12. package/dist/git.test.js +242 -0
  13. package/dist/git.test.js.map +1 -0
  14. package/dist/index.d.ts +44 -0
  15. package/dist/index.d.ts.map +1 -0
  16. package/dist/index.js +67 -0
  17. package/dist/index.js.map +1 -0
  18. package/dist/indexer/branch.d.ts +121 -0
  19. package/dist/indexer/branch.d.ts.map +1 -0
  20. package/dist/indexer/branch.js +451 -0
  21. package/dist/indexer/branch.js.map +1 -0
  22. package/dist/indexer/chunker.d.ts +9 -0
  23. package/dist/indexer/chunker.d.ts.map +1 -0
  24. package/dist/indexer/chunker.js +70 -0
  25. package/dist/indexer/chunker.js.map +1 -0
  26. package/dist/indexer/chunker.test.d.ts +2 -0
  27. package/dist/indexer/chunker.test.d.ts.map +1 -0
  28. package/dist/indexer/chunker.test.js +180 -0
  29. package/dist/indexer/chunker.test.js.map +1 -0
  30. package/dist/indexer/code/branch.d.ts +155 -0
  31. package/dist/indexer/code/branch.d.ts.map +1 -0
  32. package/dist/indexer/code/branch.js +550 -0
  33. package/dist/indexer/code/branch.js.map +1 -0
  34. package/dist/indexer/code/branch.test.d.ts +7 -0
  35. package/dist/indexer/code/branch.test.d.ts.map +1 -0
  36. package/dist/indexer/code/branch.test.js +241 -0
  37. package/dist/indexer/code/branch.test.js.map +1 -0
  38. package/dist/indexer/code/chunker.d.ts +61 -0
  39. package/dist/indexer/code/chunker.d.ts.map +1 -0
  40. package/dist/indexer/code/chunker.js +311 -0
  41. package/dist/indexer/code/chunker.js.map +1 -0
  42. package/dist/indexer/code/chunker.test.d.ts +2 -0
  43. package/dist/indexer/code/chunker.test.d.ts.map +1 -0
  44. package/dist/indexer/code/chunker.test.js +552 -0
  45. package/dist/indexer/code/chunker.test.js.map +1 -0
  46. package/dist/indexer/code/fts.test.d.ts +2 -0
  47. package/dist/indexer/code/fts.test.d.ts.map +1 -0
  48. package/dist/indexer/code/fts.test.js +14 -0
  49. package/dist/indexer/code/fts.test.js.map +1 -0
  50. package/dist/indexer/code/graph/embedded.d.ts +11 -0
  51. package/dist/indexer/code/graph/embedded.d.ts.map +1 -0
  52. package/dist/indexer/code/graph/embedded.js +152 -0
  53. package/dist/indexer/code/graph/embedded.js.map +1 -0
  54. package/dist/indexer/code/graph/embedded.test.d.ts +2 -0
  55. package/dist/indexer/code/graph/embedded.test.d.ts.map +1 -0
  56. package/dist/indexer/code/graph/embedded.test.js +105 -0
  57. package/dist/indexer/code/graph/embedded.test.js.map +1 -0
  58. package/dist/indexer/code/graph/facts.d.ts +11 -0
  59. package/dist/indexer/code/graph/facts.d.ts.map +1 -0
  60. package/dist/indexer/code/graph/facts.js +456 -0
  61. package/dist/indexer/code/graph/facts.js.map +1 -0
  62. package/dist/indexer/code/graph/facts.test.d.ts +2 -0
  63. package/dist/indexer/code/graph/facts.test.d.ts.map +1 -0
  64. package/dist/indexer/code/graph/facts.test.js +181 -0
  65. package/dist/indexer/code/graph/facts.test.js.map +1 -0
  66. package/dist/indexer/code/graph/id.d.ts +14 -0
  67. package/dist/indexer/code/graph/id.d.ts.map +1 -0
  68. package/dist/indexer/code/graph/id.js +40 -0
  69. package/dist/indexer/code/graph/id.js.map +1 -0
  70. package/dist/indexer/code/graph/id.test.d.ts +2 -0
  71. package/dist/indexer/code/graph/id.test.d.ts.map +1 -0
  72. package/dist/indexer/code/graph/id.test.js +86 -0
  73. package/dist/indexer/code/graph/id.test.js.map +1 -0
  74. package/dist/indexer/code/graph/index.d.ts +133 -0
  75. package/dist/indexer/code/graph/index.d.ts.map +1 -0
  76. package/dist/indexer/code/graph/index.js +1876 -0
  77. package/dist/indexer/code/graph/index.js.map +1 -0
  78. package/dist/indexer/code/graph/index.test.d.ts +2 -0
  79. package/dist/indexer/code/graph/index.test.d.ts.map +1 -0
  80. package/dist/indexer/code/graph/index.test.js +210 -0
  81. package/dist/indexer/code/graph/index.test.js.map +1 -0
  82. package/dist/indexer/code/graph/queries.d.ts +22 -0
  83. package/dist/indexer/code/graph/queries.d.ts.map +1 -0
  84. package/dist/indexer/code/graph/queries.js +79 -0
  85. package/dist/indexer/code/graph/queries.js.map +1 -0
  86. package/dist/indexer/code/graph/queries.test.d.ts +2 -0
  87. package/dist/indexer/code/graph/queries.test.d.ts.map +1 -0
  88. package/dist/indexer/code/graph/queries.test.js +108 -0
  89. package/dist/indexer/code/graph/queries.test.js.map +1 -0
  90. package/dist/indexer/code/graph/resolver.d.ts +136 -0
  91. package/dist/indexer/code/graph/resolver.d.ts.map +1 -0
  92. package/dist/indexer/code/graph/resolver.js +839 -0
  93. package/dist/indexer/code/graph/resolver.js.map +1 -0
  94. package/dist/indexer/code/graph/resolver.test.d.ts +2 -0
  95. package/dist/indexer/code/graph/resolver.test.d.ts.map +1 -0
  96. package/dist/indexer/code/graph/resolver.test.js +482 -0
  97. package/dist/indexer/code/graph/resolver.test.js.map +1 -0
  98. package/dist/indexer/code/graph/semantic.d.ts +33 -0
  99. package/dist/indexer/code/graph/semantic.d.ts.map +1 -0
  100. package/dist/indexer/code/graph/semantic.js +279 -0
  101. package/dist/indexer/code/graph/semantic.js.map +1 -0
  102. package/dist/indexer/code/graph/semantic.test.d.ts +2 -0
  103. package/dist/indexer/code/graph/semantic.test.d.ts.map +1 -0
  104. package/dist/indexer/code/graph/semantic.test.js +127 -0
  105. package/dist/indexer/code/graph/semantic.test.js.map +1 -0
  106. package/dist/indexer/code/index.d.ts +404 -0
  107. package/dist/indexer/code/index.d.ts.map +1 -0
  108. package/dist/indexer/code/index.js +2070 -0
  109. package/dist/indexer/code/index.js.map +1 -0
  110. package/dist/indexer/code/languages/bash.d.ts +14 -0
  111. package/dist/indexer/code/languages/bash.d.ts.map +1 -0
  112. package/dist/indexer/code/languages/bash.js +125 -0
  113. package/dist/indexer/code/languages/bash.js.map +1 -0
  114. package/dist/indexer/code/languages/css.d.ts +16 -0
  115. package/dist/indexer/code/languages/css.d.ts.map +1 -0
  116. package/dist/indexer/code/languages/css.js +204 -0
  117. package/dist/indexer/code/languages/css.js.map +1 -0
  118. package/dist/indexer/code/languages/generic.d.ts +61 -0
  119. package/dist/indexer/code/languages/generic.d.ts.map +1 -0
  120. package/dist/indexer/code/languages/generic.js +150 -0
  121. package/dist/indexer/code/languages/generic.js.map +1 -0
  122. package/dist/indexer/code/languages/graphql.d.ts +13 -0
  123. package/dist/indexer/code/languages/graphql.d.ts.map +1 -0
  124. package/dist/indexer/code/languages/graphql.js +180 -0
  125. package/dist/indexer/code/languages/graphql.js.map +1 -0
  126. package/dist/indexer/code/languages/html.d.ts +16 -0
  127. package/dist/indexer/code/languages/html.d.ts.map +1 -0
  128. package/dist/indexer/code/languages/html.js +138 -0
  129. package/dist/indexer/code/languages/html.js.map +1 -0
  130. package/dist/indexer/code/languages/index.d.ts +9 -0
  131. package/dist/indexer/code/languages/index.d.ts.map +1 -0
  132. package/dist/indexer/code/languages/index.js +12 -0
  133. package/dist/indexer/code/languages/index.js.map +1 -0
  134. package/dist/indexer/code/languages/json.d.ts +12 -0
  135. package/dist/indexer/code/languages/json.d.ts.map +1 -0
  136. package/dist/indexer/code/languages/json.js +66 -0
  137. package/dist/indexer/code/languages/json.js.map +1 -0
  138. package/dist/indexer/code/languages/registry.d.ts +78 -0
  139. package/dist/indexer/code/languages/registry.d.ts.map +1 -0
  140. package/dist/indexer/code/languages/registry.js +72 -0
  141. package/dist/indexer/code/languages/registry.js.map +1 -0
  142. package/dist/indexer/code/languages/typescript.d.ts +39 -0
  143. package/dist/indexer/code/languages/typescript.d.ts.map +1 -0
  144. package/dist/indexer/code/languages/typescript.js +300 -0
  145. package/dist/indexer/code/languages/typescript.js.map +1 -0
  146. package/dist/indexer/code/languages/yaml.d.ts +13 -0
  147. package/dist/indexer/code/languages/yaml.d.ts.map +1 -0
  148. package/dist/indexer/code/languages/yaml.js +90 -0
  149. package/dist/indexer/code/languages/yaml.js.map +1 -0
  150. package/dist/indexer/code/parser.d.ts +26 -0
  151. package/dist/indexer/code/parser.d.ts.map +1 -0
  152. package/dist/indexer/code/parser.js +332 -0
  153. package/dist/indexer/code/parser.js.map +1 -0
  154. package/dist/indexer/code/retry.d.ts +58 -0
  155. package/dist/indexer/code/retry.d.ts.map +1 -0
  156. package/dist/indexer/code/retry.js +192 -0
  157. package/dist/indexer/code/retry.js.map +1 -0
  158. package/dist/indexer/code/tree/builder.d.ts +30 -0
  159. package/dist/indexer/code/tree/builder.d.ts.map +1 -0
  160. package/dist/indexer/code/tree/builder.js +132 -0
  161. package/dist/indexer/code/tree/builder.js.map +1 -0
  162. package/dist/indexer/code/tree/builder.test.d.ts +2 -0
  163. package/dist/indexer/code/tree/builder.test.d.ts.map +1 -0
  164. package/dist/indexer/code/tree/builder.test.js +31 -0
  165. package/dist/indexer/code/tree/builder.test.js.map +1 -0
  166. package/dist/indexer/code/tree/cache.d.ts +22 -0
  167. package/dist/indexer/code/tree/cache.d.ts.map +1 -0
  168. package/dist/indexer/code/tree/cache.js +85 -0
  169. package/dist/indexer/code/tree/cache.js.map +1 -0
  170. package/dist/indexer/code/tree/context.d.ts +32 -0
  171. package/dist/indexer/code/tree/context.d.ts.map +1 -0
  172. package/dist/indexer/code/tree/context.js +78 -0
  173. package/dist/indexer/code/tree/context.js.map +1 -0
  174. package/dist/indexer/code/tree/embedding.d.ts +9 -0
  175. package/dist/indexer/code/tree/embedding.d.ts.map +1 -0
  176. package/dist/indexer/code/tree/embedding.js +53 -0
  177. package/dist/indexer/code/tree/embedding.js.map +1 -0
  178. package/dist/indexer/code/tree/embedding.test.d.ts +2 -0
  179. package/dist/indexer/code/tree/embedding.test.d.ts.map +1 -0
  180. package/dist/indexer/code/tree/embedding.test.js +57 -0
  181. package/dist/indexer/code/tree/embedding.test.js.map +1 -0
  182. package/dist/indexer/code/tree/id.d.ts +3 -0
  183. package/dist/indexer/code/tree/id.d.ts.map +1 -0
  184. package/dist/indexer/code/tree/id.js +8 -0
  185. package/dist/indexer/code/tree/id.js.map +1 -0
  186. package/dist/indexer/code/tree/index.d.ts +113 -0
  187. package/dist/indexer/code/tree/index.d.ts.map +1 -0
  188. package/dist/indexer/code/tree/index.js +1146 -0
  189. package/dist/indexer/code/tree/index.js.map +1 -0
  190. package/dist/indexer/code/tree/rename.d.ts +13 -0
  191. package/dist/indexer/code/tree/rename.d.ts.map +1 -0
  192. package/dist/indexer/code/tree/rename.js +46 -0
  193. package/dist/indexer/code/tree/rename.js.map +1 -0
  194. package/dist/indexer/code/tree/repomap.d.ts +29 -0
  195. package/dist/indexer/code/tree/repomap.d.ts.map +1 -0
  196. package/dist/indexer/code/tree/repomap.js +95 -0
  197. package/dist/indexer/code/tree/repomap.js.map +1 -0
  198. package/dist/indexer/code/tree/repomap.test.d.ts +2 -0
  199. package/dist/indexer/code/tree/repomap.test.d.ts.map +1 -0
  200. package/dist/indexer/code/tree/repomap.test.js +93 -0
  201. package/dist/indexer/code/tree/repomap.test.js.map +1 -0
  202. package/dist/indexer/code/tree/stats.d.ts +26 -0
  203. package/dist/indexer/code/tree/stats.d.ts.map +1 -0
  204. package/dist/indexer/code/tree/stats.js +49 -0
  205. package/dist/indexer/code/tree/stats.js.map +1 -0
  206. package/dist/indexer/code/tree/types.d.ts +186 -0
  207. package/dist/indexer/code/tree/types.d.ts.map +1 -0
  208. package/dist/indexer/code/tree/types.js +10 -0
  209. package/dist/indexer/code/tree/types.js.map +1 -0
  210. package/dist/indexer/code/wal.d.ts +144 -0
  211. package/dist/indexer/code/wal.d.ts.map +1 -0
  212. package/dist/indexer/code/wal.js +283 -0
  213. package/dist/indexer/code/wal.js.map +1 -0
  214. package/dist/indexer/embeddings.d.ts +113 -0
  215. package/dist/indexer/embeddings.d.ts.map +1 -0
  216. package/dist/indexer/embeddings.js +477 -0
  217. package/dist/indexer/embeddings.js.map +1 -0
  218. package/dist/indexer/git-sync.d.ts +117 -0
  219. package/dist/indexer/git-sync.d.ts.map +1 -0
  220. package/dist/indexer/git-sync.js +398 -0
  221. package/dist/indexer/git-sync.js.map +1 -0
  222. package/dist/indexer/index.d.ts +175 -0
  223. package/dist/indexer/index.d.ts.map +1 -0
  224. package/dist/indexer/index.js +1096 -0
  225. package/dist/indexer/index.js.map +1 -0
  226. package/dist/indexer/mocks/mock-reranker.d.ts +12 -0
  227. package/dist/indexer/mocks/mock-reranker.d.ts.map +1 -0
  228. package/dist/indexer/mocks/mock-reranker.js +26 -0
  229. package/dist/indexer/mocks/mock-reranker.js.map +1 -0
  230. package/dist/indexer/parser.d.ts +8 -0
  231. package/dist/indexer/parser.d.ts.map +1 -0
  232. package/dist/indexer/parser.js +44 -0
  233. package/dist/indexer/parser.js.map +1 -0
  234. package/dist/indexer/parser.test.d.ts +2 -0
  235. package/dist/indexer/parser.test.d.ts.map +1 -0
  236. package/dist/indexer/parser.test.js +197 -0
  237. package/dist/indexer/parser.test.js.map +1 -0
  238. package/dist/indexer/reranking.d.ts +71 -0
  239. package/dist/indexer/reranking.d.ts.map +1 -0
  240. package/dist/indexer/reranking.integration.test.d.ts +2 -0
  241. package/dist/indexer/reranking.integration.test.d.ts.map +1 -0
  242. package/dist/indexer/reranking.integration.test.js +104 -0
  243. package/dist/indexer/reranking.integration.test.js.map +1 -0
  244. package/dist/indexer/reranking.js +256 -0
  245. package/dist/indexer/reranking.js.map +1 -0
  246. package/dist/indexer/reranking.test.d.ts +2 -0
  247. package/dist/indexer/reranking.test.d.ts.map +1 -0
  248. package/dist/indexer/reranking.test.js +130 -0
  249. package/dist/indexer/reranking.test.js.map +1 -0
  250. package/dist/indexer/wal/file-storage.d.ts +60 -0
  251. package/dist/indexer/wal/file-storage.d.ts.map +1 -0
  252. package/dist/indexer/wal/file-storage.js +277 -0
  253. package/dist/indexer/wal/file-storage.js.map +1 -0
  254. package/dist/indexer/wal/file-storage.test.d.ts +8 -0
  255. package/dist/indexer/wal/file-storage.test.d.ts.map +1 -0
  256. package/dist/indexer/wal/file-storage.test.js +444 -0
  257. package/dist/indexer/wal/file-storage.test.js.map +1 -0
  258. package/dist/indexer/wal/index.d.ts +41 -0
  259. package/dist/indexer/wal/index.d.ts.map +1 -0
  260. package/dist/indexer/wal/index.js +61 -0
  261. package/dist/indexer/wal/index.js.map +1 -0
  262. package/dist/indexer/wal/integration.test.d.ts +11 -0
  263. package/dist/indexer/wal/integration.test.d.ts.map +1 -0
  264. package/dist/indexer/wal/integration.test.js +378 -0
  265. package/dist/indexer/wal/integration.test.js.map +1 -0
  266. package/dist/indexer/wal/lancedb-storage.d.ts +72 -0
  267. package/dist/indexer/wal/lancedb-storage.d.ts.map +1 -0
  268. package/dist/indexer/wal/lancedb-storage.js +462 -0
  269. package/dist/indexer/wal/lancedb-storage.js.map +1 -0
  270. package/dist/indexer/wal/lancedb-storage.test.d.ts +8 -0
  271. package/dist/indexer/wal/lancedb-storage.test.d.ts.map +1 -0
  272. package/dist/indexer/wal/lancedb-storage.test.js +415 -0
  273. package/dist/indexer/wal/lancedb-storage.test.js.map +1 -0
  274. package/dist/indexer/wal/sync-wal.d.ts +144 -0
  275. package/dist/indexer/wal/sync-wal.d.ts.map +1 -0
  276. package/dist/indexer/wal/sync-wal.js +863 -0
  277. package/dist/indexer/wal/sync-wal.js.map +1 -0
  278. package/dist/indexer/wal/sync-wal.test.d.ts +8 -0
  279. package/dist/indexer/wal/sync-wal.test.d.ts.map +1 -0
  280. package/dist/indexer/wal/sync-wal.test.js +752 -0
  281. package/dist/indexer/wal/sync-wal.test.js.map +1 -0
  282. package/dist/indexer/wal/types.d.ts +167 -0
  283. package/dist/indexer/wal/types.d.ts.map +1 -0
  284. package/dist/indexer/wal/types.js +12 -0
  285. package/dist/indexer/wal/types.js.map +1 -0
  286. package/dist/indexer/watcher.d.ts +36 -0
  287. package/dist/indexer/watcher.d.ts.map +1 -0
  288. package/dist/indexer/watcher.js +110 -0
  289. package/dist/indexer/watcher.js.map +1 -0
  290. package/dist/search/explore.d.ts +62 -0
  291. package/dist/search/explore.d.ts.map +1 -0
  292. package/dist/search/explore.js +111 -0
  293. package/dist/search/explore.js.map +1 -0
  294. package/dist/search/fts.d.ts +23 -0
  295. package/dist/search/fts.d.ts.map +1 -0
  296. package/dist/search/fts.js +64 -0
  297. package/dist/search/fts.js.map +1 -0
  298. package/dist/search/fts.test.d.ts +2 -0
  299. package/dist/search/fts.test.d.ts.map +1 -0
  300. package/dist/search/fts.test.js +27 -0
  301. package/dist/search/fts.test.js.map +1 -0
  302. package/dist/search/grep.d.ts +75 -0
  303. package/dist/search/grep.d.ts.map +1 -0
  304. package/dist/search/grep.js +96 -0
  305. package/dist/search/grep.js.map +1 -0
  306. package/dist/search/grep.test.d.ts +2 -0
  307. package/dist/search/grep.test.d.ts.map +1 -0
  308. package/dist/search/grep.test.js +178 -0
  309. package/dist/search/grep.test.js.map +1 -0
  310. package/dist/search/hybrid-grep.d.ts +43 -0
  311. package/dist/search/hybrid-grep.d.ts.map +1 -0
  312. package/dist/search/hybrid-grep.js +130 -0
  313. package/dist/search/hybrid-grep.js.map +1 -0
  314. package/dist/search/hybrid-grep.test.d.ts +2 -0
  315. package/dist/search/hybrid-grep.test.d.ts.map +1 -0
  316. package/dist/search/hybrid-grep.test.js +133 -0
  317. package/dist/search/hybrid-grep.test.js.map +1 -0
  318. package/dist/search/rg-executor.d.ts +63 -0
  319. package/dist/search/rg-executor.d.ts.map +1 -0
  320. package/dist/search/rg-executor.js +146 -0
  321. package/dist/search/rg-executor.js.map +1 -0
  322. package/dist/search/rg-executor.test.d.ts +2 -0
  323. package/dist/search/rg-executor.test.d.ts.map +1 -0
  324. package/dist/search/rg-executor.test.js +104 -0
  325. package/dist/search/rg-executor.test.js.map +1 -0
  326. package/dist/search/rg-parser/extractor.d.ts +14 -0
  327. package/dist/search/rg-parser/extractor.d.ts.map +1 -0
  328. package/dist/search/rg-parser/extractor.js +82 -0
  329. package/dist/search/rg-parser/extractor.js.map +1 -0
  330. package/dist/search/rg-parser/extractor.test.d.ts +2 -0
  331. package/dist/search/rg-parser/extractor.test.d.ts.map +1 -0
  332. package/dist/search/rg-parser/extractor.test.js +35 -0
  333. package/dist/search/rg-parser/extractor.test.js.map +1 -0
  334. package/dist/search/rg-parser/fts-builder.d.ts +7 -0
  335. package/dist/search/rg-parser/fts-builder.d.ts.map +1 -0
  336. package/dist/search/rg-parser/fts-builder.js +18 -0
  337. package/dist/search/rg-parser/fts-builder.js.map +1 -0
  338. package/dist/search/rg-parser/fts-builder.test.d.ts +2 -0
  339. package/dist/search/rg-parser/fts-builder.test.d.ts.map +1 -0
  340. package/dist/search/rg-parser/fts-builder.test.js +26 -0
  341. package/dist/search/rg-parser/fts-builder.test.js.map +1 -0
  342. package/dist/search/rg-parser/index.d.ts +36 -0
  343. package/dist/search/rg-parser/index.d.ts.map +1 -0
  344. package/dist/search/rg-parser/index.js +83 -0
  345. package/dist/search/rg-parser/index.js.map +1 -0
  346. package/dist/search/rg-parser/index.test.d.ts +2 -0
  347. package/dist/search/rg-parser/index.test.d.ts.map +1 -0
  348. package/dist/search/rg-parser/index.test.js +34 -0
  349. package/dist/search/rg-parser/index.test.js.map +1 -0
  350. package/dist/search/rg-parser/strategy.d.ts +14 -0
  351. package/dist/search/rg-parser/strategy.d.ts.map +1 -0
  352. package/dist/search/rg-parser/strategy.js +31 -0
  353. package/dist/search/rg-parser/strategy.js.map +1 -0
  354. package/dist/search/rg-parser/strategy.test.d.ts +2 -0
  355. package/dist/search/rg-parser/strategy.test.d.ts.map +1 -0
  356. package/dist/search/rg-parser/strategy.test.js +29 -0
  357. package/dist/search/rg-parser/strategy.test.js.map +1 -0
  358. package/dist/types.d.ts +345 -0
  359. package/dist/types.d.ts.map +1 -0
  360. package/dist/types.js +7 -0
  361. package/dist/types.js.map +1 -0
  362. package/dist/utils/vault.d.ts +84 -0
  363. package/dist/utils/vault.d.ts.map +1 -0
  364. package/dist/utils/vault.js +138 -0
  365. package/dist/utils/vault.js.map +1 -0
  366. package/dist/utils/vault.test.d.ts +2 -0
  367. package/dist/utils/vault.test.d.ts.map +1 -0
  368. package/dist/utils/vault.test.js +153 -0
  369. package/dist/utils/vault.test.js.map +1 -0
  370. package/package.json +69 -0
@@ -0,0 +1,2070 @@
1
+ // CodeIndexer - main code indexing class
2
+ // Refactored for DI: accepts config and embedding provider via constructor
3
+ import * as lancedb from '@lancedb/lancedb';
4
+ import { BooleanQuery, MatchQuery, Occur, PhraseQuery } from '@lancedb/lancedb';
5
+ import { glob } from 'glob';
6
+ import fs from 'node:fs/promises';
7
+ import { existsSync, mkdirSync, rmSync, statSync, readFileSync } from 'node:fs';
8
+ import path from 'node:path';
9
+ import crypto from 'node:crypto';
10
+ import _ignore from 'ignore';
11
+ const ignore = _ignore.default || _ignore;
12
+ import { minimatch } from 'minimatch';
13
+ import { DEFAULT_RERANKING_CONFIG, } from '../../config/types.js';
14
+ import { createTreeSitterParser } from './parser.js';
15
+ import { applyStableChunkIds, symbolsToChunks, createFileChunk, buildEmbeddingText, generateContentHash, } from './chunker.js';
16
+ import { languageRegistry } from './languages/index.js';
17
+ import { createError, isError } from '../../types.js';
18
+ import { CodeIndexWal } from './wal.js';
19
+ import { withRetry } from './retry.js';
20
+ import { GraphIndexer } from './graph/index.js';
21
+ import { buildPathPrefixFilter, combineFilters } from '../../search/fts.js';
22
+ import { TreeIndexer } from './tree/index.js';
23
+ // Import language extractors to register them
24
+ import './languages/typescript.js';
25
+ import './languages/css.js';
26
+ import './languages/graphql.js';
27
+ import './languages/json.js';
28
+ import './languages/yaml.js';
29
+ import './languages/html.js';
30
+ import './languages/bash.js';
31
// How old (by mtime) the index lock may be before it is considered stale.
const LOCK_TIMEOUT_MS = 60 * 1000; // 1 minute

// Baseline retry settings for embedding calls.
// NOTE(review): the bare '5' marker presumably targets 5xx HTTP status codes; how
// markers are matched is decided by withRetry (not visible here) - confirm it
// cannot match unrelated messages that merely contain the digit 5.
const DEFAULT_EMBEDDING_RETRY = {
    maxRetries: 3,
    baseDelayMs: 1_000,
    maxDelayMs: 30_000,
    retryableErrors: [
        // Rate limiting
        '429', 'rate limit', 'Rate limit', 'RATE_LIMIT',
        // Server-side failures
        '5',
        // Network and timeout failures
        'ECONNRESET', 'ETIMEDOUT', 'timeout',
        // Availability messages
        'temporarily unavailable', 'service unavailable',
    ],
};
51
/**
 * Break identifier-style text into space-separated words.
 *
 * A space is inserted wherever a lowercase letter or digit is directly followed
 * by an uppercase letter, runs of `_` / `-` become a single space, and all
 * whitespace runs are collapsed before the result is trimmed.
 *
 * @param {string} content - Text to normalize.
 * @returns {string} The normalized text.
 */
export function normalizeIdentifiers(content) {
    const camelSplit = content.replace(/([a-z0-9])([A-Z])/g, '$1 $2');
    const separatorsSpaced = camelSplit.replace(/[_-]+/g, ' ');
    const collapsed = separatorsSpaced.replace(/\s+/g, ' ');
    return collapsed.trim();
}
58
/**
 * Build the snake_case LanceDB row for a camelCase CodeChunk.
 * The column names are snake_case because LanceDB SQL doesn't support
 * camelCase column names.
 *
 * @param {object} chunk - Source chunk (camelCase fields).
 * @param {*} vector - Embedding stored in the `vector` column as given.
 * @param {boolean} normalizeContent - When truthy, `content_normalized` holds
 *   `normalizeIdentifiers(content)`; otherwise it is the raw content.
 * @returns {object} Row object ready to be written to the chunks table.
 */
function toChunkRecord(chunk, vector, normalizeContent) {
    const {
        id,
        path: filePath,
        language,
        symbolType,
        symbolName,
        signature,
        parentSymbol,
        scope,
        content,
        startLine,
        endLine,
        docstring,
        modified,
        contentHash,
    } = chunk;
    let contentNormalized = content;
    if (normalizeContent) {
        contentNormalized = normalizeIdentifiers(content);
    }
    return {
        id,
        vector,
        path: filePath,
        language,
        symbol_type: symbolType,
        symbol_name: symbolName,
        signature,
        parent_symbol: parentSymbol,
        // Scope is persisted as a JSON string.
        scope: JSON.stringify(scope),
        content,
        content_normalized: contentNormalized,
        start_line: startLine,
        end_line: endLine,
        docstring,
        modified,
        content_hash: contentHash,
    };
}
83
/**
 * Turn a snake_case LanceDB row back into a camelCase CodeSearchResult.
 * The row uses snake_case because LanceDB SQL doesn't support camelCase
 * column names.
 *
 * @param {object} r - Row read from the chunks table.
 * @returns {object} Search result; `scope` is parsed from its JSON string
 *   (an empty/missing value yields `[]`), and `score` is `1 - _distance / 2`
 *   when the row carries `_distance`, otherwise `1.0`.
 */
function fromChunkRecord(r) {
    const scope = JSON.parse(r.scope || '[]');
    let score = 1.0;
    if (r._distance !== undefined) {
        score = 1 - r._distance / 2;
    }
    return {
        id: r.id,
        path: r.path,
        language: r.language,
        symbolType: r.symbol_type,
        symbolName: r.symbol_name,
        signature: r.signature,
        parentSymbol: r.parent_symbol,
        scope,
        content: r.content,
        startLine: r.start_line,
        endLine: r.end_line,
        docstring: r.docstring,
        modified: r.modified,
        contentHash: r.content_hash,
        score,
    };
}
106
// Default include patterns: one `**/*.<ext>` glob per supported extension.
const DEFAULT_INCLUDE = [
    // TypeScript / JavaScript
    'ts', 'tsx', 'js', 'jsx', 'mjs', 'cjs',
    // Stylesheets
    'css',
    // GraphQL
    'graphql', 'gql', 'graphqls',
    // Data / config
    'json', 'yaml', 'yml',
    // Markup
    'html', 'htm',
    // Shell
    'sh', 'bash', 'zsh',
].map((extension) => `**/*.${extension}`);

// Default exclude patterns
const DEFAULT_EXCLUDE = [
    // Dependencies and build output
    '**/node_modules/**', '**/dist/**', '**/build/**', '**/out/**',
    // VCS and framework directories
    '**/.git/**', '**/.next/**', '**/.nuxt/**', '**/.svelte-kit/**',
    // Tooling artifacts
    '**/coverage/**', '**/__pycache__/**',
    // Generated JavaScript
    '**/*.min.js', '**/*.bundle.js', '**/*.chunk.js',
    // Vendored code
    '**/vendor/**', '**/third_party/**',
    // Declaration files excluded by default
    '**/*.d.ts',
];

// Languages accepted when sanitizing search filters.
const VALID_LANGUAGES = new Set([
    'typescript', 'tsx', 'javascript', 'jsx',
    'css', 'graphql', 'json', 'yaml', 'html', 'bash',
]);

// Symbol types accepted when sanitizing search filters.
const VALID_SYMBOL_TYPES = new Set([
    'function', 'class', 'method', 'interface',
    'type', 'variable', 'comment', 'file',
]);
170
/**
 * Validate an identifier before it is placed in a SQL WHERE clause.
 *
 * @param {string} value - Candidate identifier.
 * @returns {string} The same value, unchanged, when it consists only of
 *   ASCII letters, digits, `_` and `-` (at least one character).
 * @throws {Error} `Invalid identifier: <value>` for anything else.
 */
function sanitizeIdentifier(value) {
    const isSafe = /^[a-zA-Z0-9_-]+$/.test(value);
    if (isSafe) {
        return value;
    }
    throw new Error(`Invalid identifier: ${value}`);
}
179
/**
 * Escape a path for use inside a single-quoted SQL string literal.
 *
 * @param {string} value - Path to escape.
 * @returns {string} The path with every `'` doubled to `''`.
 */
function sanitizePath(value) {
    const SINGLE_QUOTE = "'";
    const pieces = value.split(SINGLE_QUOTE);
    return pieces.join("''");
}
185
// Try to create the lock directory; returns false when it already exists.
function tryCreateLockDir(lockPath) {
    try {
        mkdirSync(lockPath);
        return true;
    }
    catch (err) {
        if (err.code === 'EEXIST') {
            return false;
        }
        throw err;
    }
}
/**
 * Execute function with index lock to prevent concurrent indexing.
 *
 * The lock is a directory named `code_index.lock` inside `lockDir`; creating it
 * with mkdir is the atomic acquisition step. An existing lock whose mtime is
 * older than LOCK_TIMEOUT_MS is treated as stale, removed, and acquisition is
 * retried once. Losing that retry to another process is reported with the same
 * "locked" error as a fresh lock rather than a raw EEXIST.
 *
 * NOTE: nothing in this function refreshes the lock's mtime, so a holder that
 * runs longer than LOCK_TIMEOUT_MS can have its lock taken over as stale.
 *
 * @param {string} lockDir - Directory in which the lock directory is created.
 * @param {Function} fn - Work to run while holding the lock; may be async.
 * @returns {Promise<*>} Whatever `fn` resolves to.
 * @throws {Error} `Code index is locked by another process` when the lock is
 *   held and not stale; other filesystem errors and errors from `fn` propagate.
 */
async function withIndexLock(lockDir, fn) {
    const lockPath = path.join(lockDir, 'code_index.lock');
    if (!tryCreateLockDir(lockPath)) {
        let canTakeOver;
        try {
            const lockAgeMs = Date.now() - statSync(lockPath).mtimeMs;
            canTakeOver = lockAgeMs > LOCK_TIMEOUT_MS;
        }
        catch (statErr) {
            if (statErr.code !== 'ENOENT') {
                throw statErr;
            }
            // Lock was removed between the failed mkdir and stat - just retry.
            canTakeOver = true;
        }
        if (canTakeOver) {
            // force: the stale lock may already have been removed by someone else.
            rmSync(lockPath, { recursive: true, force: true });
        }
        if (!canTakeOver || !tryCreateLockDir(lockPath)) {
            throw new Error('Code index is locked by another process');
        }
    }
    try {
        return await fn();
    }
    finally {
        rmSync(lockPath, { recursive: true, force: true });
    }
}
229
+ /**
230
+ * CodeIndexer - indexes code files using Tree-sitter and LanceDB.
231
+ */
232
+ export class CodeIndexer {
233
+ db = null;
234
+ chunksTable = null;
235
+ fileIndexTable = null;
236
+ initialized = false;
237
+ rootPath;
238
+ dbPath;
239
+ localStateDir;
240
+ metaPath;
241
+ gitignore = null;
242
+ wal;
243
+ walEnabled;
244
+ retryOptions;
245
+ parser;
246
+ graphIndexer = null;
247
+ treeIndexer = null;
248
+ // Injected dependencies
249
+ embeddingProvider;
250
+ rerankerProvider;
251
+ embeddingConfig;
252
+ rerankingConfig;
253
+ codeConfig;
254
+ codeFtsConfig;
255
+ lancedbStorageOptions;
256
+ // Unified SyncWal (optional - when provided, uses unified recovery)
257
+ syncWal = null;
258
+ constructor(rootPath, dbPath, deps, options = {}) {
259
+ this.rootPath = path.resolve(rootPath);
260
+ this.dbPath = dbPath;
261
+ this.localStateDir = options.localStateDir ?? dbPath;
262
+ this.metaPath = path.join(this.localStateDir, 'code_meta.json');
263
+ this.wal = new CodeIndexWal(this.localStateDir);
264
+ this.walEnabled = options.walEnabled ?? deps.codeConfig.wal?.enabled ?? true;
265
+ this.retryOptions = {
266
+ ...DEFAULT_EMBEDDING_RETRY,
267
+ ...deps.codeConfig.retry,
268
+ ...options.retryOptions,
269
+ };
270
+ this.parser = createTreeSitterParser();
271
+ if (deps.codeConfig.graph.enabled) {
272
+ this.graphIndexer = new GraphIndexer({
273
+ rootPath: this.rootPath,
274
+ dbPath: this.dbPath,
275
+ codeConfig: deps.codeConfig,
276
+ parser: this.parser,
277
+ storageOptions: deps.lancedbStorageOptions,
278
+ });
279
+ }
280
+ if (deps.codeConfig.tree?.enabled) {
281
+ this.treeIndexer = new TreeIndexer(this.rootPath, this.dbPath, deps.embeddingProvider, deps.embeddingProvider.getDimensions(), deps.codeConfig.tree, {
282
+ localStateDir: this.localStateDir,
283
+ lancedbStorageOptions: deps.lancedbStorageOptions,
284
+ });
285
+ }
286
+ // Store injected dependencies
287
+ this.embeddingProvider = deps.embeddingProvider;
288
+ this.rerankerProvider = deps.rerankerProvider ?? null;
289
+ this.embeddingConfig = deps.embeddingConfig;
290
+ this.rerankingConfig = deps.rerankingConfig ?? DEFAULT_RERANKING_CONFIG;
291
+ this.codeConfig = deps.codeConfig;
292
+ this.codeFtsConfig = deps.codeFtsConfig;
293
+ this.lancedbStorageOptions = deps.lancedbStorageOptions;
294
+ // Store unified SyncWal if provided
295
+ this.syncWal = deps.syncWal || null;
296
+ }
297
+ async loadMeta() {
298
+ try {
299
+ const content = await fs.readFile(this.metaPath, 'utf-8');
300
+ return JSON.parse(content);
301
+ }
302
+ catch {
303
+ return null;
304
+ }
305
+ }
306
+ async saveMeta() {
307
+ const modelName = this.embeddingProvider.getModelName();
308
+ const meta = {
309
+ model: modelName,
310
+ provider: this.embeddingConfig.provider,
311
+ dimensions: this.embeddingProvider.getDimensions(),
312
+ createdAt: Date.now(),
313
+ };
314
+ await fs.mkdir(path.dirname(this.metaPath), { recursive: true });
315
+ await fs.writeFile(this.metaPath, JSON.stringify(meta, null, 2));
316
+ }
317
+ /**
318
+ * Create file filter based on include/exclude patterns and .gitignore.
319
+ */
320
+ createFileFilter(options) {
321
+ const include = options.include || this.codeConfig.include || DEFAULT_INCLUDE;
322
+ const exclude = options.exclude || this.codeConfig.exclude || DEFAULT_EXCLUDE;
323
+ const maxFileSize = options.maxFileSize || this.codeConfig.maxFileSize || 1024 * 1024;
324
+ // Load .gitignore (always respected)
325
+ if (!this.gitignore) {
326
+ const ig = ignore();
327
+ const gitignorePath = path.join(this.rootPath, '.gitignore');
328
+ if (existsSync(gitignorePath)) {
329
+ try {
330
+ const content = readFileSync(gitignorePath, 'utf8');
331
+ ig.add(content);
332
+ }
333
+ catch {
334
+ // Ignore errors reading .gitignore
335
+ }
336
+ }
337
+ this.gitignore = ig;
338
+ }
339
+ return (filePath) => {
340
+ const relativePath = path.relative(this.rootPath, filePath);
341
+ // 1. Must match at least one include pattern
342
+ const included = include.some((p) => minimatch(relativePath, p));
343
+ if (!included) {
344
+ return false;
345
+ }
346
+ // 2. Must not match any exclude pattern
347
+ if (exclude.some((p) => minimatch(relativePath, p))) {
348
+ return false;
349
+ }
350
+ // 3. Must not be in .gitignore
351
+ if (this.gitignore && this.gitignore.ignores(relativePath)) {
352
+ return false;
353
+ }
354
+ // 4. File size check is done at read time
355
+ return true;
356
+ };
357
+ }
358
+ /**
359
+ * Compute manifest hash from sorted file paths for recovery validation.
360
+ * Both notes and code use the same hash algorithm.
361
+ */
362
+ computeManifestHash(files) {
363
+ // Sort deterministically by path
364
+ const sortedPaths = files.map(f => path.relative(this.rootPath, f)).sort();
365
+ const content = sortedPaths.join('\n');
366
+ return crypto.createHash('sha256').update(content).digest('hex');
367
+ }
368
+ /**
369
+ * Initialize the indexer - connect to LanceDB and initialize parser.
370
+ */
371
+ async initialize() {
372
+ if (this.initialized)
373
+ return;
374
+ // Ensure LanceDB directory exists
375
+ const isRemoteDbPath = /^[a-z][a-z0-9+.-]*:\/\//i.test(this.dbPath);
376
+ if (!isRemoteDbPath) {
377
+ await fs.mkdir(this.dbPath, { recursive: true });
378
+ }
379
+ // Connect to LanceDB
380
+ this.db = await lancedb.connect(this.dbPath, {
381
+ storageOptions: this.lancedbStorageOptions,
382
+ });
383
+ const tableNames = await this.db.tableNames();
384
+ // Check if embedding config changed
385
+ const meta = await this.loadMeta();
386
+ const currentModel = this.embeddingProvider.getModelName();
387
+ const currentDimensions = this.embeddingProvider.getDimensions();
388
+ const needsRecreate = meta && (meta.model !== currentModel ||
389
+ meta.provider !== this.embeddingConfig.provider ||
390
+ meta.dimensions !== currentDimensions);
391
+ if (needsRecreate) {
392
+ console.warn('[code-indexer] Embedding config changed, recreating index...');
393
+ console.warn(` Previous: model=${meta.model}, provider=${meta.provider}, dimensions=${meta.dimensions}`);
394
+ console.warn(` Current: model=${currentModel}, provider=${this.embeddingConfig.provider}, dimensions=${currentDimensions}`);
395
+ // Drop tables if they exist
396
+ if (tableNames.includes('code_chunks')) {
397
+ await this.db.dropTable('code_chunks');
398
+ }
399
+ if (tableNames.includes('code_file_index')) {
400
+ await this.db.dropTable('code_file_index');
401
+ }
402
+ this.chunksTable = null;
403
+ this.fileIndexTable = null;
404
+ }
405
+ // Save current meta
406
+ await this.saveMeta();
407
+ // Open existing tables if they exist (and weren't dropped)
408
+ if (!this.chunksTable && (await this.db.tableNames()).includes('code_chunks')) {
409
+ this.chunksTable = await this.db.openTable('code_chunks');
410
+ }
411
+ if (!this.fileIndexTable && (await this.db.tableNames()).includes('code_file_index')) {
412
+ this.fileIndexTable = await this.db.openTable('code_file_index');
413
+ }
414
+ // Initialize Tree-sitter parser
415
+ await this.parser.initialize();
416
+ if (this.graphIndexer) {
417
+ await this.graphIndexer.initialize();
418
+ }
419
+ this.initialized = true;
420
+ // Auto-create all tables at startup
421
+ await this.ensureAllTables();
422
+ }
423
+ /**
424
+ * Ensure all code index tables exist with proper schema.
425
+ * Safe to call multiple times (idempotent).
426
+ */
427
+ async ensureAllTables() {
428
+ if (!this.initialized) {
429
+ await this.initialize();
430
+ return; // initialize() already called ensureAllTables()
431
+ }
432
+ await this.ensureChunksTable();
433
+ await this.ensureFileIndexTable();
434
+ if (this.graphIndexer) {
435
+ await this.graphIndexer.ensureAllTables();
436
+ }
437
+ if (this.treeIndexer) {
438
+ await this.treeIndexer.ensureAllTables();
439
+ }
440
+ }
441
+ /**
442
+ * Ensure code_chunks table exists.
443
+ */
444
+ async ensureChunksTable() {
445
+ if (this.chunksTable)
446
+ return this.chunksTable;
447
+ const dimensions = this.embeddingProvider.getDimensions();
448
+ // Create with sample data for schema inference (snake_case for SQL compatibility)
449
+ const sampleData = [
450
+ {
451
+ id: '__init__',
452
+ vector: new Array(dimensions).fill(0),
453
+ path: '',
454
+ language: '',
455
+ symbol_type: 'function',
456
+ symbol_name: '',
457
+ signature: '',
458
+ parent_symbol: '',
459
+ scope: '[]', // JSON array
460
+ content: '',
461
+ content_normalized: '',
462
+ start_line: 0,
463
+ end_line: 0,
464
+ docstring: '',
465
+ modified: 0,
466
+ content_hash: '',
467
+ },
468
+ ];
469
+ this.chunksTable = await this.db.createTable('code_chunks', sampleData, {
470
+ mode: 'overwrite',
471
+ });
472
+ // Delete the sample row
473
+ await this.chunksTable.delete("id = '__init__'");
474
+ return this.chunksTable;
475
+ }
476
+ /**
477
+ * Ensure code_file_index table exists.
478
+ */
479
+ async ensureFileIndexTable() {
480
+ if (this.fileIndexTable)
481
+ return this.fileIndexTable;
482
+ const sampleData = [
483
+ {
484
+ path: '__init__',
485
+ mtime: 0,
486
+ content_hash: '',
487
+ chunk_ids: '[]',
488
+ language: '',
489
+ indexed_at: 0,
490
+ },
491
+ ];
492
+ this.fileIndexTable = await this.db.createTable('code_file_index', sampleData, { mode: 'overwrite' });
493
+ // Delete the sample row
494
+ await this.fileIndexTable.delete("path = '__init__'");
495
+ return this.fileIndexTable;
496
+ }
497
+ getFtsColumn() {
498
+ return this.codeFtsConfig.normalizeIdentifiers ? 'content_normalized' : 'content';
499
+ }
500
+ async createOrUpdateFtsIndex() {
501
+ if (!this.codeFtsConfig.enabled) {
502
+ return;
503
+ }
504
+ const table = await this.ensureChunksTable();
505
+ const count = await table.countRows();
506
+ if (count === 0) {
507
+ return;
508
+ }
509
+ const columnToIndex = this.getFtsColumn();
510
+ const indexName = `${columnToIndex}_idx`;
511
+ const staleIndexName = `${columnToIndex === 'content' ? 'content_normalized' : 'content'}_idx`;
512
+ try {
513
+ const indices = await table.listIndices();
514
+ if (indices.some((index) => index.name === staleIndexName)) {
515
+ await table.dropIndex(staleIndexName);
516
+ }
517
+ }
518
+ catch (err) {
519
+ console.warn('[code-indexer] Could not inspect/drop stale FTS index:', err.message);
520
+ }
521
+ console.log(`[code-indexer] Creating/updating FTS index on ${columnToIndex} (${count} chunks)...`);
522
+ try {
523
+ await table.createIndex(columnToIndex, {
524
+ config: lancedb.Index.fts({
525
+ withPosition: this.codeFtsConfig.withPosition,
526
+ baseTokenizer: this.codeFtsConfig.baseTokenizer,
527
+ stem: this.codeFtsConfig.stem,
528
+ removeStopWords: this.codeFtsConfig.removeStopWords,
529
+ ngramMinLength: this.codeFtsConfig.ngramMinLength,
530
+ ngramMaxLength: this.codeFtsConfig.ngramMaxLength,
531
+ prefixOnly: this.codeFtsConfig.prefixOnly,
532
+ }),
533
+ replace: true,
534
+ });
535
+ await table.waitForIndex([indexName], 60);
536
+ console.log('[code-indexer] FTS index created successfully');
537
+ }
538
+ catch (err) {
539
+ console.warn('[code-indexer] Could not create FTS index:', err.message);
540
+ }
541
+ }
542
+ normalizeSearchScores(results) {
543
+ if (results.length === 0)
544
+ return [];
545
+ const values = results.map((r) => r.score);
546
+ const min = Math.min(...values);
547
+ const max = Math.max(...values);
548
+ if (max === min)
549
+ return values.map(() => 1);
550
+ return values.map((v) => (v - min) / (max - min));
551
+ }
552
+ /**
553
+ * Get file record from index.
554
+ */
555
+ async getFileRecord(filePath) {
556
+ const table = await this.ensureFileIndexTable();
557
+ const results = await table
558
+ .query()
559
+ .where(`path = '${sanitizePath(filePath)}'`)
560
+ .toArray();
561
+ if (results.length === 0)
562
+ return null;
563
+ const r = results[0];
564
+ return {
565
+ path: r.path,
566
+ mtime: r.mtime,
567
+ content_hash: r.content_hash,
568
+ chunk_ids: r.chunk_ids,
569
+ language: r.language,
570
+ indexed_at: r.indexed_at,
571
+ };
572
+ }
573
+ getTreeDeps() {
574
+ return {
575
+ getSymbols: async (filePath) => this.getSymbolsForTree(filePath),
576
+ getFileRecord: async (filePath) => this.getCodeFileRecordForTree(filePath),
577
+ };
578
+ }
579
+ async getCodeFileRecordForTree(filePath) {
580
+ const record = await this.getFileRecord(filePath);
581
+ if (!record)
582
+ return null;
583
+ return {
584
+ mtime: record.mtime,
585
+ content_hash: record.content_hash,
586
+ language: record.language,
587
+ };
588
+ }
589
+ async getSymbolsForTree(filePath) {
590
+ const table = await this.ensureChunksTable();
591
+ const rows = await table
592
+ .query()
593
+ .where(`path = '${sanitizePath(filePath)}'`)
594
+ .toArray();
595
+ return rows.map((row) => ({
596
+ id: row.id,
597
+ path: row.path,
598
+ language: row.language,
599
+ symbolType: row.symbol_type,
600
+ symbolName: row.symbol_name,
601
+ signature: row.signature || null,
602
+ parentSymbol: row.parent_symbol || null,
603
+ scope: JSON.parse(row.scope || '[]'),
604
+ content: row.content,
605
+ startLine: row.start_line,
606
+ endLine: row.end_line,
607
+ docstring: row.docstring || null,
608
+ modified: row.modified,
609
+ contentHash: row.content_hash,
610
+ }));
611
+ }
612
+ /**
613
+ * Get all indexed file paths.
614
+ */
615
+ async getAllIndexedPaths() {
616
+ const table = await this.ensureFileIndexTable();
617
+ const results = await table.query().select(['path']).toArray();
618
+ return new Set(results.map((r) => r.path));
619
+ }
620
+ /**
621
+ * Update file index record.
622
+ */
623
+ async updateFileIndex(filePath, mtime, contentHash, chunkIds, language) {
624
+ const table = await this.ensureFileIndexTable();
625
+ // Delete existing record if any
626
+ await table.delete(`path = '${sanitizePath(filePath)}'`);
627
+ // Insert new record
628
+ await table.add([
629
+ {
630
+ path: filePath,
631
+ mtime,
632
+ content_hash: contentHash,
633
+ chunk_ids: JSON.stringify(chunkIds),
634
+ language,
635
+ indexed_at: Date.now(),
636
+ },
637
+ ]);
638
+ }
639
+ /**
640
+ * Remove file from index.
641
+ */
642
+ async removeFromFileIndex(filePath) {
643
+ const table = await this.ensureFileIndexTable();
644
+ await table.delete(`path = '${sanitizePath(filePath)}'`);
645
+ }
646
+ /**
647
+ * Delete chunks by IDs.
648
+ */
649
+ async deleteChunks(chunkIds) {
650
+ if (chunkIds.length === 0)
651
+ return;
652
+ const table = await this.ensureChunksTable();
653
+ for (const id of chunkIds) {
654
+ await table.delete(`id = '${id}'`);
655
+ }
656
+ }
657
+ /**
658
+ * Delete all chunks for a file path.
659
+ */
660
+ async deleteChunksForPath(filePath) {
661
+ const table = await this.ensureChunksTable();
662
+ await table.delete(`path = '${sanitizePath(filePath)}'`);
663
+ }
664
+ /**
665
+ * Index a single code chunk with retry logic.
666
+ */
667
+ async indexChunk(chunk) {
668
+ const table = await this.ensureChunksTable();
669
+ // Build embedding text
670
+ const embeddingText = buildEmbeddingText(chunk);
671
+ // Get embedding with retry
672
+ let embedding;
673
+ try {
674
+ embedding = await withRetry(async () => {
675
+ const result = await this.embeddingProvider.getEmbedding(embeddingText, 'document');
676
+ if (isError(result)) {
677
+ throw new Error(result.message);
678
+ }
679
+ return result;
680
+ }, this.retryOptions);
681
+ }
682
+ catch (err) {
683
+ const error = err instanceof Error ? err : new Error(String(err));
684
+ return createError('EMBEDDING_ERROR', error.message);
685
+ }
686
+ // Check if chunk exists
687
+ const existing = await table
688
+ .query()
689
+ .where(`id = '${chunk.id}'`)
690
+ .toArray();
691
+ if (existing.length > 0 && existing[0].content_hash === chunk.contentHash) {
692
+ return chunk.id; // No changes needed
693
+ }
694
+ // Delete existing if present
695
+ if (existing.length > 0) {
696
+ await table.delete(`id = '${chunk.id}'`);
697
+ }
698
+ // Insert new chunk using snake_case for SQL compatibility
699
+ const record = toChunkRecord(chunk, embedding, this.codeFtsConfig.normalizeIdentifiers);
700
+ await table.add([record]);
701
+ return chunk.id;
702
+ }
703
+ /**
704
+ * Index multiple chunks in batch - much faster than indexChunk() one by one.
705
+ * Uses batch embedding for efficiency with retry logic.
706
+ *
707
+ * @param chunks - Array of chunks to index
708
+ * @param batchSize - Embedding batch size (default: 32)
709
+ * @param onEmbeddingProgress - Optional callback for embedding progress
710
+ * @returns Array of indexed chunk IDs and any errors
711
+ */
712
+ async indexChunksBatch(chunks, batchSize = 32, onEmbeddingProgress) {
713
+ if (chunks.length === 0) {
714
+ return { chunkIds: [], errors: [] };
715
+ }
716
+ const table = await this.ensureChunksTable();
717
+ const chunkIds = [];
718
+ const errors = [];
719
+ // Build embedding texts for all chunks
720
+ const embeddingTexts = chunks.map((chunk) => buildEmbeddingText(chunk));
721
+ // Get all embeddings in batch with retry logic
722
+ let embeddings;
723
+ try {
724
+ embeddings = await withRetry(async () => {
725
+ const result = await this.embeddingProvider.getEmbeddingsBatch(embeddingTexts, batchSize, 'document', onEmbeddingProgress);
726
+ if (isError(result)) {
727
+ throw new Error(result.message);
728
+ }
729
+ return result;
730
+ }, this.retryOptions);
731
+ }
732
+ catch (err) {
733
+ const error = err instanceof Error ? err : new Error(String(err));
734
+ return { chunkIds: [], errors: [createError('EMBEDDING_ERROR', error.message)] };
735
+ }
736
+ // Build records for batch insert (snake_case for SQL compatibility)
737
+ const records = [];
738
+ for (let i = 0; i < chunks.length; i++) {
739
+ const chunk = chunks[i];
740
+ const embedding = embeddings[i];
741
+ records.push(toChunkRecord(chunk, embedding, this.codeFtsConfig.normalizeIdentifiers));
742
+ chunkIds.push(chunk.id);
743
+ }
744
+ // Batch insert all records
745
+ if (records.length > 0) {
746
+ await table.add(records);
747
+ }
748
+ return { chunkIds, errors };
749
+ }
750
+ /**
751
+ * Parse a single file and return chunks without indexing.
752
+ * Used for batch processing.
753
+ */
754
+ async parseFile(filePath, options = {}) {
755
+ const absolutePath = path.isAbsolute(filePath)
756
+ ? filePath
757
+ : path.join(this.rootPath, filePath);
758
+ const relativePath = path.relative(this.rootPath, absolutePath);
759
+ try {
760
+ // Check file size
761
+ const stat = await fs.stat(absolutePath);
762
+ const maxSize = options.maxFileSize || this.codeConfig.maxFileSize || 1024 * 1024;
763
+ if (stat.size > maxSize) {
764
+ return createError('FILE_TOO_LARGE', `File exceeds max size: ${relativePath}`);
765
+ }
766
+ // Read file content
767
+ const content = await fs.readFile(absolutePath, 'utf-8');
768
+ const fileHash = generateContentHash(content);
769
+ // Detect language
770
+ const langKey = this.parser.getLanguageForFile(absolutePath);
771
+ if (!langKey) {
772
+ return createError('UNKNOWN_LANGUAGE', `Unknown file type: ${relativePath}`);
773
+ }
774
+ // Get extractor
775
+ const extractor = languageRegistry.getExtractor(langKey);
776
+ if (!extractor) {
777
+ return createError('NO_EXTRACTOR', `No extractor for language: ${langKey}`);
778
+ }
779
+ // Parse file
780
+ const tree = await this.parser.parse(content, langKey);
781
+ try {
782
+ // Extract symbols
783
+ const symbolsWithDetails = extractor.extractSymbols(tree, content, relativePath);
784
+ const symbols = symbolsWithDetails.map((s) => ({
785
+ name: s.name,
786
+ type: s.type,
787
+ startLine: s.startLine,
788
+ endLine: s.endLine,
789
+ node: s.node,
790
+ }));
791
+ // Convert to chunks
792
+ const minLines = options.minLines || this.codeConfig.chunking?.minLines || 3;
793
+ const maxLines = options.maxLines || this.codeConfig.chunking?.maxLines || 100;
794
+ const chunks = symbolsToChunks(symbols, relativePath, content, langKey, stat.mtimeMs, { minLines, maxLines });
795
+ // Add metadata from extractor
796
+ // Match by symbolName + startLine since symbolsToChunks filters by minLines
797
+ const detailsMap = new Map(symbolsWithDetails.map(d => [`${d.name}:${d.startLine}`, d]));
798
+ for (const chunk of chunks) {
799
+ if (chunk.symbolType === 'file')
800
+ continue; // Skip file chunks
801
+ const details = detailsMap.get(`${chunk.symbolName}:${chunk.startLine}`);
802
+ if (details) {
803
+ chunk.signature = details.signature;
804
+ chunk.docstring = details.docstring;
805
+ chunk.parentSymbol = details.parentSymbol;
806
+ chunk.scope = details.scope;
807
+ }
808
+ }
809
+ applyStableChunkIds(chunks);
810
+ // Add file-level chunk
811
+ const includeFileChunks = options.includeFileChunks ?? this.codeConfig.chunking?.includeFileChunks ?? true;
812
+ if (includeFileChunks) {
813
+ const fileChunk = createFileChunk(relativePath, content, langKey, stat.mtimeMs);
814
+ chunks.unshift(fileChunk);
815
+ }
816
+ return { chunks, fileHash, mtime: stat.mtimeMs, language: langKey };
817
+ }
818
+ finally {
819
+ this.parser.deleteTree(tree);
820
+ }
821
+ }
822
+ catch (err) {
823
+ return createError('PARSE_FILE_ERROR', `Failed to parse ${relativePath}`, {
824
+ error: err.message,
825
+ });
826
+ }
827
+ }
828
+ /**
829
+ * Index a single file with WAL support.
830
+ */
831
+ async indexFile(filePath, options = {}, batchId) {
832
+ await this.initialize();
833
+ const absolutePath = path.isAbsolute(filePath)
834
+ ? filePath
835
+ : path.join(this.rootPath, filePath);
836
+ const relativePath = path.relative(this.rootPath, absolutePath);
837
+ const chunkIds = [];
838
+ const errors = [];
839
+ // Write WAL start entry
840
+ if (this.walEnabled) {
841
+ await this.wal.start(relativePath, batchId);
842
+ }
843
+ try {
844
+ // Check file size
845
+ const stat = await fs.stat(absolutePath);
846
+ const maxSize = options.maxFileSize || this.codeConfig.maxFileSize || 1024 * 1024;
847
+ if (stat.size > maxSize) {
848
+ const err = createError('FILE_TOO_LARGE', `File exceeds max size: ${relativePath}`);
849
+ if (this.walEnabled) {
850
+ const retryCount = await this.wal.getRetryCount(relativePath);
851
+ await this.wal.fail(relativePath, err.message, retryCount + 1);
852
+ }
853
+ return { chunkIds, errors: [err] };
854
+ }
855
+ // Read file content
856
+ const content = await fs.readFile(absolutePath, 'utf-8');
857
+ const fileHash = generateContentHash(content);
858
+ // Detect language
859
+ const langKey = this.parser.getLanguageForFile(absolutePath);
860
+ if (!langKey) {
861
+ const err = createError('UNKNOWN_LANGUAGE', `Unknown file type: ${relativePath}`);
862
+ if (this.walEnabled) {
863
+ const retryCount = await this.wal.getRetryCount(relativePath);
864
+ await this.wal.fail(relativePath, err.message, retryCount + 1);
865
+ }
866
+ return { chunkIds, errors: [err] };
867
+ }
868
+ // Get extractor for this language
869
+ const extractor = languageRegistry.getExtractor(langKey);
870
+ if (!extractor) {
871
+ const err = createError('NO_EXTRACTOR', `No extractor for language: ${langKey}`);
872
+ if (this.walEnabled) {
873
+ const retryCount = await this.wal.getRetryCount(relativePath);
874
+ await this.wal.fail(relativePath, err.message, retryCount + 1);
875
+ }
876
+ return { chunkIds, errors: [err] };
877
+ }
878
+ // Parse file
879
+ const tree = await this.parser.parse(content, langKey);
880
+ try {
881
+ // Extract symbols using language-specific extractor
882
+ const symbolsWithDetails = extractor.extractSymbols(tree, content, relativePath);
883
+ // Convert to base ExtractedSymbol format for chunker
884
+ const symbols = symbolsWithDetails.map((s) => ({
885
+ name: s.name,
886
+ type: s.type,
887
+ startLine: s.startLine,
888
+ endLine: s.endLine,
889
+ node: s.node,
890
+ }));
891
+ // Convert symbols to chunks
892
+ const minLines = options.minLines || this.codeConfig.chunking?.minLines || 3;
893
+ const maxLines = options.maxLines || this.codeConfig.chunking?.maxLines || 100;
894
+ const chunks = symbolsToChunks(symbols, relativePath, content, langKey, stat.mtimeMs, { minLines, maxLines });
895
+ // Add detailed metadata from extractor to chunks
896
+ // Match by symbolName + startLine since symbolsToChunks filters by minLines
897
+ const detailsMap = new Map(symbolsWithDetails.map(d => [`${d.name}:${d.startLine}`, d]));
898
+ for (const chunk of chunks) {
899
+ if (chunk.symbolType === 'file')
900
+ continue; // Skip file chunks
901
+ const details = detailsMap.get(`${chunk.symbolName}:${chunk.startLine}`);
902
+ if (details) {
903
+ chunk.signature = details.signature;
904
+ chunk.docstring = details.docstring;
905
+ chunk.parentSymbol = details.parentSymbol;
906
+ chunk.scope = details.scope;
907
+ }
908
+ }
909
+ applyStableChunkIds(chunks);
910
+ // Optionally add file-level chunk
911
+ const includeFileChunks = options.includeFileChunks ?? this.codeConfig.chunking?.includeFileChunks ?? true;
912
+ if (includeFileChunks) {
913
+ const fileChunk = createFileChunk(relativePath, content, langKey, stat.mtimeMs);
914
+ chunks.unshift(fileChunk);
915
+ }
916
+ // Delete old chunks for this file
917
+ await this.deleteChunksForPath(relativePath);
918
+ // Index all chunks
919
+ for (const chunk of chunks) {
920
+ const result = await this.indexChunk(chunk);
921
+ if (isError(result)) {
922
+ errors.push(result);
923
+ }
924
+ else {
925
+ chunkIds.push(result);
926
+ }
927
+ }
928
+ // Update file index
929
+ await this.updateFileIndex(relativePath, stat.mtimeMs, fileHash, chunkIds, langKey);
930
+ if (this.graphIndexer) {
931
+ await this.graphIndexer.updateFiles([relativePath]);
932
+ }
933
+ if (this.treeIndexer) {
934
+ await this.treeIndexer.upsertFile(relativePath, this.getTreeDeps());
935
+ }
936
+ // Write WAL done entry
937
+ if (this.walEnabled) {
938
+ await this.wal.done(relativePath, chunkIds.length);
939
+ }
940
+ }
941
+ finally {
942
+ // Always delete tree to prevent memory leaks
943
+ this.parser.deleteTree(tree);
944
+ }
945
+ }
946
+ catch (err) {
947
+ const error = createError('INDEX_FILE_ERROR', `Failed to index ${relativePath}`, {
948
+ error: err.message,
949
+ });
950
+ errors.push(error);
951
+ // Write WAL fail entry
952
+ if (this.walEnabled) {
953
+ const retryCount = await this.wal.getRetryCount(relativePath);
954
+ await this.wal.fail(relativePath, err.message, retryCount + 1);
955
+ }
956
+ }
957
+ return { chunkIds, errors };
958
+ }
959
+ /**
960
+ * Reindex all files in root path.
961
+ * Uses batch processing for much faster performance with WAL support.
962
+ */
963
+ async reindexAll(options = {}) {
964
+ await this.initialize();
965
+ return withIndexLock(this.localStateDir, async () => {
966
+ const filter = this.createFileFilter(options);
967
+ const include = options.include || this.codeConfig.include || DEFAULT_INCLUDE;
968
+ // Find all matching files
969
+ const patterns = include.map((p) => path.join(this.rootPath, p));
970
+ let files = [];
971
+ for (const pattern of patterns) {
972
+ const matches = await glob(pattern, {
973
+ ignore: options.exclude || this.codeConfig.exclude || DEFAULT_EXCLUDE,
974
+ nodir: true,
975
+ });
976
+ files.push(...matches);
977
+ }
978
+ // Deduplicate and filter
979
+ files = [...new Set(files)].filter(filter);
980
+ // Generate batch ID for WAL
981
+ const batchId = crypto.randomUUID();
982
+ const relativePaths = files.map(f => path.relative(this.rootPath, f));
983
+ // Write batch start to WAL
984
+ if (this.walEnabled) {
985
+ await this.wal.batchStart(batchId, relativePaths);
986
+ }
987
+ let indexed = 0;
988
+ let skipped = 0;
989
+ let deleted = 0;
990
+ let failed = 0;
991
+ const errors = [];
992
+ // Track current file paths
993
+ const currentPaths = new Set();
994
+ const indexedPaths = await this.getAllIndexedPaths();
995
+ const filesToIndex = [];
996
+ const pathsToDelete = [];
997
+ for (const file of files) {
998
+ const relativePath = path.relative(this.rootPath, file);
999
+ currentPaths.add(relativePath);
1000
+ // Write file start to WAL
1001
+ if (this.walEnabled) {
1002
+ await this.wal.start(relativePath, batchId);
1003
+ }
1004
+ try {
1005
+ const stat = await fs.stat(file);
1006
+ // Check if file needs reindexing
1007
+ const existingRecord = await this.getFileRecord(relativePath);
1008
+ if (existingRecord && existingRecord.mtime === stat.mtimeMs) {
1009
+ skipped++;
1010
+ // Mark as done (skipped = no change)
1011
+ if (this.walEnabled) {
1012
+ await this.wal.done(relativePath, 0);
1013
+ }
1014
+ continue;
1015
+ }
1016
+ // Read and check content hash
1017
+ const content = await fs.readFile(file, 'utf-8');
1018
+ const fileHash = generateContentHash(content);
1019
+ if (existingRecord && existingRecord.content_hash === fileHash) {
1020
+ // Content unchanged, just update mtime
1021
+ await this.updateFileIndex(relativePath, stat.mtimeMs, fileHash, JSON.parse(existingRecord.chunk_ids), existingRecord.language);
1022
+ skipped++;
1023
+ // Mark as done
1024
+ if (this.walEnabled) {
1025
+ await this.wal.done(relativePath, 0);
1026
+ }
1027
+ continue;
1028
+ }
1029
+ // File changed - parse it (but don't index yet)
1030
+ const parseResult = await this.parseFile(file, options);
1031
+ if (isError(parseResult)) {
1032
+ errors.push(parseResult);
1033
+ failed++;
1034
+ if (this.walEnabled) {
1035
+ const retryCount = await this.wal.getRetryCount(relativePath);
1036
+ await this.wal.fail(relativePath, parseResult.message, retryCount + 1);
1037
+ }
1038
+ continue;
1039
+ }
1040
+ // Collect old chunks for deletion
1041
+ if (existingRecord) {
1042
+ pathsToDelete.push(relativePath);
1043
+ }
1044
+ filesToIndex.push({
1045
+ relativePath,
1046
+ chunks: parseResult.chunks,
1047
+ fileHash: parseResult.fileHash,
1048
+ mtime: parseResult.mtime,
1049
+ language: parseResult.language,
1050
+ });
1051
+ }
1052
+ catch (err) {
1053
+ const error = createError('INDEX_FILE_ERROR', `Failed to parse ${relativePath}`, {
1054
+ error: err.message,
1055
+ });
1056
+ errors.push(error);
1057
+ failed++;
1058
+ if (this.walEnabled) {
1059
+ const retryCount = await this.wal.getRetryCount(relativePath);
1060
+ await this.wal.fail(relativePath, err.message, retryCount + 1);
1061
+ }
1062
+ }
1063
+ }
1064
+ // Phase 2: Delete old chunks for files that will be reindexed
1065
+ for (const pathToDelete of pathsToDelete) {
1066
+ await this.deleteChunksForPath(pathToDelete);
1067
+ }
1068
+ // Phase 3: Batch index all chunks
1069
+ if (filesToIndex.length > 0) {
1070
+ // Collect all chunks from all files
1071
+ const allChunks = [];
1072
+ const chunkToFileMap = new Map();
1073
+ for (const fileResult of filesToIndex) {
1074
+ const startIdx = allChunks.length;
1075
+ allChunks.push(...fileResult.chunks);
1076
+ // Map chunk indices to file for later file index update
1077
+ for (let i = startIdx; i < allChunks.length; i++) {
1078
+ chunkToFileMap.set(i, fileResult);
1079
+ }
1080
+ }
1081
+ // Batch index all chunks at once (larger batch = fewer API calls)
1082
+ const batchResult = await this.indexChunksBatch(allChunks, 128);
1083
+ indexed = batchResult.chunkIds.length;
1084
+ errors.push(...batchResult.errors);
1085
+ // Update file indices and write WAL done entries
1086
+ const fileChunkIds = new Map();
1087
+ for (let i = 0; i < batchResult.chunkIds.length; i++) {
1088
+ const fileResult = chunkToFileMap.get(i);
1089
+ if (fileResult) {
1090
+ const ids = fileChunkIds.get(fileResult.relativePath) || [];
1091
+ ids.push(batchResult.chunkIds[i]);
1092
+ fileChunkIds.set(fileResult.relativePath, ids);
1093
+ }
1094
+ }
1095
+ for (const fileResult of filesToIndex) {
1096
+ const chunkIds = fileChunkIds.get(fileResult.relativePath) || [];
1097
+ await this.updateFileIndex(fileResult.relativePath, fileResult.mtime, fileResult.fileHash, chunkIds, fileResult.language);
1098
+ // Write WAL done entry for each file
1099
+ if (this.walEnabled) {
1100
+ await this.wal.done(fileResult.relativePath, chunkIds.length);
1101
+ }
1102
+ }
1103
+ }
1104
+ // Phase 4: Handle deleted files
1105
+ for (const indexedPath of indexedPaths) {
1106
+ if (!currentPaths.has(indexedPath)) {
1107
+ const record = await this.getFileRecord(indexedPath);
1108
+ if (record) {
1109
+ const chunkIds = JSON.parse(record.chunk_ids);
1110
+ await this.deleteChunks(chunkIds);
1111
+ await this.removeFromFileIndex(indexedPath);
1112
+ deleted++;
1113
+ }
1114
+ }
1115
+ }
1116
+ // Write batch done to WAL
1117
+ if (this.walEnabled) {
1118
+ await this.wal.batchDone(batchId, { indexed, failed });
1119
+ // Compact WAL after successful batch
1120
+ await this.wal.compact();
1121
+ }
1122
+ await this.createOrUpdateFtsIndex();
1123
+ if (this.graphIndexer) {
1124
+ await this.graphIndexer.indexAll();
1125
+ }
1126
+ if (this.treeIndexer) {
1127
+ await this.treeIndexer.buildTree(this.getTreeDeps());
1128
+ }
1129
+ return { indexed, skipped, deleted, errors };
1130
+ });
1131
+ }
1132
+ /**
1133
+ * Reindex all files using unified SyncWal for recovery.
1134
+ * Uses offset-based recovery (deterministic file ordering).
1135
+ *
1136
+ * @param recovery Optional recovery plan from SyncWal
1137
+ * @returns IndexResult with counts
1138
+ */
1139
+ async reindexAllWithSyncWal(recovery) {
1140
+ if (!this.syncWal) {
1141
+ throw new Error('SyncWal not provided - use reindexAll() instead');
1142
+ }
1143
+ await this.initialize();
1144
+ const filter = this.createFileFilter({});
1145
+ const include = this.codeConfig.include || DEFAULT_INCLUDE;
1146
+ // Find all matching files
1147
+ const patterns = include.map((p) => path.join(this.rootPath, p));
1148
+ let files = [];
1149
+ for (const pattern of patterns) {
1150
+ const matches = await glob(pattern, {
1151
+ ignore: this.codeConfig.exclude || DEFAULT_EXCLUDE,
1152
+ nodir: true,
1153
+ });
1154
+ files.push(...matches);
1155
+ }
1156
+ // Deduplicate, filter, and sort deterministically
1157
+ files = [...new Set(files)].filter(filter).sort((a, b) => a.localeCompare(b));
1158
+ // Compute manifest hash for recovery validation
1159
+ const manifestHash = this.computeManifestHash(files);
1160
+ // Validate recovery plan
1161
+ let skipCount = 0;
1162
+ if (recovery) {
1163
+ if (recovery.manifestHash === manifestHash) {
1164
+ skipCount = recovery.skipCount;
1165
+ console.log(`[code-indexer] Resuming from file ${skipCount}/${files.length}`);
1166
+ }
1167
+ else {
1168
+ // Files changed since crash — full reindex
1169
+ console.warn('[code-indexer] Manifest hash mismatch, starting full reindex');
1170
+ }
1171
+ }
1172
+ // Start task (acquires lock, starts heartbeat)
1173
+ const taskId = await this.syncWal.startTask('code', files.length, manifestHash);
1174
+ let indexed = 0;
1175
+ let skipped = 0;
1176
+ let deleted = 0;
1177
+ const errors = [];
1178
+ // Track current file paths
1179
+ const currentPaths = new Set();
1180
+ const indexedPaths = await this.getAllIndexedPaths();
1181
+ const filesToIndex = [];
1182
+ const pathsToDelete = [];
1183
+ try {
1184
+ for (let i = skipCount; i < files.length; i++) {
1185
+ // Check cancellation
1186
+ if (this.syncWal.isCancelled(taskId)) {
1187
+ await this.syncWal.cancelTask(taskId);
1188
+ return { indexed, skipped, deleted, errors };
1189
+ }
1190
+ // Check pause - wait until resumed or cancelled
1191
+ const shouldContinue = await this.syncWal.waitWhilePaused(taskId);
1192
+ if (!shouldContinue) {
1193
+ // Cancelled while paused
1194
+ await this.syncWal.cancelTask(taskId);
1195
+ return { indexed, skipped, deleted, errors };
1196
+ }
1197
+ const file = files[i];
1198
+ const relativePath = path.relative(this.rootPath, file);
1199
+ currentPaths.add(relativePath);
1200
+ try {
1201
+ const stat = await fs.stat(file);
1202
+ // Check if file needs reindexing
1203
+ const existingRecord = await this.getFileRecord(relativePath);
1204
+ if (existingRecord && existingRecord.mtime === stat.mtimeMs) {
1205
+ skipped++;
1206
+ await this.syncWal.fileProcessed(taskId, relativePath, 0, 'updated');
1207
+ continue;
1208
+ }
1209
+ // Read and check content hash
1210
+ const content = await fs.readFile(file, 'utf-8');
1211
+ const fileHash = generateContentHash(content);
1212
+ if (existingRecord && existingRecord.content_hash === fileHash) {
1213
+ // Content unchanged, just update mtime
1214
+ await this.updateFileIndex(relativePath, stat.mtimeMs, fileHash, JSON.parse(existingRecord.chunk_ids), existingRecord.language);
1215
+ skipped++;
1216
+ await this.syncWal.fileProcessed(taskId, relativePath, 0, 'updated');
1217
+ continue;
1218
+ }
1219
+ // File changed - parse it
1220
+ const parseResult = await this.parseFile(file, {});
1221
+ if (isError(parseResult)) {
1222
+ errors.push(parseResult);
1223
+ await this.syncWal.fileFailed(taskId, relativePath, parseResult.message, 0);
1224
+ continue;
1225
+ }
1226
+ // Collect old chunks for deletion
1227
+ if (existingRecord) {
1228
+ pathsToDelete.push(relativePath);
1229
+ }
1230
+ filesToIndex.push({
1231
+ relativePath,
1232
+ chunks: parseResult.chunks,
1233
+ fileHash: parseResult.fileHash,
1234
+ mtime: parseResult.mtime,
1235
+ language: parseResult.language,
1236
+ });
1237
+ }
1238
+ catch (err) {
1239
+ const error = createError('INDEX_FILE_ERROR', `Failed to parse ${relativePath}`, {
1240
+ error: err.message,
1241
+ });
1242
+ errors.push(error);
1243
+ await this.syncWal.fileFailed(taskId, relativePath, err.message, 0);
1244
+ }
1245
+ }
1246
+ // Phase 2: Delete old chunks for files that will be reindexed
1247
+ this.syncWal.setPhase('deleting');
1248
+ for (const pathToDelete of pathsToDelete) {
1249
+ await this.deleteChunksForPath(pathToDelete);
1250
+ }
1251
+ // Phase 3: Batch index all chunks
1252
+ this.syncWal.setPhase('indexing');
1253
+ if (filesToIndex.length > 0) {
1254
+ const allChunks = [];
1255
+ const chunkToFileMap = new Map();
1256
+ for (const fileResult of filesToIndex) {
1257
+ const startIdx = allChunks.length;
1258
+ allChunks.push(...fileResult.chunks);
1259
+ for (let i = startIdx; i < allChunks.length; i++) {
1260
+ chunkToFileMap.set(i, fileResult);
1261
+ }
1262
+ }
1263
+ // Batch index with embedding progress callback
1264
+ const embeddingProgressCallback = this.syncWal?.getEmbeddingProgressCallback();
1265
+ const batchResult = await this.indexChunksBatch(allChunks, 128, embeddingProgressCallback);
1266
+ indexed = batchResult.chunkIds.length;
1267
+ errors.push(...batchResult.errors);
1268
+ // Update file indices
1269
+ const fileChunkIds = new Map();
1270
+ for (let i = 0; i < batchResult.chunkIds.length; i++) {
1271
+ const fileResult = chunkToFileMap.get(i);
1272
+ if (fileResult) {
1273
+ const ids = fileChunkIds.get(fileResult.relativePath) || [];
1274
+ ids.push(batchResult.chunkIds[i]);
1275
+ fileChunkIds.set(fileResult.relativePath, ids);
1276
+ }
1277
+ }
1278
+ for (const fileResult of filesToIndex) {
1279
+ const chunkIds = fileChunkIds.get(fileResult.relativePath) || [];
1280
+ await this.updateFileIndex(fileResult.relativePath, fileResult.mtime, fileResult.fileHash, chunkIds, fileResult.language);
1281
+ await this.syncWal.fileProcessed(taskId, fileResult.relativePath, chunkIds.length, 'added');
1282
+ }
1283
+ }
1284
+ // Phase 4: Handle deleted files
1285
+ for (const indexedPath of indexedPaths) {
1286
+ if (!currentPaths.has(indexedPath)) {
1287
+ const record = await this.getFileRecord(indexedPath);
1288
+ if (record) {
1289
+ const chunkIds = JSON.parse(record.chunk_ids);
1290
+ await this.deleteChunks(chunkIds);
1291
+ await this.removeFromFileIndex(indexedPath);
1292
+ deleted++;
1293
+ }
1294
+ }
1295
+ }
1296
+ await this.createOrUpdateFtsIndex();
1297
+ // Run graph/tree indexing (blocking to ensure completion before sync:completed)
1298
+ if (this.graphIndexer) {
1299
+ console.log('[code-indexer] starting graphIndexer.indexAll()');
1300
+ this.syncWal.setPhase('graph');
1301
+ await this.graphIndexer.indexAll(undefined, {
1302
+ onProgress: (current, total, phase) => {
1303
+ this.syncWal?.emitSubPhaseProgress(current, total, `graph:${phase}`);
1304
+ },
1305
+ });
1306
+ console.log('[code-indexer] graphIndexer.indexAll() done');
1307
+ }
1308
+ if (this.treeIndexer) {
1309
+ console.log('[code-indexer] starting treeIndexer.buildTree()');
1310
+ this.syncWal.setPhase('tree');
1311
+ await this.treeIndexer.buildTree(this.getTreeDeps(), {
1312
+ onProgress: (current, total, phase) => {
1313
+ this.syncWal?.emitSubPhaseProgress(current, total, `tree:${phase}`);
1314
+ },
1315
+ });
1316
+ console.log('[code-indexer] treeIndexer.buildTree() done');
1317
+ }
1318
+ // Complete task AFTER all indexing is done
1319
+ await this.syncWal.completeTask(taskId);
1320
+ }
1321
+ catch (err) {
1322
+ // Task failed
1323
+ await this.syncWal.failTask(taskId, err.message);
1324
+ throw err;
1325
+ }
1326
+ return { indexed, skipped, deleted, errors };
1327
+ }
1328
+ async rebuildFtsIndex() {
1329
+ await this.initialize();
1330
+ await this.createOrUpdateFtsIndex();
1331
+ }
1332
+ /**
1333
+ * Set the SyncWal instance for unified recovery.
1334
+ * Should be called when SyncWal is created after CodeIndexer construction.
1335
+ */
1336
+ setSyncWal(syncWal) {
1337
+ this.syncWal = syncWal;
1338
+ }
1339
+ /**
1340
+ * Get the SyncWal instance.
1341
+ */
1342
+ getSyncWal() {
1343
+ return this.syncWal;
1344
+ }
1345
+ /**
1346
+ * Check if SyncWal is configured.
1347
+ */
1348
+ hasSyncWal() {
1349
+ return this.syncWal !== null;
1350
+ }
1351
+ /**
1352
+ * Search code by semantic query.
1353
+ */
1354
+ async search(query, options = {}) {
1355
+ await this.initialize();
1356
+ const { limit = 10, threshold = 0.5, language, symbolTypes, path: pathFilter, rerank = true, } = options;
1357
+ const table = await this.ensureChunksTable();
1358
+ // Check if table has data
1359
+ const count = await table.countRows();
1360
+ if (count === 0) {
1361
+ return [];
1362
+ }
1363
+ // Get query embedding (use 'query' type for Voyage AI asymmetric search)
1364
+ const queryEmbedding = await this.embeddingProvider.getEmbedding(query, 'query');
1365
+ if (isError(queryEmbedding)) {
1366
+ return queryEmbedding;
1367
+ }
1368
+ const useReranking = Boolean(rerank &&
1369
+ this.rerankerProvider &&
1370
+ this.rerankingConfig.enabled &&
1371
+ this.rerankingConfig.provider !== 'none');
1372
+ const fetchLimit = useReranking
1373
+ ? Math.max(this.rerankingConfig.topK, limit)
1374
+ : limit * 2;
1375
+ const vectorThreshold = useReranking
1376
+ ? this.rerankingConfig.prerankThreshold
1377
+ : threshold;
1378
+ // Build search query
1379
+ let searchQuery = table.search(queryEmbedding).limit(fetchLimit);
1380
+ // Apply filters
1381
+ const filters = [];
1382
+ if (language) {
1383
+ const lang = sanitizeIdentifier(language);
1384
+ if (!VALID_LANGUAGES.has(lang)) {
1385
+ return createError('INVALID_LANGUAGE', `Unknown language: ${lang}`);
1386
+ }
1387
+ filters.push(`language = '${lang}'`);
1388
+ }
1389
+ if (symbolTypes && symbolTypes.length > 0) {
1390
+ const validTypes = symbolTypes
1391
+ .map((t) => sanitizeIdentifier(t))
1392
+ .filter((t) => VALID_SYMBOL_TYPES.has(t));
1393
+ if (validTypes.length === 0) {
1394
+ return createError('INVALID_SYMBOL_TYPES', 'No valid symbol types provided');
1395
+ }
1396
+ const types = validTypes.map((t) => `'${t}'`).join(', ');
1397
+ filters.push(`symbol_type IN (${types})`);
1398
+ }
1399
+ if (pathFilter) {
1400
+ const safePath = sanitizePath(pathFilter);
1401
+ filters.push(`path LIKE '${safePath}%'`);
1402
+ }
1403
+ if (filters.length > 0) {
1404
+ searchQuery = searchQuery.where(filters.join(' AND '));
1405
+ }
1406
+ const results = await searchQuery.toArray();
1407
+ // Convert L2 distance to cosine similarity, filter and deduplicate
1408
+ const processed = [];
1409
+ const seenContentHashes = new Set();
1410
+ for (const r of results) {
1411
+ const distance = r._distance;
1412
+ // For normalized vectors: L2^2 = 2(1 - cos_sim)
1413
+ const score = 1 - distance / 2;
1414
+ if (score < vectorThreshold)
1415
+ continue;
1416
+ // Deduplicate by content_hash (same code in file chunk and symbol chunk)
1417
+ const contentHash = r.content_hash;
1418
+ if (seenContentHashes.has(contentHash))
1419
+ continue;
1420
+ seenContentHashes.add(contentHash);
1421
+ const result = fromChunkRecord(r);
1422
+ result.score = score;
1423
+ processed.push(result);
1424
+ if (processed.length >= fetchLimit)
1425
+ break;
1426
+ }
1427
+ if (processed.length === 0) {
1428
+ return [];
1429
+ }
1430
+ if (useReranking && processed.length >= 5 && this.rerankerProvider) {
1431
+ try {
1432
+ if (!this.rerankerProvider.isInitialized()) {
1433
+ await this.rerankerProvider.initialize();
1434
+ }
1435
+ const documents = processed.map((item) => item.content);
1436
+ const reranked = await this.rerankerProvider.rerank(query, documents, {
1437
+ topK: limit,
1438
+ });
1439
+ if (!isError(reranked)) {
1440
+ const rerankedResults = reranked
1441
+ .filter((rr) => rr.index >= 0 && rr.index < processed.length)
1442
+ .map((rr) => ({
1443
+ ...processed[rr.index],
1444
+ score: rr.score,
1445
+ _originalScore: processed[rr.index].score,
1446
+ }))
1447
+ .filter((result) => result.score >= threshold);
1448
+ return rerankedResults.slice(0, limit);
1449
+ }
1450
+ console.warn('[code-indexer] Reranking failed, using vector scores:', reranked.message);
1451
+ }
1452
+ catch (error) {
1453
+ console.warn('[code-indexer] Reranking error, falling back to vector scores:', error);
1454
+ }
1455
+ }
1456
+ return processed
1457
+ .filter((result) => result.score >= threshold)
1458
+ .slice(0, limit);
1459
+ }
1460
+ async searchFts(options) {
1461
+ await this.initialize();
1462
+ const query = options.query.trim();
1463
+ if (!query) {
1464
+ return createError('INVALID_QUERY', 'Query must not be empty');
1465
+ }
1466
+ const table = await this.ensureChunksTable();
1467
+ const count = await table.countRows();
1468
+ if (count === 0) {
1469
+ return [];
1470
+ }
1471
+ const limit = options.limit ?? 10;
1472
+ const ftsColumn = this.getFtsColumn();
1473
+ const phraseQuery = options.phraseMatch
1474
+ ? new PhraseQuery(query, ftsColumn, { slop: options.phraseSlop ?? 0 })
1475
+ : null;
1476
+ const fuzzyQuery = options.fuzziness !== undefined
1477
+ ? new MatchQuery(query, ftsColumn, { fuzziness: options.fuzziness })
1478
+ : null;
1479
+ let ftsQuery = query;
1480
+ if (phraseQuery && fuzzyQuery) {
1481
+ ftsQuery = new BooleanQuery([
1482
+ [Occur.Should, phraseQuery],
1483
+ [Occur.Should, fuzzyQuery],
1484
+ ]);
1485
+ }
1486
+ else if (phraseQuery) {
1487
+ ftsQuery = phraseQuery;
1488
+ }
1489
+ else if (fuzzyQuery) {
1490
+ ftsQuery = fuzzyQuery;
1491
+ }
1492
+ let searchQuery = table.search(ftsQuery, 'fts', ftsColumn).limit(limit);
1493
+ const filters = [];
1494
+ if (options.language) {
1495
+ const lang = sanitizeIdentifier(options.language);
1496
+ if (!VALID_LANGUAGES.has(lang)) {
1497
+ return createError('INVALID_LANGUAGE', `Unknown language: ${lang}`);
1498
+ }
1499
+ filters.push(`language = '${lang}'`);
1500
+ }
1501
+ if (options.symbolTypes && options.symbolTypes.length > 0) {
1502
+ const validTypes = options.symbolTypes
1503
+ .map((t) => sanitizeIdentifier(t))
1504
+ .filter((t) => VALID_SYMBOL_TYPES.has(t));
1505
+ if (validTypes.length === 0) {
1506
+ return createError('INVALID_SYMBOL_TYPES', 'No valid symbol types provided');
1507
+ }
1508
+ const types = validTypes.map((t) => `'${t}'`).join(', ');
1509
+ filters.push(`symbol_type IN (${types})`);
1510
+ }
1511
+ const pathFilter = options.path
1512
+ ? buildPathPrefixFilter('path', options.path)
1513
+ : null;
1514
+ const whereClause = combineFilters([...filters, pathFilter]);
1515
+ if (whereClause) {
1516
+ searchQuery = searchQuery.where(whereClause);
1517
+ }
1518
+ const rows = await searchQuery.toArray();
1519
+ const results = [];
1520
+ for (const row of rows) {
1521
+ const result = fromChunkRecord(row);
1522
+ const ftsScore = typeof row._score === 'number' ? row._score : undefined;
1523
+ const fallbackScore = typeof row._distance === 'number'
1524
+ ? 1 - row._distance / 2
1525
+ : 1;
1526
+ result.score = ftsScore ?? fallbackScore;
1527
+ results.push(result);
1528
+ }
1529
+ return results;
1530
+ }
1531
+ async searchHybrid(options) {
1532
+ const query = options.query.trim();
1533
+ if (!query) {
1534
+ return createError('INVALID_QUERY', 'Query must not be empty');
1535
+ }
1536
+ const limit = options.limit ?? 10;
1537
+ const threshold = options.threshold ?? 0.5;
1538
+ const semanticLimit = options.semanticLimit ?? Math.max(limit * 3, 20);
1539
+ const ftsLimit = options.ftsLimit ?? Math.max(limit * 3, 20);
1540
+ const vectorWeight = options.vectorWeight ?? 0.55;
1541
+ const ftsWeight = options.ftsWeight ?? 0.45;
1542
+ const rerankEnabled = options.rerank ?? true;
1543
+ const [semanticResults, ftsResults] = await Promise.all([
1544
+ this.search(query, {
1545
+ limit: semanticLimit,
1546
+ threshold: 0,
1547
+ language: options.language,
1548
+ symbolTypes: options.symbolTypes,
1549
+ path: options.path,
1550
+ rerank: false,
1551
+ }),
1552
+ this.searchFts({
1553
+ query,
1554
+ limit: ftsLimit,
1555
+ fuzziness: options.fuzziness,
1556
+ phraseMatch: options.phraseMatch,
1557
+ phraseSlop: options.phraseSlop,
1558
+ language: options.language,
1559
+ symbolTypes: options.symbolTypes,
1560
+ path: options.path,
1561
+ }),
1562
+ ]);
1563
+ if (isError(semanticResults))
1564
+ return semanticResults;
1565
+ if (isError(ftsResults))
1566
+ return ftsResults;
1567
+ if (semanticResults.length === 0 && ftsResults.length === 0)
1568
+ return [];
1569
+ const semanticNorm = this.normalizeSearchScores(semanticResults);
1570
+ const ftsNorm = this.normalizeSearchScores(ftsResults);
1571
+ const merged = new Map();
1572
+ for (let i = 0; i < semanticResults.length; i++) {
1573
+ const item = semanticResults[i];
1574
+ const existing = merged.get(item.id);
1575
+ if (existing) {
1576
+ existing.vector = Math.max(existing.vector, semanticNorm[i] ?? 0);
1577
+ }
1578
+ else {
1579
+ merged.set(item.id, { item, vector: semanticNorm[i] ?? 0, fts: 0 });
1580
+ }
1581
+ }
1582
+ for (let i = 0; i < ftsResults.length; i++) {
1583
+ const item = ftsResults[i];
1584
+ const existing = merged.get(item.id);
1585
+ if (existing) {
1586
+ existing.fts = Math.max(existing.fts, ftsNorm[i] ?? 0);
1587
+ }
1588
+ else {
1589
+ merged.set(item.id, { item, vector: 0, fts: ftsNorm[i] ?? 0 });
1590
+ }
1591
+ }
1592
+ const fused = Array.from(merged.values())
1593
+ .map(({ item, vector, fts }) => {
1594
+ const overlapBoost = vector > 0 && fts > 0 ? 0.05 : 0;
1595
+ const score = Math.min(1, vectorWeight * vector + ftsWeight * fts + overlapBoost);
1596
+ return { ...item, score };
1597
+ })
1598
+ .sort((a, b) => b.score - a.score);
1599
+ const useReranking = Boolean(rerankEnabled &&
1600
+ this.rerankerProvider &&
1601
+ this.rerankingConfig.enabled &&
1602
+ this.rerankingConfig.provider !== 'none');
1603
+ if (useReranking && fused.length >= 5 && this.rerankerProvider) {
1604
+ try {
1605
+ if (!this.rerankerProvider.isInitialized()) {
1606
+ await this.rerankerProvider.initialize();
1607
+ }
1608
+ const candidateLimit = Math.max(this.rerankingConfig.topK, limit);
1609
+ const candidates = fused.slice(0, candidateLimit);
1610
+ const reranked = await this.rerankerProvider.rerank(query, candidates.map((c) => c.content), { topK: limit });
1611
+ if (!isError(reranked)) {
1612
+ return reranked
1613
+ .filter((rr) => rr.index >= 0 && rr.index < candidates.length)
1614
+ .map((rr) => ({
1615
+ ...candidates[rr.index],
1616
+ score: rr.score,
1617
+ _originalScore: candidates[rr.index].score,
1618
+ }))
1619
+ .filter((result) => result.score >= threshold)
1620
+ .slice(0, limit);
1621
+ }
1622
+ console.warn('[code-indexer] Hybrid reranking failed, using fused scores:', reranked.message);
1623
+ }
1624
+ catch (error) {
1625
+ console.warn('[code-indexer] Hybrid reranking error, using fused scores:', error);
1626
+ }
1627
+ }
1628
+ return fused
1629
+ .filter((result) => result.score >= threshold)
1630
+ .slice(0, limit);
1631
+ }
1632
+ /**
1633
+ * Find symbols by name pattern (exact match or wildcard).
1634
+ */
1635
+ async findSymbols(name, options = {}) {
1636
+ await this.initialize();
1637
+ const { type, path: pathFilter, limit = 20 } = options;
1638
+ const table = await this.ensureChunksTable();
1639
+ // Check if table has data
1640
+ const count = await table.countRows();
1641
+ if (count === 0) {
1642
+ return [];
1643
+ }
1644
+ // Build filters
1645
+ const filters = [];
1646
+ // Handle wildcard in name
1647
+ if (name.includes('*')) {
1648
+ const sqlPattern = name.replace(/\*/g, '%');
1649
+ filters.push(`symbol_name LIKE '${sanitizePath(sqlPattern)}'`);
1650
+ }
1651
+ else {
1652
+ filters.push(`symbol_name = '${sanitizePath(name)}'`);
1653
+ }
1654
+ if (type && type !== 'all') {
1655
+ const validType = sanitizeIdentifier(type);
1656
+ if (!VALID_SYMBOL_TYPES.has(validType)) {
1657
+ return createError('INVALID_SYMBOL_TYPE', `Unknown symbol type: ${type}`);
1658
+ }
1659
+ filters.push(`symbol_type = '${validType}'`);
1660
+ }
1661
+ if (pathFilter) {
1662
+ // Normalize path separators to match stored paths
1663
+ const normalizedPath = pathFilter.replace(/\\/g, '/');
1664
+ filters.push(`path LIKE '${sanitizePath(normalizedPath)}%'`);
1665
+ }
1666
+ // Query with filters
1667
+ let query = table.query().where(filters.join(' AND '));
1668
+ query = query.limit(limit);
1669
+ const results = await query.toArray();
1670
+ return results.map((r) => fromChunkRecord(r));
1671
+ }
1672
+ /**
1673
+ * Remove a file from the index.
1674
+ */
1675
+ async removeFile(filePath) {
1676
+ await this.initialize();
1677
+ const relativePath = path.isAbsolute(filePath)
1678
+ ? path.relative(this.rootPath, filePath)
1679
+ : filePath;
1680
+ const record = await this.getFileRecord(relativePath);
1681
+ if (record) {
1682
+ const chunkIds = JSON.parse(record.chunk_ids);
1683
+ await this.deleteChunks(chunkIds);
1684
+ await this.removeFromFileIndex(relativePath);
1685
+ }
1686
+ else {
1687
+ // Fallback: delete by path
1688
+ await this.deleteChunksForPath(relativePath);
1689
+ }
1690
+ if (this.graphIndexer) {
1691
+ await this.graphIndexer.removeFile(relativePath);
1692
+ }
1693
+ if (this.treeIndexer) {
1694
+ await this.treeIndexer.removeFile(relativePath);
1695
+ }
1696
+ }
1697
+ /**
1698
+ * Clear all code index data.
1699
+ */
1700
+ async clear() {
1701
+ await this.initialize();
1702
+ if (this.chunksTable) {
1703
+ await this.db.dropTable('code_chunks');
1704
+ this.chunksTable = null;
1705
+ }
1706
+ if (this.fileIndexTable) {
1707
+ await this.db.dropTable('code_file_index');
1708
+ this.fileIndexTable = null;
1709
+ }
1710
+ if (this.graphIndexer) {
1711
+ await this.graphIndexer.clear();
1712
+ }
1713
+ if (this.treeIndexer) {
1714
+ await this.treeIndexer.clear();
1715
+ }
1716
+ }
1717
+ /**
1718
+ * Get index statistics.
1719
+ */
1720
+ async getStats() {
1721
+ await this.initialize();
1722
+ const chunksTable = await this.ensureChunksTable();
1723
+ const fileTable = await this.ensureFileIndexTable();
1724
+ const totalChunks = await chunksTable.countRows();
1725
+ const totalFiles = await fileTable.countRows();
1726
+ // Count by language
1727
+ const fileRecords = await fileTable.query().select(['language']).toArray();
1728
+ const languages = {};
1729
+ for (const r of fileRecords) {
1730
+ const lang = r.language;
1731
+ languages[lang] = (languages[lang] || 0) + 1;
1732
+ }
1733
+ return { totalChunks, totalFiles, languages };
1734
+ }
1735
+ /**
1736
+ * Get set of indexed file paths for quick lookup.
1737
+ */
1738
+ async getIndexedFilePaths() {
1739
+ await this.initialize();
1740
+ const fileTable = await this.ensureFileIndexTable();
1741
+ const records = await fileTable.query().select(['path']).toArray();
1742
+ return new Set(records.map(r => r.path));
1743
+ }
1744
+ /**
1745
+ * Returns true when code graph indexing is configured.
1746
+ */
1747
+ hasGraphIndexer() {
1748
+ return this.graphIndexer !== null;
1749
+ }
1750
+ /**
1751
+ * Returns true when tree indexing is configured.
1752
+ */
1753
+ hasTreeIndexer() {
1754
+ return this.treeIndexer !== null;
1755
+ }
1756
+ /**
1757
+ * Access tree indexer for tree retrieval tools.
1758
+ */
1759
+ getTreeIndexer() {
1760
+ return this.treeIndexer;
1761
+ }
1762
+ /**
1763
+ * Rebuild tree index from existing code index data.
1764
+ * Useful when tree tables are missing/corrupted while code chunks already exist.
1765
+ */
1766
+ async rebuildTreeIndex() {
1767
+ await this.initialize();
1768
+ if (!this.treeIndexer) {
1769
+ throw new Error('Tree index is not enabled');
1770
+ }
1771
+ const stats = await this.treeIndexer.buildTree(this.getTreeDeps());
1772
+ return {
1773
+ totalDirs: stats.totalDirs,
1774
+ totalFiles: stats.totalFiles,
1775
+ maxDepth: stats.maxDepth,
1776
+ buildTimeMs: stats.buildTimeMs,
1777
+ };
1778
+ }
1779
+ /**
1780
+ * Ensure tree index is queryable.
1781
+ * Auto-recovers when root dir is missing or tree has never been indexed.
1782
+ */
1783
+ async ensureTreeIndexReady() {
1784
+ await this.initialize();
1785
+ if (!this.treeIndexer) {
1786
+ throw new Error('Tree index is not enabled');
1787
+ }
1788
+ const [rootDir, freshness] = await Promise.all([
1789
+ this.treeIndexer.getDirByPath(''),
1790
+ Promise.resolve(this.treeIndexer.getFreshnessInfo()),
1791
+ ]);
1792
+ if (!rootDir || freshness.indexed_at === 0) {
1793
+ await this.treeIndexer.buildTree(this.getTreeDeps());
1794
+ }
1795
+ }
1796
+ /**
1797
+ * Prepare graph data in-memory without embedding calls and without graph DB writes.
1798
+ * Useful for fast validation on different codebases.
1799
+ */
1800
+ async codeGraphPrepare(quality = this.codeConfig.graph.quality, options = {}) {
1801
+ await this.initialize();
1802
+ if (!this.graphIndexer) {
1803
+ throw new Error('Code graph indexing is not enabled');
1804
+ }
1805
+ const filter = this.createFileFilter(options);
1806
+ const include = options.include || this.codeConfig.include || DEFAULT_INCLUDE;
1807
+ let files = [];
1808
+ for (const pattern of include.map((p) => path.join(this.rootPath, p))) {
1809
+ const matches = await glob(pattern, {
1810
+ ignore: options.exclude || this.codeConfig.exclude || DEFAULT_EXCLUDE,
1811
+ nodir: true,
1812
+ });
1813
+ files.push(...matches);
1814
+ }
1815
+ files = [...new Set(files)].filter(filter).sort((a, b) => a.localeCompare(b));
1816
+ const chunksByFile = new Map();
1817
+ const parseErrors = [];
1818
+ for (const file of files) {
1819
+ const parseResult = await this.parseFile(file, options);
1820
+ if (isError(parseResult)) {
1821
+ parseErrors.push(parseResult);
1822
+ continue;
1823
+ }
1824
+ const relativePath = path.relative(this.rootPath, file).replace(/\\/g, '/');
1825
+ for (const chunk of parseResult.chunks) {
1826
+ chunk.path = chunk.path.replace(/\\/g, '/');
1827
+ }
1828
+ chunksByFile.set(relativePath, parseResult.chunks);
1829
+ }
1830
+ const prepared = await this.graphIndexer.prepareGraph(quality, chunksByFile);
1831
+ return {
1832
+ ...prepared,
1833
+ parseErrors,
1834
+ };
1835
+ }
1836
+ /**
1837
+ * Build full code graph index.
1838
+ */
1839
+ async codeGraphIndex(quality) {
1840
+ await this.initialize();
1841
+ if (!this.graphIndexer) {
1842
+ throw new Error('Code graph indexing is not enabled');
1843
+ }
1844
+ return this.graphIndexer.indexAll(quality);
1845
+ }
1846
+ /**
1847
+ * Incremental code graph update.
1848
+ */
1849
+ async codeGraphUpdate() {
1850
+ await this.initialize();
1851
+ if (!this.graphIndexer) {
1852
+ throw new Error('Code graph indexing is not enabled');
1853
+ }
1854
+ return this.graphIndexer.incrementalUpdate();
1855
+ }
1856
+ /**
1857
+ * Code graph statistics.
1858
+ */
1859
+ async codeGraphInfo() {
1860
+ await this.initialize();
1861
+ if (!this.graphIndexer) {
1862
+ throw new Error('Code graph indexing is not enabled');
1863
+ }
1864
+ return this.graphIndexer.getStats();
1865
+ }
1866
+ /**
1867
+ * Find references to a symbol using graph edges.
1868
+ */
1869
+ async graphRefs(input) {
1870
+ await this.initialize();
1871
+ if (!this.graphIndexer) {
1872
+ return createError('GRAPH_DISABLED', 'Code graph indexing is not enabled');
1873
+ }
1874
+ return this.graphIndexer.graphRefs(input);
1875
+ }
1876
+ /**
1877
+ * Build callers/callees graph for a symbol.
1878
+ */
1879
+ async graphCalls(input) {
1880
+ await this.initialize();
1881
+ if (!this.graphIndexer) {
1882
+ return createError('GRAPH_DISABLED', 'Code graph indexing is not enabled');
1883
+ }
1884
+ return this.graphIndexer.graphCalls(input);
1885
+ }
1886
+ /**
1887
+ * Resolve import dependency graph for a file.
1888
+ */
1889
+ async graphDeps(input) {
1890
+ await this.initialize();
1891
+ if (!this.graphIndexer) {
1892
+ return createError('GRAPH_DISABLED', 'Code graph indexing is not enabled');
1893
+ }
1894
+ return this.graphIndexer.graphDeps(input);
1895
+ }
1896
+ /**
1897
+ * Build class/interface hierarchy graph.
1898
+ */
1899
+ async graphHierarchy(input) {
1900
+ await this.initialize();
1901
+ if (!this.graphIndexer) {
1902
+ return createError('GRAPH_DISABLED', 'Code graph indexing is not enabled');
1903
+ }
1904
+ return this.graphIndexer.graphHierarchy(input);
1905
+ }
1906
+ /**
1907
+ * Export graph edges as DOT or JSON.
1908
+ */
1909
+ async codeGraphExport(format = 'json') {
1910
+ await this.initialize();
1911
+ if (!this.graphIndexer) {
1912
+ throw new Error('Code graph indexing is not enabled');
1913
+ }
1914
+ return this.graphIndexer.exportGraph(format);
1915
+ }
1916
+ // ========================================
1917
+ // WAL methods
1918
+ // ========================================
1919
+ /**
1920
+ * Get WAL status (pending files, failed files, etc.)
1921
+ */
1922
+ async getWalStatus() {
1923
+ return this.wal.getStatus();
1924
+ }
1925
+ /**
1926
+ * Get recovery plan from legacy WAL.
1927
+ */
1928
+ async getRecoveryPlan() {
1929
+ return this.wal.recover();
1930
+ }
1931
+ /**
1932
+ * Recover from interrupted indexing using WAL.
1933
+ * Re-indexes pending and failed files.
1934
+ *
1935
+ * @param maxRetries - Max retries per file for failed files
1936
+ * @returns Result with counts of recovered files
1937
+ */
1938
+ async recover(maxRetries = 3) {
1939
+ await this.initialize();
1940
+ const plan = await this.wal.recover();
1941
+ let pending = 0;
1942
+ let retried = 0;
1943
+ let failed = 0;
1944
+ const errors = [];
1945
+ // Re-index pending files (interrupted during indexing)
1946
+ for (const filePath of plan.pending) {
1947
+ console.log(`[code-indexer] Recovering pending file: ${filePath}`);
1948
+ const result = await this.indexFile(filePath);
1949
+ if (result.errors.length > 0) {
1950
+ errors.push(...result.errors);
1951
+ failed++;
1952
+ }
1953
+ else {
1954
+ pending++;
1955
+ }
1956
+ }
1957
+ // Retry failed files (if under max retries)
1958
+ for (const failedFile of plan.failed) {
1959
+ if (failedFile.retryCount >= maxRetries) {
1960
+ console.log(`[code-indexer] Skipping ${failedFile.path} (max retries reached: ${failedFile.retryCount})`);
1961
+ failed++;
1962
+ continue;
1963
+ }
1964
+ console.log(`[code-indexer] Retrying failed file: ${failedFile.path} (attempt ${failedFile.retryCount + 1})`);
1965
+ const result = await this.indexFile(failedFile.path);
1966
+ if (result.errors.length > 0) {
1967
+ errors.push(...result.errors);
1968
+ failed++;
1969
+ }
1970
+ else {
1971
+ retried++;
1972
+ }
1973
+ }
1974
+ // Handle incomplete batch
1975
+ if (plan.incompleteBatch && plan.incompleteBatchFiles.length > 0) {
1976
+ console.log(`[code-indexer] Recovering incomplete batch: ${plan.incompleteBatch}`);
1977
+ console.log(`[code-indexer] Files to recover: ${plan.incompleteBatchFiles.length}`);
1978
+ for (const filePath of plan.incompleteBatchFiles) {
1979
+ const result = await this.indexFile(filePath);
1980
+ if (result.errors.length > 0) {
1981
+ errors.push(...result.errors);
1982
+ failed++;
1983
+ }
1984
+ else {
1985
+ pending++;
1986
+ }
1987
+ }
1988
+ }
1989
+ // Compact WAL after recovery
1990
+ await this.wal.compact();
1991
+ const needsTreeReconcile = this.treeIndexer && (plan.pending.length > 0 ||
1992
+ plan.failed.length > 0 ||
1993
+ plan.incompleteBatch !== null);
1994
+ if (needsTreeReconcile && this.treeIndexer) {
1995
+ await this.treeIndexer.reconcileAfterWalRecovery(this.getTreeDeps());
1996
+ }
1997
+ return { pending, retried, failed, errors };
1998
+ }
1999
+ /**
2000
+ * Retry failed files from WAL.
2001
+ *
2002
+ * @param maxRetries - Max total retries per file
2003
+ * @returns Result with counts
2004
+ */
2005
+ async retryFailed(maxRetries = 3) {
2006
+ await this.initialize();
2007
+ const plan = await this.wal.recover();
2008
+ let retried = 0;
2009
+ let skipped = 0;
2010
+ const errors = [];
2011
+ for (const failedFile of plan.failed) {
2012
+ if (failedFile.retryCount >= maxRetries) {
2013
+ console.log(`[code-indexer] Skipping ${failedFile.path} (max retries: ${failedFile.retryCount})`);
2014
+ skipped++;
2015
+ continue;
2016
+ }
2017
+ console.log(`[code-indexer] Retrying: ${failedFile.path} (attempt ${failedFile.retryCount + 1})`);
2018
+ const result = await this.indexFile(failedFile.path);
2019
+ if (result.errors.length > 0) {
2020
+ errors.push(...result.errors);
2021
+ }
2022
+ else {
2023
+ retried++;
2024
+ }
2025
+ }
2026
+ // Compact WAL after retry
2027
+ await this.wal.compact();
2028
+ if (this.treeIndexer && retried > 0) {
2029
+ await this.treeIndexer.reconcileAfterWalRecovery(this.getTreeDeps());
2030
+ }
2031
+ return { retried, skipped, errors };
2032
+ }
2033
+ /**
2034
+ * Compact the WAL file.
2035
+ */
2036
+ async compactWal() {
2037
+ return this.wal.compact();
2038
+ }
2039
+ /**
2040
+ * Clear the WAL file.
2041
+ */
2042
+ async clearWal() {
2043
+ return this.wal.clear();
2044
+ }
2045
+ /**
2046
+ * Check if WAL exists.
2047
+ */
2048
+ walExists() {
2049
+ return this.wal.exists();
2050
+ }
2051
+ /**
2052
+ * Get the root path.
2053
+ */
2054
+ getRootPath() {
2055
+ return this.rootPath;
2056
+ }
2057
+ /**
2058
+ * Get the database path.
2059
+ */
2060
+ getDbPath() {
2061
+ return this.dbPath;
2062
+ }
2063
+ }
2064
+ // Export withIndexLock for testing
2065
+ export { withIndexLock };
2066
+ // Factory function
2067
export function createCodeIndexer(rootPath, dbPath, deps, options) {
    // All four arguments are forwarded unchanged to the CodeIndexer constructor.
    const indexer = new CodeIndexer(rootPath, dbPath, deps, options);
    return indexer;
}
2070
+ //# sourceMappingURL=index.js.map