rag-lite-ts 2.1.1 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (328)
  1. package/README.md +88 -5
  2. package/dist/{cli → cjs/cli}/indexer.js +73 -15
  3. package/dist/cjs/cli/ui-server.d.ts +5 -0
  4. package/dist/cjs/cli/ui-server.js +152 -0
  5. package/dist/{cli.js → cjs/cli.js} +25 -6
  6. package/dist/{core → cjs/core}/binary-index-format.js +6 -3
  7. package/dist/{core → cjs/core}/db.d.ts +56 -0
  8. package/dist/{core → cjs/core}/db.js +105 -0
  9. package/dist/{core → cjs/core}/ingestion.js +3 -0
  10. package/dist/cjs/core/knowledge-base-manager.d.ts +109 -0
  11. package/dist/cjs/core/knowledge-base-manager.js +256 -0
  12. package/dist/{core → cjs/core}/model-validator.js +1 -1
  13. package/dist/{core → cjs/core}/search-pipeline.js +1 -1
  14. package/dist/{core → cjs/core}/search.js +1 -1
  15. package/dist/cjs/core/vector-index-messages.d.ts +52 -0
  16. package/dist/cjs/core/vector-index-messages.js +5 -0
  17. package/dist/cjs/core/vector-index-worker.d.ts +6 -0
  18. package/dist/cjs/core/vector-index-worker.js +304 -0
  19. package/dist/cjs/core/vector-index.d.ts +107 -0
  20. package/dist/cjs/core/vector-index.js +344 -0
  21. package/dist/{factories → cjs/factories}/ingestion-factory.js +3 -7
  22. package/dist/{factories → cjs/factories}/search-factory.js +11 -0
  23. package/dist/{index-manager.d.ts → cjs/index-manager.d.ts} +23 -3
  24. package/dist/{index-manager.js → cjs/index-manager.js} +84 -15
  25. package/dist/{index.d.ts → cjs/index.d.ts} +2 -1
  26. package/dist/{index.js → cjs/index.js} +3 -1
  27. package/dist/esm/api-errors.d.ts +90 -0
  28. package/dist/esm/api-errors.js +320 -0
  29. package/dist/esm/cli/indexer.d.ts +11 -0
  30. package/dist/esm/cli/indexer.js +529 -0
  31. package/dist/esm/cli/search.d.ts +7 -0
  32. package/dist/esm/cli/search.js +332 -0
  33. package/dist/esm/cli/ui-server.d.ts +5 -0
  34. package/dist/esm/cli/ui-server.js +152 -0
  35. package/dist/esm/cli.d.ts +3 -0
  36. package/dist/esm/cli.js +548 -0
  37. package/dist/esm/config.d.ts +51 -0
  38. package/dist/esm/config.js +79 -0
  39. package/dist/esm/core/abstract-embedder.d.ts +125 -0
  40. package/dist/esm/core/abstract-embedder.js +264 -0
  41. package/dist/esm/core/actionable-error-messages.d.ts +60 -0
  42. package/dist/esm/core/actionable-error-messages.js +397 -0
  43. package/dist/esm/core/adapters.d.ts +93 -0
  44. package/dist/esm/core/adapters.js +139 -0
  45. package/dist/esm/core/batch-processing-optimizer.d.ts +155 -0
  46. package/dist/esm/core/batch-processing-optimizer.js +536 -0
  47. package/dist/esm/core/binary-index-format.d.ts +78 -0
  48. package/dist/esm/core/binary-index-format.js +294 -0
  49. package/dist/esm/core/chunker.d.ts +119 -0
  50. package/dist/esm/core/chunker.js +73 -0
  51. package/dist/esm/core/cli-database-utils.d.ts +53 -0
  52. package/dist/esm/core/cli-database-utils.js +239 -0
  53. package/dist/esm/core/config.d.ts +102 -0
  54. package/dist/esm/core/config.js +247 -0
  55. package/dist/esm/core/content-errors.d.ts +111 -0
  56. package/dist/esm/core/content-errors.js +362 -0
  57. package/dist/esm/core/content-manager.d.ts +335 -0
  58. package/dist/esm/core/content-manager.js +1476 -0
  59. package/dist/esm/core/content-performance-optimizer.d.ts +150 -0
  60. package/dist/esm/core/content-performance-optimizer.js +516 -0
  61. package/dist/esm/core/content-resolver.d.ts +104 -0
  62. package/dist/esm/core/content-resolver.js +285 -0
  63. package/dist/esm/core/cross-modal-search.d.ts +164 -0
  64. package/dist/esm/core/cross-modal-search.js +342 -0
  65. package/dist/esm/core/database-connection-manager.d.ts +109 -0
  66. package/dist/esm/core/database-connection-manager.js +310 -0
  67. package/dist/esm/core/db.d.ts +269 -0
  68. package/dist/esm/core/db.js +1000 -0
  69. package/dist/esm/core/embedder-factory.d.ts +154 -0
  70. package/dist/esm/core/embedder-factory.js +311 -0
  71. package/dist/esm/core/error-handler.d.ts +112 -0
  72. package/dist/esm/core/error-handler.js +239 -0
  73. package/dist/esm/core/index.d.ts +59 -0
  74. package/dist/esm/core/index.js +69 -0
  75. package/dist/esm/core/ingestion.d.ts +202 -0
  76. package/dist/esm/core/ingestion.js +904 -0
  77. package/dist/esm/core/interfaces.d.ts +408 -0
  78. package/dist/esm/core/interfaces.js +106 -0
  79. package/dist/esm/core/knowledge-base-manager.d.ts +109 -0
  80. package/dist/esm/core/knowledge-base-manager.js +256 -0
  81. package/dist/esm/core/lazy-dependency-loader.d.ts +147 -0
  82. package/dist/esm/core/lazy-dependency-loader.js +435 -0
  83. package/dist/esm/core/mode-detection-service.d.ts +150 -0
  84. package/dist/esm/core/mode-detection-service.js +565 -0
  85. package/dist/esm/core/mode-model-validator.d.ts +92 -0
  86. package/dist/esm/core/mode-model-validator.js +203 -0
  87. package/dist/esm/core/model-registry.d.ts +116 -0
  88. package/dist/esm/core/model-registry.js +411 -0
  89. package/dist/esm/core/model-validator.d.ts +217 -0
  90. package/dist/esm/core/model-validator.js +782 -0
  91. package/dist/esm/core/path-manager.d.ts +47 -0
  92. package/dist/esm/core/path-manager.js +71 -0
  93. package/dist/esm/core/raglite-paths.d.ts +121 -0
  94. package/dist/esm/core/raglite-paths.js +145 -0
  95. package/dist/esm/core/reranking-config.d.ts +42 -0
  96. package/dist/esm/core/reranking-config.js +147 -0
  97. package/dist/esm/core/reranking-factory.d.ts +92 -0
  98. package/dist/esm/core/reranking-factory.js +410 -0
  99. package/dist/esm/core/reranking-strategies.d.ts +310 -0
  100. package/dist/esm/core/reranking-strategies.js +650 -0
  101. package/dist/esm/core/resource-cleanup.d.ts +163 -0
  102. package/dist/esm/core/resource-cleanup.js +371 -0
  103. package/dist/esm/core/resource-manager.d.ts +212 -0
  104. package/dist/esm/core/resource-manager.js +564 -0
  105. package/dist/esm/core/search-pipeline.d.ts +111 -0
  106. package/dist/esm/core/search-pipeline.js +287 -0
  107. package/dist/esm/core/search.d.ts +141 -0
  108. package/dist/esm/core/search.js +320 -0
  109. package/dist/esm/core/streaming-operations.d.ts +145 -0
  110. package/dist/esm/core/streaming-operations.js +409 -0
  111. package/dist/esm/core/types.d.ts +66 -0
  112. package/dist/esm/core/types.js +6 -0
  113. package/dist/esm/core/universal-embedder.d.ts +177 -0
  114. package/dist/esm/core/universal-embedder.js +139 -0
  115. package/dist/esm/core/validation-messages.d.ts +99 -0
  116. package/dist/esm/core/validation-messages.js +334 -0
  117. package/dist/esm/core/vector-index-messages.d.ts +52 -0
  118. package/dist/esm/core/vector-index-messages.js +5 -0
  119. package/dist/esm/core/vector-index-worker.d.ts +6 -0
  120. package/dist/esm/core/vector-index-worker.js +304 -0
  121. package/dist/esm/core/vector-index.d.ts +107 -0
  122. package/dist/esm/core/vector-index.js +344 -0
  123. package/dist/esm/dom-polyfills.d.ts +6 -0
  124. package/dist/esm/dom-polyfills.js +37 -0
  125. package/dist/esm/factories/index.d.ts +27 -0
  126. package/dist/esm/factories/index.js +29 -0
  127. package/dist/esm/factories/ingestion-factory.d.ts +200 -0
  128. package/dist/esm/factories/ingestion-factory.js +473 -0
  129. package/dist/esm/factories/search-factory.d.ts +154 -0
  130. package/dist/esm/factories/search-factory.js +355 -0
  131. package/dist/esm/file-processor.d.ts +147 -0
  132. package/dist/esm/file-processor.js +963 -0
  133. package/dist/esm/index-manager.d.ts +136 -0
  134. package/dist/esm/index-manager.js +667 -0
  135. package/dist/esm/index.d.ts +76 -0
  136. package/dist/esm/index.js +112 -0
  137. package/dist/esm/indexer.d.ts +7 -0
  138. package/dist/esm/indexer.js +54 -0
  139. package/dist/esm/ingestion.d.ts +63 -0
  140. package/dist/esm/ingestion.js +124 -0
  141. package/dist/esm/mcp-server.d.ts +46 -0
  142. package/dist/esm/mcp-server.js +1820 -0
  143. package/dist/esm/multimodal/clip-embedder.d.ts +327 -0
  144. package/dist/esm/multimodal/clip-embedder.js +996 -0
  145. package/dist/esm/multimodal/index.d.ts +6 -0
  146. package/dist/esm/multimodal/index.js +6 -0
  147. package/dist/esm/preprocess.d.ts +19 -0
  148. package/dist/esm/preprocess.js +203 -0
  149. package/dist/esm/preprocessors/index.d.ts +17 -0
  150. package/dist/esm/preprocessors/index.js +38 -0
  151. package/dist/esm/preprocessors/mdx.d.ts +25 -0
  152. package/dist/esm/preprocessors/mdx.js +101 -0
  153. package/dist/esm/preprocessors/mermaid.d.ts +68 -0
  154. package/dist/esm/preprocessors/mermaid.js +329 -0
  155. package/dist/esm/preprocessors/registry.d.ts +56 -0
  156. package/dist/esm/preprocessors/registry.js +179 -0
  157. package/dist/esm/run-error-recovery-tests.d.ts +7 -0
  158. package/dist/esm/run-error-recovery-tests.js +101 -0
  159. package/dist/esm/search-standalone.d.ts +7 -0
  160. package/dist/esm/search-standalone.js +117 -0
  161. package/dist/esm/search.d.ts +99 -0
  162. package/dist/esm/search.js +177 -0
  163. package/dist/esm/test-utils.d.ts +18 -0
  164. package/dist/esm/test-utils.js +27 -0
  165. package/dist/esm/text/chunker.d.ts +33 -0
  166. package/dist/esm/text/chunker.js +279 -0
  167. package/dist/esm/text/embedder.d.ts +111 -0
  168. package/dist/esm/text/embedder.js +386 -0
  169. package/dist/esm/text/index.d.ts +8 -0
  170. package/dist/esm/text/index.js +9 -0
  171. package/dist/esm/text/preprocessors/index.d.ts +17 -0
  172. package/dist/esm/text/preprocessors/index.js +38 -0
  173. package/dist/esm/text/preprocessors/mdx.d.ts +25 -0
  174. package/dist/esm/text/preprocessors/mdx.js +101 -0
  175. package/dist/esm/text/preprocessors/mermaid.d.ts +68 -0
  176. package/dist/esm/text/preprocessors/mermaid.js +330 -0
  177. package/dist/esm/text/preprocessors/registry.d.ts +56 -0
  178. package/dist/esm/text/preprocessors/registry.js +180 -0
  179. package/dist/esm/text/reranker.d.ts +49 -0
  180. package/dist/esm/text/reranker.js +274 -0
  181. package/dist/esm/text/sentence-transformer-embedder.d.ts +96 -0
  182. package/dist/esm/text/sentence-transformer-embedder.js +340 -0
  183. package/dist/esm/text/tokenizer.d.ts +22 -0
  184. package/dist/esm/text/tokenizer.js +64 -0
  185. package/dist/esm/types.d.ts +83 -0
  186. package/dist/esm/types.js +3 -0
  187. package/dist/esm/utils/vector-math.d.ts +31 -0
  188. package/dist/esm/utils/vector-math.js +70 -0
  189. package/package.json +39 -14
  190. package/dist/core/vector-index.d.ts +0 -72
  191. package/dist/core/vector-index.js +0 -331
  192. /package/dist/{api-errors.d.ts → cjs/api-errors.d.ts} +0 -0
  193. /package/dist/{api-errors.js → cjs/api-errors.js} +0 -0
  194. /package/dist/{cli → cjs/cli}/indexer.d.ts +0 -0
  195. /package/dist/{cli → cjs/cli}/search.d.ts +0 -0
  196. /package/dist/{cli → cjs/cli}/search.js +0 -0
  197. /package/dist/{cli.d.ts → cjs/cli.d.ts} +0 -0
  198. /package/dist/{config.d.ts → cjs/config.d.ts} +0 -0
  199. /package/dist/{config.js → cjs/config.js} +0 -0
  200. /package/dist/{core → cjs/core}/abstract-embedder.d.ts +0 -0
  201. /package/dist/{core → cjs/core}/abstract-embedder.js +0 -0
  202. /package/dist/{core → cjs/core}/actionable-error-messages.d.ts +0 -0
  203. /package/dist/{core → cjs/core}/actionable-error-messages.js +0 -0
  204. /package/dist/{core → cjs/core}/adapters.d.ts +0 -0
  205. /package/dist/{core → cjs/core}/adapters.js +0 -0
  206. /package/dist/{core → cjs/core}/batch-processing-optimizer.d.ts +0 -0
  207. /package/dist/{core → cjs/core}/batch-processing-optimizer.js +0 -0
  208. /package/dist/{core → cjs/core}/binary-index-format.d.ts +0 -0
  209. /package/dist/{core → cjs/core}/chunker.d.ts +0 -0
  210. /package/dist/{core → cjs/core}/chunker.js +0 -0
  211. /package/dist/{core → cjs/core}/cli-database-utils.d.ts +0 -0
  212. /package/dist/{core → cjs/core}/cli-database-utils.js +0 -0
  213. /package/dist/{core → cjs/core}/config.d.ts +0 -0
  214. /package/dist/{core → cjs/core}/config.js +0 -0
  215. /package/dist/{core → cjs/core}/content-errors.d.ts +0 -0
  216. /package/dist/{core → cjs/core}/content-errors.js +0 -0
  217. /package/dist/{core → cjs/core}/content-manager.d.ts +0 -0
  218. /package/dist/{core → cjs/core}/content-manager.js +0 -0
  219. /package/dist/{core → cjs/core}/content-performance-optimizer.d.ts +0 -0
  220. /package/dist/{core → cjs/core}/content-performance-optimizer.js +0 -0
  221. /package/dist/{core → cjs/core}/content-resolver.d.ts +0 -0
  222. /package/dist/{core → cjs/core}/content-resolver.js +0 -0
  223. /package/dist/{core → cjs/core}/cross-modal-search.d.ts +0 -0
  224. /package/dist/{core → cjs/core}/cross-modal-search.js +0 -0
  225. /package/dist/{core → cjs/core}/database-connection-manager.d.ts +0 -0
  226. /package/dist/{core → cjs/core}/database-connection-manager.js +0 -0
  227. /package/dist/{core → cjs/core}/embedder-factory.d.ts +0 -0
  228. /package/dist/{core → cjs/core}/embedder-factory.js +0 -0
  229. /package/dist/{core → cjs/core}/error-handler.d.ts +0 -0
  230. /package/dist/{core → cjs/core}/error-handler.js +0 -0
  231. /package/dist/{core → cjs/core}/index.d.ts +0 -0
  232. /package/dist/{core → cjs/core}/index.js +0 -0
  233. /package/dist/{core → cjs/core}/ingestion.d.ts +0 -0
  234. /package/dist/{core → cjs/core}/interfaces.d.ts +0 -0
  235. /package/dist/{core → cjs/core}/interfaces.js +0 -0
  236. /package/dist/{core → cjs/core}/lazy-dependency-loader.d.ts +0 -0
  237. /package/dist/{core → cjs/core}/lazy-dependency-loader.js +0 -0
  238. /package/dist/{core → cjs/core}/mode-detection-service.d.ts +0 -0
  239. /package/dist/{core → cjs/core}/mode-detection-service.js +0 -0
  240. /package/dist/{core → cjs/core}/mode-model-validator.d.ts +0 -0
  241. /package/dist/{core → cjs/core}/mode-model-validator.js +0 -0
  242. /package/dist/{core → cjs/core}/model-registry.d.ts +0 -0
  243. /package/dist/{core → cjs/core}/model-registry.js +0 -0
  244. /package/dist/{core → cjs/core}/model-validator.d.ts +0 -0
  245. /package/dist/{core → cjs/core}/path-manager.d.ts +0 -0
  246. /package/dist/{core → cjs/core}/path-manager.js +0 -0
  247. /package/dist/{core → cjs/core}/raglite-paths.d.ts +0 -0
  248. /package/dist/{core → cjs/core}/raglite-paths.js +0 -0
  249. /package/dist/{core → cjs/core}/reranking-config.d.ts +0 -0
  250. /package/dist/{core → cjs/core}/reranking-config.js +0 -0
  251. /package/dist/{core → cjs/core}/reranking-factory.d.ts +0 -0
  252. /package/dist/{core → cjs/core}/reranking-factory.js +0 -0
  253. /package/dist/{core → cjs/core}/reranking-strategies.d.ts +0 -0
  254. /package/dist/{core → cjs/core}/reranking-strategies.js +0 -0
  255. /package/dist/{core → cjs/core}/resource-cleanup.d.ts +0 -0
  256. /package/dist/{core → cjs/core}/resource-cleanup.js +0 -0
  257. /package/dist/{core → cjs/core}/resource-manager.d.ts +0 -0
  258. /package/dist/{core → cjs/core}/resource-manager.js +0 -0
  259. /package/dist/{core → cjs/core}/search-pipeline.d.ts +0 -0
  260. /package/dist/{core → cjs/core}/search.d.ts +0 -0
  261. /package/dist/{core → cjs/core}/streaming-operations.d.ts +0 -0
  262. /package/dist/{core → cjs/core}/streaming-operations.js +0 -0
  263. /package/dist/{core → cjs/core}/types.d.ts +0 -0
  264. /package/dist/{core → cjs/core}/types.js +0 -0
  265. /package/dist/{core → cjs/core}/universal-embedder.d.ts +0 -0
  266. /package/dist/{core → cjs/core}/universal-embedder.js +0 -0
  267. /package/dist/{core → cjs/core}/validation-messages.d.ts +0 -0
  268. /package/dist/{core → cjs/core}/validation-messages.js +0 -0
  269. /package/dist/{dom-polyfills.d.ts → cjs/dom-polyfills.d.ts} +0 -0
  270. /package/dist/{dom-polyfills.js → cjs/dom-polyfills.js} +0 -0
  271. /package/dist/{factories → cjs/factories}/index.d.ts +0 -0
  272. /package/dist/{factories → cjs/factories}/index.js +0 -0
  273. /package/dist/{factories → cjs/factories}/ingestion-factory.d.ts +0 -0
  274. /package/dist/{factories → cjs/factories}/search-factory.d.ts +0 -0
  275. /package/dist/{file-processor.d.ts → cjs/file-processor.d.ts} +0 -0
  276. /package/dist/{file-processor.js → cjs/file-processor.js} +0 -0
  277. /package/dist/{indexer.d.ts → cjs/indexer.d.ts} +0 -0
  278. /package/dist/{indexer.js → cjs/indexer.js} +0 -0
  279. /package/dist/{ingestion.d.ts → cjs/ingestion.d.ts} +0 -0
  280. /package/dist/{ingestion.js → cjs/ingestion.js} +0 -0
  281. /package/dist/{mcp-server.d.ts → cjs/mcp-server.d.ts} +0 -0
  282. /package/dist/{mcp-server.js → cjs/mcp-server.js} +0 -0
  283. /package/dist/{multimodal → cjs/multimodal}/clip-embedder.d.ts +0 -0
  284. /package/dist/{multimodal → cjs/multimodal}/clip-embedder.js +0 -0
  285. /package/dist/{multimodal → cjs/multimodal}/index.d.ts +0 -0
  286. /package/dist/{multimodal → cjs/multimodal}/index.js +0 -0
  287. /package/dist/{preprocess.d.ts → cjs/preprocess.d.ts} +0 -0
  288. /package/dist/{preprocess.js → cjs/preprocess.js} +0 -0
  289. /package/dist/{preprocessors → cjs/preprocessors}/index.d.ts +0 -0
  290. /package/dist/{preprocessors → cjs/preprocessors}/index.js +0 -0
  291. /package/dist/{preprocessors → cjs/preprocessors}/mdx.d.ts +0 -0
  292. /package/dist/{preprocessors → cjs/preprocessors}/mdx.js +0 -0
  293. /package/dist/{preprocessors → cjs/preprocessors}/mermaid.d.ts +0 -0
  294. /package/dist/{preprocessors → cjs/preprocessors}/mermaid.js +0 -0
  295. /package/dist/{preprocessors → cjs/preprocessors}/registry.d.ts +0 -0
  296. /package/dist/{preprocessors → cjs/preprocessors}/registry.js +0 -0
  297. /package/dist/{run-error-recovery-tests.d.ts → cjs/run-error-recovery-tests.d.ts} +0 -0
  298. /package/dist/{run-error-recovery-tests.js → cjs/run-error-recovery-tests.js} +0 -0
  299. /package/dist/{search-standalone.d.ts → cjs/search-standalone.d.ts} +0 -0
  300. /package/dist/{search-standalone.js → cjs/search-standalone.js} +0 -0
  301. /package/dist/{search.d.ts → cjs/search.d.ts} +0 -0
  302. /package/dist/{search.js → cjs/search.js} +0 -0
  303. /package/dist/{test-utils.d.ts → cjs/test-utils.d.ts} +0 -0
  304. /package/dist/{test-utils.js → cjs/test-utils.js} +0 -0
  305. /package/dist/{text → cjs/text}/chunker.d.ts +0 -0
  306. /package/dist/{text → cjs/text}/chunker.js +0 -0
  307. /package/dist/{text → cjs/text}/embedder.d.ts +0 -0
  308. /package/dist/{text → cjs/text}/embedder.js +0 -0
  309. /package/dist/{text → cjs/text}/index.d.ts +0 -0
  310. /package/dist/{text → cjs/text}/index.js +0 -0
  311. /package/dist/{text → cjs/text}/preprocessors/index.d.ts +0 -0
  312. /package/dist/{text → cjs/text}/preprocessors/index.js +0 -0
  313. /package/dist/{text → cjs/text}/preprocessors/mdx.d.ts +0 -0
  314. /package/dist/{text → cjs/text}/preprocessors/mdx.js +0 -0
  315. /package/dist/{text → cjs/text}/preprocessors/mermaid.d.ts +0 -0
  316. /package/dist/{text → cjs/text}/preprocessors/mermaid.js +0 -0
  317. /package/dist/{text → cjs/text}/preprocessors/registry.d.ts +0 -0
  318. /package/dist/{text → cjs/text}/preprocessors/registry.js +0 -0
  319. /package/dist/{text → cjs/text}/reranker.d.ts +0 -0
  320. /package/dist/{text → cjs/text}/reranker.js +0 -0
  321. /package/dist/{text → cjs/text}/sentence-transformer-embedder.d.ts +0 -0
  322. /package/dist/{text → cjs/text}/sentence-transformer-embedder.js +0 -0
  323. /package/dist/{text → cjs/text}/tokenizer.d.ts +0 -0
  324. /package/dist/{text → cjs/text}/tokenizer.js +0 -0
  325. /package/dist/{types.d.ts → cjs/types.d.ts} +0 -0
  326. /package/dist/{types.js → cjs/types.js} +0 -0
  327. /package/dist/{utils → cjs/utils}/vector-math.d.ts +0 -0
  328. /package/dist/{utils → cjs/utils}/vector-math.js +0 -0
@@ -0,0 +1,1000 @@
1
+ /**
2
+ * CORE MODULE — Shared between text-only (rag-lite-ts) and future multimodal (rag-lite-mm)
3
+ * Model-agnostic. No transformer or modality-specific logic.
4
+ */
5
+ import sqlite3 from 'sqlite3';
6
+ import { promisify } from 'util';
7
+ import { handleError, ErrorSeverity, createError } from './error-handler.js';
/**
 * Opens a SQLite database and wraps it in a small promise-based facade.
 *
 * The returned connection exposes the raw `db` handle plus `run`, `get`,
 * `all` and `close`, each returning a Promise. `run` resolves with the
 * statement context supplied by the driver (the callback's `this`) and
 * rejects with an error produced by `enhanceSQLiteError`.
 *
 * @param dbPath - Path to the SQLite database file
 * @returns Promise that resolves to the connection object once
 *          `PRAGMA foreign_keys = ON` has been applied
 */
export function openDatabase(dbPath) {
    return new Promise((resolveConnection, rejectConnection) => {
        const db = new sqlite3.Database(dbPath, (openErr) => {
            if (openErr) {
                const reason = openErr.message;
                if (!reason.includes('ENOENT')) {
                    // Every category except "file not found" is fatal for this call.
                    let failure;
                    if (reason.includes('EACCES') || reason.includes('permission')) {
                        failure = createError.database(`Permission denied accessing database: ${dbPath}. Check file permissions.`);
                    }
                    else if (reason.includes('SQLITE_CORRUPT')) {
                        failure = createError.database(`Database file is corrupted: ${dbPath}. Try running 'raglite rebuild'.`);
                    }
                    else {
                        failure = createError.database(`Failed to open database at ${dbPath}: ${reason}`);
                    }
                    rejectConnection(failure);
                    return;
                }
                // A missing file is only reported at INFO severity; setup then continues below.
                handleError(createError.fileSystem(`Database file not found: ${dbPath}. It will be created automatically.`), 'Database Connection', { severity: ErrorSeverity.INFO });
            }
            // Enable foreign key constraints before handing the connection out
            db.run('PRAGMA foreign_keys = ON', (pragmaErr) => {
                if (pragmaErr) {
                    rejectConnection(createError.database(`Failed to enable foreign keys: ${pragmaErr.message}`));
                    return;
                }
                // Hand-rolled wrapper: the driver passes the statement context as
                // `this`, so the completion callback must be a regular function.
                const runStatement = (sql, params) => new Promise((resolveRun, rejectRun) => {
                    db.run(sql, params || [], function onRunSettled(runErr) {
                        if (runErr) {
                            rejectRun(enhanceSQLiteError(runErr, sql));
                            return;
                        }
                        resolveRun(this);
                    });
                });
                resolveConnection({
                    db,
                    run: runStatement,
                    get: promisify(db.get.bind(db)),
                    all: promisify(db.all.bind(db)),
                    close: promisify(db.close.bind(db))
                });
            });
        });
    });
}
/**
 * Builds a friendlier Error from a raw SQLite driver error.
 *
 * Well-known failure categories (busy, full, corrupt, UNIQUE / FOREIGN KEY
 * violations) get an actionable message; anything else keeps the original
 * message. Short SQL statements (under 200 characters) are appended for context.
 *
 * The driver's `code` and `errno` are copied onto the returned error when
 * present, and the original error is attached as `cause`, so callers can still
 * branch on the SQLite result code and inspect the original stack.
 *
 * @param error - Error reported by the SQLite driver
 * @param sql - SQL text that was being executed (optional)
 * @returns A new Error carrying the enhanced message
 */
function enhanceSQLiteError(error, sql) {
    let enhancedMessage = error.message;
    if (error.message.includes('SQLITE_BUSY')) {
        enhancedMessage = 'Database is locked by another process. Ensure no other RAG-lite instances are running.';
    }
    else if (error.message.includes('SQLITE_FULL')) {
        enhancedMessage = 'Database disk is full. Free up disk space and try again.';
    }
    else if (error.message.includes('SQLITE_CORRUPT')) {
        enhancedMessage = 'Database file is corrupted. Try running "raglite rebuild" to recreate it.';
    }
    else if (error.message.includes('UNIQUE constraint failed')) {
        enhancedMessage = `Duplicate entry detected: ${error.message}. This item may already exist.`;
    }
    else if (error.message.includes('FOREIGN KEY constraint failed')) {
        enhancedMessage = `Foreign key constraint violation: ${error.message}. Referenced record may not exist.`;
    }
    if (sql && sql.length < 200) {
        enhancedMessage += `\nSQL: ${sql}`;
    }
    const enhanced = new Error(enhancedMessage);
    // Keep the machine-readable details that a bare `new Error(message)` would drop.
    enhanced.cause = error;
    if (error.code !== undefined) {
        enhanced.code = error.code;
    }
    if (error.errno !== undefined) {
        enhanced.errno = error.errno;
    }
    return enhanced;
}
/**
 * Creates every table and index the store needs, if they do not exist yet.
 *
 * Statements are executed one at a time, in order, through `connection.run`.
 * Any failure is rethrown as a single "Failed to initialize database schema"
 * error. There is no migration logic: the schema assumes fresh ingestion.
 *
 * @param connection - Database connection object
 */
export async function initializeSchema(connection) {
    // Ordered DDL: tables first, then the supporting indexes.
    const schemaStatements = [
        // documents: content type support plus a content_id reference
        `
      CREATE TABLE IF NOT EXISTS documents (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        content_id TEXT, -- References content_metadata.id
        source TEXT NOT NULL UNIQUE,
        title TEXT NOT NULL,
        content_type TEXT DEFAULT 'text',
        metadata TEXT,
        created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
        updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
        FOREIGN KEY (content_id) REFERENCES content_metadata(id)
      )
    `,
        // chunks: content type and metadata support
        `
      CREATE TABLE IF NOT EXISTS chunks (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        embedding_id TEXT NOT NULL UNIQUE,
        document_id INTEGER NOT NULL,
        content TEXT NOT NULL,
        content_type TEXT DEFAULT 'text',
        chunk_index INTEGER NOT NULL,
        metadata TEXT,
        created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
        FOREIGN KEY (document_id) REFERENCES documents(id) ON DELETE CASCADE
      )
    `,
        // content_metadata: unified content system
        `
      CREATE TABLE IF NOT EXISTS content_metadata (
        id TEXT PRIMARY KEY, -- Hash-based content ID
        storage_type TEXT NOT NULL CHECK (storage_type IN ('filesystem', 'content_dir')),
        original_path TEXT, -- Original file path (filesystem only)
        content_path TEXT NOT NULL, -- Actual storage path
        display_name TEXT NOT NULL, -- User-friendly name
        content_type TEXT NOT NULL, -- MIME type
        file_size INTEGER NOT NULL, -- Size in bytes
        content_hash TEXT NOT NULL, -- SHA-256 hash
        created_at DATETIME DEFAULT CURRENT_TIMESTAMP
      )
    `,
        // storage_stats: basic content directory tracking (single row, id = 1)
        `
      CREATE TABLE IF NOT EXISTS storage_stats (
        id INTEGER PRIMARY KEY CHECK (id = 1),
        content_dir_files INTEGER DEFAULT 0,
        content_dir_size INTEGER DEFAULT 0,
        filesystem_refs INTEGER DEFAULT 0,
        last_cleanup DATETIME,
        updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
      )
    `,
        // system_info: mode persistence and model tracking (single row, id = 1)
        `
      CREATE TABLE IF NOT EXISTS system_info (
        id INTEGER PRIMARY KEY CHECK (id = 1),

        -- Core mode and model information
        mode TEXT NOT NULL DEFAULT 'text' CHECK (mode IN ('text', 'multimodal')),
        model_name TEXT NOT NULL DEFAULT 'sentence-transformers/all-MiniLM-L6-v2',
        model_type TEXT NOT NULL DEFAULT 'sentence-transformer' CHECK (model_type IN ('sentence-transformer', 'clip')),
        model_dimensions INTEGER NOT NULL DEFAULT 384,
        model_version TEXT NOT NULL DEFAULT '',

        -- Content type support (JSON array)
        supported_content_types TEXT NOT NULL DEFAULT '["text"]',

        -- Reranking configuration
        reranking_strategy TEXT DEFAULT 'cross-encoder' CHECK (
          reranking_strategy IN ('cross-encoder', 'text-derived', 'disabled')
        ),
        reranking_model TEXT,
        reranking_config TEXT, -- JSON configuration for strategy-specific settings

        -- Timestamps
        created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
        updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
      )
    `,
        // Indexes on chunks / documents
        `
      CREATE INDEX IF NOT EXISTS idx_chunks_document_id ON chunks(document_id)
    `,
        `
      CREATE INDEX IF NOT EXISTS idx_chunks_embedding_id ON chunks(embedding_id)
    `,
        `
      CREATE INDEX IF NOT EXISTS idx_documents_source ON documents(source)
    `,
        `
      CREATE INDEX IF NOT EXISTS idx_chunks_content_type ON chunks(content_type)
    `,
        `
      CREATE INDEX IF NOT EXISTS idx_documents_content_type ON documents(content_type)
    `,
        `
      CREATE INDEX IF NOT EXISTS idx_documents_content_id ON documents(content_id)
    `,
        // Indexes on content_metadata for efficient lookup
        `
      CREATE INDEX IF NOT EXISTS idx_content_hash ON content_metadata(content_hash)
    `,
        `
      CREATE INDEX IF NOT EXISTS idx_storage_type ON content_metadata(storage_type)
    `
    ];
    try {
        // Sequential on purpose: each statement finishes before the next starts.
        for (const statement of schemaStatements) {
            await connection.run(statement);
        }
        console.log('Database schema initialized successfully');
    }
    catch (error) {
        const detail = error instanceof Error ? error.message : 'Unknown error';
        throw new Error(`Failed to initialize database schema: ${detail}`);
    }
}
/**
 * Adds a row to the `documents` table and returns its generated ID.
 *
 * @param connection - Database connection object
 * @param source - Source path of the document (must be unique)
 * @param title - Title of the document
 * @param contentType - Type of content ('text', 'image', etc.)
 * @param metadata - Optional metadata object, stored as JSON
 * @param contentId - Optional content ID referencing content_metadata table
 * @returns Promise that resolves to the document ID
 * @throws Error when the source already exists or the insert fails
 */
export async function insertDocument(connection, source, title, contentType = 'text', metadata, contentId) {
    try {
        // Validate content type
        validateContentType(contentType);
        const serializedMetadata = metadata ? JSON.stringify(metadata) : null;
        const rowValues = [contentId || null, source, title, contentType, serializedMetadata];
        const outcome = await connection.run('INSERT INTO documents (content_id, source, title, content_type, metadata) VALUES (?, ?, ?, ?, ?)', rowValues);
        const newId = outcome.lastID;
        if (typeof newId !== 'number' || newId <= 0) {
            throw new Error('Failed to get document ID after insertion');
        }
        return newId;
    }
    catch (error) {
        const isError = error instanceof Error;
        // A UNIQUE violation can only come from the `source` column here.
        if (isError && error.message.includes('UNIQUE constraint failed')) {
            throw new Error(`Document with source '${source}' already exists`);
        }
        const detail = isError ? error.message : 'Unknown error';
        throw new Error(`Failed to insert document: ${detail}`);
    }
}
/**
 * Writes a chunk row, replacing any row that conflicts with it (upsert).
 *
 * @param connection - Database connection object
 * @param embeddingId - Unique embedding ID for the chunk
 * @param documentId - ID of the parent document
 * @param content - Content of the chunk (text, image path, etc.)
 * @param chunkIndex - Index of the chunk within the document
 * @param contentType - Type of content ('text', 'image', etc.)
 * @param metadata - Optional metadata object, stored as JSON
 * @throws Error when the parent document is missing or the write fails
 */
export async function insertChunk(connection, embeddingId, documentId, content, chunkIndex, contentType = 'text', metadata) {
    try {
        // Validate content type
        validateContentType(contentType);
        const serializedMetadata = metadata ? JSON.stringify(metadata) : null;
        const rowValues = [embeddingId, documentId, content, chunkIndex, contentType, serializedMetadata];
        // INSERT OR REPLACE lets re-ingested chunks overwrite their earlier rows
        await connection.run('INSERT OR REPLACE INTO chunks (embedding_id, document_id, content, chunk_index, content_type, metadata) VALUES (?, ?, ?, ?, ?, ?)', rowValues);
    }
    catch (error) {
        const isError = error instanceof Error;
        if (isError && error.message.includes('FOREIGN KEY constraint failed')) {
            throw new Error(`Document with ID ${documentId} does not exist`);
        }
        const detail = isError ? error.message : 'Unknown error';
        throw new Error(`Failed to insert/update chunk: ${detail}`);
    }
}
/**
 * Returns the ID of the document stored under `source`, inserting it first
 * when no such row exists. An existing row is returned as-is; its title,
 * content type and metadata are not updated.
 *
 * The lookup and the insert are separate statements, so two overlapping calls
 * for the same source can both miss the lookup. The call that then loses the
 * insert (UNIQUE violation) re-reads the row and returns the winner's ID
 * instead of failing.
 *
 * @param connection - Database connection object
 * @param source - Source path of the document
 * @param title - Title of the document
 * @param contentType - Type of content ('text', 'image', etc.)
 * @param metadata - Optional metadata object, stored as JSON
 * @param contentId - Optional content ID referencing content_metadata table
 * @returns Promise that resolves to the document ID
 * @throws Error prefixed with "Failed to upsert document:" on any failure
 */
export async function upsertDocument(connection, source, title, contentType = 'text', metadata, contentId) {
    const selectBySource = 'SELECT id FROM documents WHERE source = ?';
    try {
        // Validate content type
        validateContentType(contentType);
        // First try to get existing document
        const existing = await connection.get(selectBySource, [source]);
        if (existing) {
            return existing.id;
        }
        // Insert new document if it doesn't exist
        const metadataJson = metadata ? JSON.stringify(metadata) : null;
        let result;
        try {
            result = await connection.run('INSERT INTO documents (content_id, source, title, content_type, metadata) VALUES (?, ?, ?, ?, ?)', [contentId || null, source, title, contentType, metadataJson]);
        }
        catch (insertError) {
            // Lost a race with a concurrent insert of the same source:
            // the row exists now, so behave like the "existing" path.
            if (insertError instanceof Error && insertError.message.includes('UNIQUE constraint failed')) {
                const winner = await connection.get(selectBySource, [source]);
                if (winner) {
                    return winner.id;
                }
            }
            throw insertError;
        }
        if (typeof result.lastID !== 'number' || result.lastID <= 0) {
            throw new Error('Failed to get document ID after insertion');
        }
        return result.lastID;
    }
    catch (error) {
        throw new Error(`Failed to upsert document: ${error instanceof Error ? error.message : 'Unknown error'}`);
    }
}
298
/**
 * Fetches the chunks matching the given embedding IDs, joined with their parent document's
 * source, title, content type and content ID.
 * @param connection - Database connection object
 * @param embeddingIds - Array of embedding IDs to retrieve
 * @returns Promise that resolves to an array of chunk rows (ordered by chunk_index) whose
 *          `metadata` JSON has been parsed; an empty input yields an empty array without querying
 */
export async function getChunksByEmbeddingIds(connection, embeddingIds) {
    if (embeddingIds.length === 0) {
        return [];
    }
    // Turn a row's JSON metadata text into an object; absent metadata becomes undefined.
    const withParsedMetadata = (row) => {
        const parsed = row.metadata ? JSON.parse(row.metadata) : undefined;
        return { ...row, metadata: parsed };
    };
    try {
        // One bound placeholder per requested ID.
        const placeholders = embeddingIds.map(() => '?').join(',');
        const rows = await connection.all(`
            SELECT
                c.id,
                c.embedding_id,
                c.document_id,
                c.content,
                c.content_type,
                c.chunk_index,
                c.metadata,
                c.created_at,
                d.source as document_source,
                d.title as document_title,
                d.content_type as document_content_type,
                d.content_id as document_content_id
            FROM chunks c
            JOIN documents d ON c.document_id = d.id
            WHERE c.embedding_id IN (${placeholders})
            ORDER BY c.chunk_index
        `, embeddingIds);
        return rows.map(withParsedMetadata);
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : 'Unknown error';
        throw new Error(`Failed to retrieve chunks: ${reason}`);
    }
}
341
/**
 * Checks that `mode` is one of the supported modes ('text' or 'multimodal').
 * Throws an Error naming the rejected value and the accepted ones otherwise.
 */
function validateMode(mode) {
    const allowedModes = ['text', 'multimodal'];
    if (allowedModes.includes(mode)) {
        return;
    }
    const accepted = allowedModes.join(', ');
    throw new Error(`Invalid mode '${mode}'. Must be one of: ${accepted}`);
}
350
/**
 * Checks that `modelType` is a supported model type ('sentence-transformer' or 'clip').
 * Throws an Error naming the rejected value and the accepted ones otherwise.
 */
function validateModelType(modelType) {
    const allowedTypes = ['sentence-transformer', 'clip'];
    const isKnown = allowedTypes.includes(modelType);
    if (isKnown) {
        return;
    }
    throw new Error(`Invalid model type '${modelType}'. Must be one of: ${allowedTypes.join(', ')}`);
}
359
/**
 * Checks that `strategy` is one of the supported reranking strategies.
 * Throws an Error naming the rejected value and the accepted ones otherwise.
 */
function validateRerankingStrategy(strategy) {
    const allowedStrategies = ['cross-encoder', 'text-derived', 'metadata', 'hybrid', 'disabled'];
    if (allowedStrategies.includes(strategy)) {
        return;
    }
    const accepted = allowedStrategies.join(', ');
    throw new Error(`Invalid reranking strategy '${strategy}'. Must be one of: ${accepted}`);
}
368
/**
 * Checks that `contentType` is one of the supported content types
 * ('text', 'image', 'pdf' or 'docx').
 * Throws an Error naming the rejected value and the accepted ones otherwise.
 */
function validateContentType(contentType) {
    const allowedTypes = ['text', 'image', 'pdf', 'docx'];
    const isSupported = allowedTypes.includes(contentType);
    if (isSupported) {
        return;
    }
    throw new Error(`Invalid content type '${contentType}'. Must be one of: ${allowedTypes.join(', ')}`);
}
377
/**
 * Gets the complete system information from system_info table
 * @param connection - Database connection object
 * @returns Promise that resolves to SystemInfo object or null if not set.
 *          `supportedContentTypes` falls back to ['text'] and `rerankingConfig` to undefined
 *          when the stored JSON column is empty; `createdAt`/`updatedAt` are Date objects
 *          representing the stored UTC instants.
 * @throws Error prefixed with "Failed to get system info:" when the query or JSON parsing fails
 */
export async function getSystemInfo(connection) {
    // The timestamps are written with SQLite's CURRENT_TIMESTAMP, i.e. 'YYYY-MM-DD HH:MM:SS'
    // in UTC. `new Date()` treats that non-ISO form as *local* time, which shifts the instant
    // by the host's UTC offset. Rewrite it as ISO-8601 with an explicit 'Z' before parsing;
    // any other value is handed to Date unchanged.
    const parseTimestamp = (value) => {
        const isSqliteUtc = typeof value === 'string'
            && /^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}(\.\d+)?$/.test(value);
        return new Date(isSqliteUtc ? `${value.replace(' ', 'T')}Z` : value);
    };
    try {
        const result = await connection.get(`
            SELECT
                mode, model_name, model_type, model_dimensions, model_version,
                supported_content_types, reranking_strategy, reranking_model,
                reranking_config, created_at, updated_at
            FROM system_info WHERE id = 1
        `);
        if (!result) {
            return null;
        }
        // Parse JSON fields and convert to proper types
        const supportedContentTypes = result.supported_content_types
            ? JSON.parse(result.supported_content_types)
            : ['text'];
        const rerankingConfig = result.reranking_config
            ? JSON.parse(result.reranking_config)
            : undefined;
        return {
            mode: result.mode,
            modelName: result.model_name,
            modelType: result.model_type,
            modelDimensions: result.model_dimensions,
            modelVersion: result.model_version,
            supportedContentTypes,
            rerankingStrategy: result.reranking_strategy,
            rerankingModel: result.reranking_model,
            rerankingConfig,
            createdAt: parseTimestamp(result.created_at),
            updatedAt: parseTimestamp(result.updated_at)
        };
    }
    catch (error) {
        throw new Error(`Failed to get system info: ${error instanceof Error ? error.message : 'Unknown error'}`);
    }
}
419
/**
 * Writes system information to the single system_info row (id = 1).
 * When the row exists, only the fields that are provided (not undefined) are updated and
 * updated_at is refreshed; if no field is provided nothing is written. When the row does not
 * exist, it is inserted with defaults filling any missing field.
 * @param connection - Database connection object
 * @param systemInfo - SystemInfo object (possibly partial) to store
 */
export async function setSystemInfo(connection, systemInfo) {
    try {
        // Enum-like fields are validated only when they carry a truthy value.
        if (systemInfo.mode) {
            validateMode(systemInfo.mode);
        }
        if (systemInfo.modelType) {
            validateModelType(systemInfo.modelType);
        }
        if (systemInfo.rerankingStrategy) {
            validateRerankingStrategy(systemInfo.rerankingStrategy);
        }
        const currentRow = await connection.get('SELECT id FROM system_info WHERE id = 1');
        // Structured fields are stored as JSON text; a missing value stays undefined.
        const contentTypesJson = systemInfo.supportedContentTypes
            ? JSON.stringify(systemInfo.supportedContentTypes)
            : undefined;
        const configJson = systemInfo.rerankingConfig
            ? JSON.stringify(systemInfo.rerankingConfig)
            : undefined;
        if (!currentRow) {
            // No row yet: insert one, substituting defaults for anything falsy.
            await connection.run(`
                INSERT INTO system_info (
                    id, mode, model_name, model_type, model_dimensions, model_version,
                    supported_content_types, reranking_strategy, reranking_model, reranking_config,
                    created_at, updated_at
                ) VALUES (1, ?, ?, ?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
            `, [
                systemInfo.mode || 'text',
                systemInfo.modelName || 'sentence-transformers/all-MiniLM-L6-v2',
                systemInfo.modelType || 'sentence-transformer',
                systemInfo.modelDimensions || 384,
                systemInfo.modelVersion || '',
                contentTypesJson || '["text"]',
                systemInfo.rerankingStrategy || 'cross-encoder',
                systemInfo.rerankingModel || null,
                configJson || null
            ]);
            return;
        }
        // Candidate column/value pairs, in the column order used by the UPDATE statement.
        const candidates = [
            ['mode', systemInfo.mode],
            ['model_name', systemInfo.modelName],
            ['model_type', systemInfo.modelType],
            ['model_dimensions', systemInfo.modelDimensions],
            ['model_version', systemInfo.modelVersion],
            ['supported_content_types', contentTypesJson],
            ['reranking_strategy', systemInfo.rerankingStrategy],
            ['reranking_model', systemInfo.rerankingModel],
            ['reranking_config', configJson]
        ];
        const provided = candidates.filter(([, value]) => value !== undefined);
        // Nothing to change: skip the write entirely (the timestamp is not bumped on its own).
        if (provided.length === 0) {
            return;
        }
        const assignments = provided.map(([column]) => `${column} = ?`);
        assignments.push('updated_at = CURRENT_TIMESTAMP');
        // The trailing 1 binds the `WHERE id = ?` placeholder.
        const bindings = [...provided.map(([, value]) => value), 1];
        await connection.run(`UPDATE system_info SET ${assignments.join(', ')} WHERE id = ?`, bindings);
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : 'Unknown error';
        throw new Error(`Failed to set system info: ${reason}`);
    }
}
519
+ // =============================================================================
520
+ // REMOVED IN v3.0.0: Legacy database functions
521
+ // =============================================================================
522
+ // The following functions have been removed. Use getSystemInfo() and setSystemInfo() instead:
523
+ //
524
+ // - getModelVersion() → Use: const systemInfo = await getSystemInfo(db); const version = systemInfo?.modelVersion;
525
+ // - setModelVersion() → Use: await setSystemInfo(db, { modelVersion: 'version' });
526
+ // - getStoredModelInfo() → Use: const systemInfo = await getSystemInfo(db); access systemInfo.modelName and systemInfo.modelDimensions
527
+ // - setStoredModelInfo() → Use: await setSystemInfo(db, { modelName: 'name', modelDimensions: 384 });
528
+ //
529
+ // Migration guide: See CHANGELOG.md for v3.0.0 breaking changes
530
/**
 * Lists the documents stored with a given content type, newest first.
 * @param connection - Database connection object
 * @param contentType - Content type to filter by; validated before querying
 * @returns Promise that resolves to an array of document rows whose `metadata` JSON has been
 *          parsed (undefined when the column is empty)
 */
export async function getDocumentsByContentType(connection, contentType) {
    const selectSql = 'SELECT id, source, title, content_type, metadata, created_at FROM documents WHERE content_type = ? ORDER BY created_at DESC';
    try {
        validateContentType(contentType);
        const rows = await connection.all(selectSql, [contentType]);
        return rows.map((row) => {
            // Stored metadata is JSON text; turn it back into an object.
            const parsed = row.metadata ? JSON.parse(row.metadata) : undefined;
            return { ...row, metadata: parsed };
        });
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : 'Unknown error';
        throw new Error(`Failed to get documents by content type: ${reason}`);
    }
}
550
/**
 * Lists the chunks stored with a given content type, joined with their parent document's
 * source, title, content type and content ID, ordered by document source then chunk index.
 * @param connection - Database connection object
 * @param contentType - Content type to filter by; validated before querying
 * @returns Promise that resolves to an array of chunk rows whose `metadata` JSON has been
 *          parsed (undefined when the column is empty)
 */
export async function getChunksByContentType(connection, contentType) {
    // Turn a row's JSON metadata text into an object.
    const withParsedMetadata = (row) => {
        const parsed = row.metadata ? JSON.parse(row.metadata) : undefined;
        return { ...row, metadata: parsed };
    };
    try {
        validateContentType(contentType);
        const rows = await connection.all(`
            SELECT
                c.id,
                c.embedding_id,
                c.document_id,
                c.content,
                c.content_type,
                c.chunk_index,
                c.metadata,
                c.created_at,
                d.source as document_source,
                d.title as document_title,
                d.content_type as document_content_type,
                d.content_id as document_content_id
            FROM chunks c
            JOIN documents d ON c.document_id = d.id
            WHERE c.content_type = ?
            ORDER BY d.source, c.chunk_index
        `, [contentType]);
        return rows.map(withParsedMetadata);
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : 'Unknown error';
        throw new Error(`Failed to get chunks by content type: ${reason}`);
    }
}
589
/**
 * Counts documents and chunks per content type, plus overall totals.
 * @param connection - Database connection object
 * @returns Promise that resolves to `{ documents, chunks, total }` where `documents` and
 *          `chunks` map each content type to its row count and `total` holds both table sizes
 */
export async function getContentTypeStatistics(connection) {
    // Fold GROUP BY rows into a { contentType: count } map.
    const toCountMap = (rows) => {
        const counts = {};
        for (const row of rows) {
            counts[row.content_type] = row.count;
        }
        return counts;
    };
    try {
        const documentRows = await connection.all(`
            SELECT content_type, COUNT(*) as count
            FROM documents
            GROUP BY content_type
        `);
        const chunkRows = await connection.all(`
            SELECT content_type, COUNT(*) as count
            FROM chunks
            GROUP BY content_type
        `);
        const documentTotal = await connection.get('SELECT COUNT(*) as count FROM documents');
        const chunkTotal = await connection.get('SELECT COUNT(*) as count FROM chunks');
        const documents = toCountMap(documentRows);
        const chunks = toCountMap(chunkRows);
        const total = { documents: documentTotal.count, chunks: chunkTotal.count };
        return { documents, chunks, total };
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : 'Unknown error';
        throw new Error(`Failed to get content type statistics: ${reason}`);
    }
}
632
/**
 * Replaces a document's metadata (stored as JSON text) and refreshes its updated_at timestamp.
 * Fails when no document row has the given ID.
 * @param connection - Database connection object
 * @param documentId - ID of the document to update
 * @param metadata - New metadata object
 */
export async function updateDocumentMetadata(connection, documentId, metadata) {
    const updateSql = 'UPDATE documents SET metadata = ?, updated_at = CURRENT_TIMESTAMP WHERE id = ?';
    try {
        const serialized = JSON.stringify(metadata);
        const outcome = await connection.run(updateSql, [serialized, documentId]);
        if (outcome.changes !== 0) {
            return;
        }
        // Zero affected rows means the ID matched nothing.
        throw new Error(`Document with ID ${documentId} not found`);
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : 'Unknown error';
        throw new Error(`Failed to update document metadata: ${reason}`);
    }
}
650
/**
 * Replaces a chunk's metadata (stored as JSON text).
 * Fails when no chunk row has the given ID.
 * @param connection - Database connection object
 * @param chunkId - ID of the chunk to update
 * @param metadata - New metadata object
 */
export async function updateChunkMetadata(connection, chunkId, metadata) {
    try {
        const serialized = JSON.stringify(metadata);
        const outcome = await connection.run('UPDATE chunks SET metadata = ? WHERE id = ?', [serialized, chunkId]);
        if (outcome.changes !== 0) {
            return;
        }
        // Zero affected rows means the ID matched nothing.
        throw new Error(`Chunk with ID ${chunkId} not found`);
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : 'Unknown error';
        throw new Error(`Failed to update chunk metadata: ${reason}`);
    }
}
668
/**
 * Adds a row to the content_metadata table.
 * A UNIQUE constraint failure is reported as the content ID already existing.
 * @param connection - Database connection object
 * @param contentMetadata - Content metadata to insert
 */
export async function insertContentMetadata(connection, contentMetadata) {
    try {
        const params = [
            contentMetadata.id,
            contentMetadata.storageType,
            // A falsy original path is stored as NULL.
            contentMetadata.originalPath || null,
            contentMetadata.contentPath,
            contentMetadata.displayName,
            contentMetadata.contentType,
            contentMetadata.fileSize,
            contentMetadata.contentHash
        ];
        await connection.run(`
            INSERT INTO content_metadata (
                id, storage_type, original_path, content_path, display_name,
                content_type, file_size, content_hash
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        `, params);
    }
    catch (err) {
        const isError = err instanceof Error;
        if (isError && err.message.includes('UNIQUE constraint failed')) {
            throw new Error(`Content with ID '${contentMetadata.id}' already exists`);
        }
        const reason = isError ? err.message : 'Unknown error';
        throw new Error(`Failed to insert content metadata: ${reason}`);
    }
}
698
/**
 * Gets content metadata by content ID
 * @param connection - Database connection object
 * @param contentId - Content ID to retrieve
 * @returns Promise that resolves to ContentMetadata (camelCase fields, `createdAt` as a Date)
 *          or null if not found
 * @throws Error prefixed with "Failed to get content metadata:" when the query fails
 */
export async function getContentMetadata(connection, contentId) {
    // NOTE(review): created_at is presumably filled by a column default such as
    // CURRENT_TIMESTAMP — confirm against the schema. SQLite renders that as
    // 'YYYY-MM-DD HH:MM:SS' in UTC, which `new Date()` would read as *local* time; rewrite it
    // as ISO-8601 with an explicit 'Z'. Any other value is handed to Date unchanged.
    const parseTimestamp = (value) => {
        const isSqliteUtc = typeof value === 'string'
            && /^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}(\.\d+)?$/.test(value);
        return new Date(isSqliteUtc ? `${value.replace(' ', 'T')}Z` : value);
    };
    try {
        const result = await connection.get(`
            SELECT id, storage_type, original_path, content_path, display_name,
                   content_type, file_size, content_hash, created_at
            FROM content_metadata
            WHERE id = ?
        `, [contentId]);
        if (!result) {
            return null;
        }
        return {
            id: result.id,
            storageType: result.storage_type,
            originalPath: result.original_path,
            contentPath: result.content_path,
            displayName: result.display_name,
            contentType: result.content_type,
            fileSize: result.file_size,
            contentHash: result.content_hash,
            createdAt: parseTimestamp(result.created_at)
        };
    }
    catch (error) {
        throw new Error(`Failed to get content metadata: ${error instanceof Error ? error.message : 'Unknown error'}`);
    }
}
731
/**
 * Gets content metadata by content hash (for deduplication)
 * @param connection - Database connection object
 * @param contentHash - Content hash to search for
 * @returns Promise that resolves to ContentMetadata (camelCase fields, `createdAt` as a Date)
 *          or null if not found
 * @throws Error prefixed with "Failed to get content metadata by hash:" when the query fails
 */
export async function getContentMetadataByHash(connection, contentHash) {
    // NOTE(review): created_at is presumably filled by a column default such as
    // CURRENT_TIMESTAMP — confirm against the schema. SQLite renders that as
    // 'YYYY-MM-DD HH:MM:SS' in UTC, which `new Date()` would read as *local* time; rewrite it
    // as ISO-8601 with an explicit 'Z'. Any other value is handed to Date unchanged.
    const parseTimestamp = (value) => {
        const isSqliteUtc = typeof value === 'string'
            && /^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}(\.\d+)?$/.test(value);
        return new Date(isSqliteUtc ? `${value.replace(' ', 'T')}Z` : value);
    };
    try {
        const result = await connection.get(`
            SELECT id, storage_type, original_path, content_path, display_name,
                   content_type, file_size, content_hash, created_at
            FROM content_metadata
            WHERE content_hash = ?
        `, [contentHash]);
        if (!result) {
            return null;
        }
        return {
            id: result.id,
            storageType: result.storage_type,
            originalPath: result.original_path,
            contentPath: result.content_path,
            displayName: result.display_name,
            contentType: result.content_type,
            fileSize: result.file_size,
            contentHash: result.content_hash,
            createdAt: parseTimestamp(result.created_at)
        };
    }
    catch (error) {
        throw new Error(`Failed to get content metadata by hash: ${error instanceof Error ? error.message : 'Unknown error'}`);
    }
}
764
/**
 * Gets all content metadata by storage type
 * @param connection - Database connection object
 * @param storageType - Storage type to filter by
 * @returns Promise that resolves to array of ContentMetadata (camelCase fields, `createdAt`
 *          as a Date), newest first
 * @throws Error prefixed with "Failed to get content metadata by storage type:" when the query fails
 */
export async function getContentMetadataByStorageType(connection, storageType) {
    // NOTE(review): created_at is presumably filled by a column default such as
    // CURRENT_TIMESTAMP — confirm against the schema. SQLite renders that as
    // 'YYYY-MM-DD HH:MM:SS' in UTC, which `new Date()` would read as *local* time; rewrite it
    // as ISO-8601 with an explicit 'Z'. Any other value is handed to Date unchanged.
    const parseTimestamp = (value) => {
        const isSqliteUtc = typeof value === 'string'
            && /^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}(\.\d+)?$/.test(value);
        return new Date(isSqliteUtc ? `${value.replace(' ', 'T')}Z` : value);
    };
    try {
        const results = await connection.all(`
            SELECT id, storage_type, original_path, content_path, display_name,
                   content_type, file_size, content_hash, created_at
            FROM content_metadata
            WHERE storage_type = ?
            ORDER BY created_at DESC
        `, [storageType]);
        return results.map((result) => ({
            id: result.id,
            storageType: result.storage_type,
            originalPath: result.original_path,
            contentPath: result.content_path,
            displayName: result.display_name,
            contentType: result.content_type,
            fileSize: result.file_size,
            contentHash: result.content_hash,
            createdAt: parseTimestamp(result.created_at)
        }));
    }
    catch (error) {
        throw new Error(`Failed to get content metadata by storage type: ${error instanceof Error ? error.message : 'Unknown error'}`);
    }
}
795
/**
 * Removes the content_metadata row with the given ID.
 * @param connection - Database connection object
 * @param contentId - Content ID to delete
 * @returns Promise that resolves to true if a row was deleted, false if none matched
 */
export async function deleteContentMetadata(connection, contentId) {
    try {
        const outcome = await connection.run('DELETE FROM content_metadata WHERE id = ?', [contentId]);
        const removedAny = outcome.changes > 0;
        return removedAny;
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : 'Unknown error';
        throw new Error(`Failed to delete content metadata: ${reason}`);
    }
}
810
/**
 * Gets storage statistics from storage_stats table
 * @param connection - Database connection object
 * @returns Promise that resolves to storage statistics (`lastCleanup` is null when never set,
 *          `updatedAt` is a Date), or null when the row with id = 1 does not exist
 * @throws Error prefixed with "Failed to get storage stats:" when the query fails
 */
export async function getStorageStats(connection) {
    // updated_at is written with SQLite's CURRENT_TIMESTAMP, i.e. 'YYYY-MM-DD HH:MM:SS' in UTC.
    // `new Date()` treats that non-ISO form as *local* time, shifting the instant by the host's
    // UTC offset, so rewrite it as ISO-8601 with an explicit 'Z'. Values already in ISO form
    // (such as a last_cleanup stored via toISOString()) are handed to Date unchanged.
    const parseTimestamp = (value) => {
        const isSqliteUtc = typeof value === 'string'
            && /^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}(\.\d+)?$/.test(value);
        return new Date(isSqliteUtc ? `${value.replace(' ', 'T')}Z` : value);
    };
    try {
        const result = await connection.get(`
            SELECT content_dir_files, content_dir_size, filesystem_refs,
                   last_cleanup, updated_at
            FROM storage_stats
            WHERE id = 1
        `);
        if (!result) {
            return null;
        }
        return {
            contentDirFiles: result.content_dir_files,
            contentDirSize: result.content_dir_size,
            filesystemRefs: result.filesystem_refs,
            lastCleanup: result.last_cleanup ? parseTimestamp(result.last_cleanup) : null,
            updatedAt: parseTimestamp(result.updated_at)
        };
    }
    catch (error) {
        throw new Error(`Failed to get storage stats: ${error instanceof Error ? error.message : 'Unknown error'}`);
    }
}
838
/**
 * Updates storage statistics in storage_stats table
 * When the row (id = 1) exists, only the provided (not undefined) fields are updated and
 * updated_at is refreshed; if no field is provided nothing is written. When the row does not
 * exist, it is inserted with 0 / NULL filling any missing field.
 * @param connection - Database connection object
 * @param stats - Partial storage statistics to update. `lastCleanup` may be a Date, or null
 *                to store NULL (the value getStorageStats reports for "never cleaned").
 * @throws Error prefixed with "Failed to update storage stats:" when a query fails
 */
export async function updateStorageStats(connection, stats) {
    try {
        // Check if there's already a row
        const existing = await connection.get('SELECT id FROM storage_stats WHERE id = 1');
        if (existing) {
            // Build dynamic UPDATE query based on provided fields
            const updateFields = [];
            const updateValues = [];
            if (stats.contentDirFiles !== undefined) {
                updateFields.push('content_dir_files = ?');
                updateValues.push(stats.contentDirFiles);
            }
            if (stats.contentDirSize !== undefined) {
                updateFields.push('content_dir_size = ?');
                updateValues.push(stats.contentDirSize);
            }
            if (stats.filesystemRefs !== undefined) {
                updateFields.push('filesystem_refs = ?');
                updateValues.push(stats.filesystemRefs);
            }
            if (stats.lastCleanup !== undefined) {
                updateFields.push('last_cleanup = ?');
                // A null lastCleanup is stored as NULL, matching the insert branch below,
                // instead of crashing on null.toISOString().
                updateValues.push(stats.lastCleanup ? stats.lastCleanup.toISOString() : null);
            }
            // Always update the timestamp
            updateFields.push('updated_at = CURRENT_TIMESTAMP');
            updateValues.push(1); // Add WHERE clause parameter
            if (updateFields.length > 1) { // More than just the timestamp
                const sql = `UPDATE storage_stats SET ${updateFields.join(', ')} WHERE id = ?`;
                await connection.run(sql, updateValues);
            }
        }
        else {
            // Insert new row with provided values and defaults
            const insertSql = `
                INSERT INTO storage_stats (
                    id, content_dir_files, content_dir_size, filesystem_refs,
                    last_cleanup, updated_at
                ) VALUES (1, ?, ?, ?, ?, CURRENT_TIMESTAMP)
            `;
            await connection.run(insertSql, [
                stats.contentDirFiles || 0,
                stats.contentDirSize || 0,
                stats.filesystemRefs || 0,
                stats.lastCleanup ? stats.lastCleanup.toISOString() : null
            ]);
        }
    }
    catch (error) {
        throw new Error(`Failed to update storage stats: ${error instanceof Error ? error.message : 'Unknown error'}`);
    }
}
895
/**
 * Empties the database while leaving its schema in place.
 * Deleting rows instead of the file avoids file locking issues on Windows.
 *
 * Steps, in order:
 * 1. Count the rows in documents, chunks and content_metadata (for the report)
 * 2. Delete all rows from chunks, then documents, then content_metadata
 * 3. Zero the storage_stats counters
 * 4. Delete the system_info row unless `preserveSystemInfo` is set
 * 5. Run VACUUM to reclaim disk space unless `runVacuum` is false
 *
 * Progress is written to the console. The statements are issued one by one.
 *
 * @param connection - Database connection object
 * @param options - Reset options: `preserveSystemInfo` (default false), `runVacuum` (default true)
 * @returns Promise resolving to reset result statistics
 *
 * @example
 * ```typescript
 * const db = await openDatabase('./db.sqlite');
 * const result = await resetDatabase(db, { preserveSystemInfo: false });
 * console.log(`Deleted ${result.documentsDeleted} documents and ${result.chunksDeleted} chunks`);
 * ```
 */
export async function resetDatabase(connection, options = {}) {
    const startTime = Date.now();
    const { preserveSystemInfo: keepSystemInfo = false, runVacuum: shouldVacuum = true } = options;
    // Row count of a table, or 0 when the count is missing/falsy.
    const countRows = async (table) => {
        const row = await connection.get(`SELECT COUNT(*) as count FROM ${table}`);
        return row?.count || 0;
    };
    try {
        console.log('🔄 Starting database reset...');
        // Capture the counts up front so the report reflects what was removed.
        const documentsDeleted = await countRows('documents');
        const chunksDeleted = await countRows('chunks');
        const contentMetadataDeleted = await countRows('content_metadata');
        // Children first: chunks reference documents, documents reference content_metadata.
        for (const table of ['chunks', 'documents', 'content_metadata']) {
            console.log(` Deleting ${table}...`);
            await connection.run(`DELETE FROM ${table}`);
        }
        console.log(' Resetting storage_stats...');
        await connection.run(`
            UPDATE storage_stats SET
                content_dir_files = 0,
                content_dir_size = 0,
                filesystem_refs = 0,
                updated_at = CURRENT_TIMESTAMP
            WHERE id = 1
        `);
        const systemInfoCleared = !keepSystemInfo;
        if (systemInfoCleared) {
            console.log(' Clearing system_info...');
            await connection.run('DELETE FROM system_info WHERE id = 1');
        }
        else {
            console.log(' Preserving system_info (mode/model configuration)');
        }
        if (shouldVacuum) {
            console.log(' Running VACUUM to reclaim disk space...');
            await connection.run('VACUUM');
        }
        const resetTimeMs = Date.now() - startTime;
        const summary = [
            `✓ Database reset complete in ${resetTimeMs}ms`,
            ` Documents deleted: ${documentsDeleted}`,
            ` Chunks deleted: ${chunksDeleted}`,
            ` Content metadata deleted: ${contentMetadataDeleted}`,
            ` System info cleared: ${systemInfoCleared}`
        ];
        for (const line of summary) {
            console.log(line);
        }
        return {
            success: true,
            documentsDeleted,
            chunksDeleted,
            contentMetadataDeleted,
            systemInfoCleared,
            resetTimeMs
        };
    }
    catch (err) {
        const elapsedMs = Date.now() - startTime;
        console.error(`❌ Database reset failed after ${elapsedMs}ms:`, err);
        const reason = err instanceof Error ? err.message : 'Unknown error';
        throw new Error(`Failed to reset database: ${reason}`);
    }
}
982
/**
 * Reports whether the database holds any documents or chunks.
 * Useful for determining if a reset is needed. Only the documents and chunks tables are
 * consulted; any query failure (for example missing tables) is treated as "empty".
 *
 * @param connection - Database connection object
 * @returns Promise resolving to true if either table has rows, false otherwise
 */
export async function hasDatabaseData(connection) {
    try {
        const documentRow = await connection.get('SELECT COUNT(*) as count FROM documents');
        const chunkRow = await connection.get('SELECT COUNT(*) as count FROM chunks');
        const documentTotal = documentRow?.count || 0;
        const chunkTotal = chunkRow?.count || 0;
        if (documentTotal > 0) {
            return true;
        }
        return chunkTotal > 0;
    }
    catch (err) {
        // A failed count (e.g. the tables do not exist yet) is deliberately reported as empty.
        return false;
    }
}
1000
+ //# sourceMappingURL=db.js.map