ocr-provenance-mcp 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ocr-provenance-mcp might be problematic. Click here for more details.

Files changed (578) hide show
  1. package/.env.example +55 -0
  2. package/LICENSE +78 -0
  3. package/README.md +1154 -0
  4. package/dist/bin-http.d.ts +24 -0
  5. package/dist/bin-http.d.ts.map +1 -0
  6. package/dist/bin-http.js +275 -0
  7. package/dist/bin-http.js.map +1 -0
  8. package/dist/bin-setup.d.ts +11 -0
  9. package/dist/bin-setup.d.ts.map +1 -0
  10. package/dist/bin-setup.js +610 -0
  11. package/dist/bin-setup.js.map +1 -0
  12. package/dist/bin.d.ts +16 -0
  13. package/dist/bin.d.ts.map +1 -0
  14. package/dist/bin.js +16 -0
  15. package/dist/bin.js.map +1 -0
  16. package/dist/index.d.ts +13 -0
  17. package/dist/index.d.ts.map +1 -0
  18. package/dist/index.js +90 -0
  19. package/dist/index.js.map +1 -0
  20. package/dist/models/chunk.d.ts +136 -0
  21. package/dist/models/chunk.d.ts.map +1 -0
  22. package/dist/models/chunk.js +27 -0
  23. package/dist/models/chunk.js.map +1 -0
  24. package/dist/models/cluster.d.ts +79 -0
  25. package/dist/models/cluster.d.ts.map +1 -0
  26. package/dist/models/cluster.js +10 -0
  27. package/dist/models/cluster.js.map +1 -0
  28. package/dist/models/comparison.d.ts +62 -0
  29. package/dist/models/comparison.d.ts.map +1 -0
  30. package/dist/models/comparison.js +8 -0
  31. package/dist/models/comparison.js.map +1 -0
  32. package/dist/models/document.d.ts +104 -0
  33. package/dist/models/document.d.ts.map +1 -0
  34. package/dist/models/document.js +15 -0
  35. package/dist/models/document.js.map +1 -0
  36. package/dist/models/embedding.d.ts +87 -0
  37. package/dist/models/embedding.d.ts.map +1 -0
  38. package/dist/models/embedding.js +23 -0
  39. package/dist/models/embedding.js.map +1 -0
  40. package/dist/models/extraction.d.ts +15 -0
  41. package/dist/models/extraction.d.ts.map +1 -0
  42. package/dist/models/extraction.js +2 -0
  43. package/dist/models/extraction.js.map +1 -0
  44. package/dist/models/form-fill.d.ts +23 -0
  45. package/dist/models/form-fill.d.ts.map +1 -0
  46. package/dist/models/form-fill.js +2 -0
  47. package/dist/models/form-fill.js.map +1 -0
  48. package/dist/models/image.d.ts +177 -0
  49. package/dist/models/image.d.ts.map +1 -0
  50. package/dist/models/image.js +8 -0
  51. package/dist/models/image.js.map +1 -0
  52. package/dist/models/index.d.ts +14 -0
  53. package/dist/models/index.d.ts.map +1 -0
  54. package/dist/models/index.js +22 -0
  55. package/dist/models/index.js.map +1 -0
  56. package/dist/models/provenance.d.ts +174 -0
  57. package/dist/models/provenance.d.ts.map +1 -0
  58. package/dist/models/provenance.js +53 -0
  59. package/dist/models/provenance.js.map +1 -0
  60. package/dist/models/uploaded-file.d.ts +20 -0
  61. package/dist/models/uploaded-file.d.ts.map +1 -0
  62. package/dist/models/uploaded-file.js +2 -0
  63. package/dist/models/uploaded-file.js.map +1 -0
  64. package/dist/server/errors.d.ts +93 -0
  65. package/dist/server/errors.d.ts.map +1 -0
  66. package/dist/server/errors.js +256 -0
  67. package/dist/server/errors.js.map +1 -0
  68. package/dist/server/events.d.ts +36 -0
  69. package/dist/server/events.d.ts.map +1 -0
  70. package/dist/server/events.js +48 -0
  71. package/dist/server/events.js.map +1 -0
  72. package/dist/server/permissions.d.ts +26 -0
  73. package/dist/server/permissions.d.ts.map +1 -0
  74. package/dist/server/permissions.js +194 -0
  75. package/dist/server/permissions.js.map +1 -0
  76. package/dist/server/register-tools.d.ts +25 -0
  77. package/dist/server/register-tools.d.ts.map +1 -0
  78. package/dist/server/register-tools.js +102 -0
  79. package/dist/server/register-tools.js.map +1 -0
  80. package/dist/server/startup.d.ts +16 -0
  81. package/dist/server/startup.d.ts.map +1 -0
  82. package/dist/server/startup.js +37 -0
  83. package/dist/server/startup.js.map +1 -0
  84. package/dist/server/state.d.ts +166 -0
  85. package/dist/server/state.d.ts.map +1 -0
  86. package/dist/server/state.js +424 -0
  87. package/dist/server/state.js.map +1 -0
  88. package/dist/server/transports/http-transport.d.ts +37 -0
  89. package/dist/server/transports/http-transport.d.ts.map +1 -0
  90. package/dist/server/transports/http-transport.js +204 -0
  91. package/dist/server/transports/http-transport.js.map +1 -0
  92. package/dist/server/transports/index.d.ts +9 -0
  93. package/dist/server/transports/index.d.ts.map +1 -0
  94. package/dist/server/transports/index.js +9 -0
  95. package/dist/server/transports/index.js.map +1 -0
  96. package/dist/server/transports/session-manager.d.ts +40 -0
  97. package/dist/server/transports/session-manager.d.ts.map +1 -0
  98. package/dist/server/transports/session-manager.js +74 -0
  99. package/dist/server/transports/session-manager.js.map +1 -0
  100. package/dist/server/types.d.ts +82 -0
  101. package/dist/server/types.d.ts.map +1 -0
  102. package/dist/server/types.js +14 -0
  103. package/dist/server/types.js.map +1 -0
  104. package/dist/services/audit.d.ts +26 -0
  105. package/dist/services/audit.d.ts.map +1 -0
  106. package/dist/services/audit.js +43 -0
  107. package/dist/services/audit.js.map +1 -0
  108. package/dist/services/chunking/chunk-deduplicator.d.ts +33 -0
  109. package/dist/services/chunking/chunk-deduplicator.d.ts.map +1 -0
  110. package/dist/services/chunking/chunk-deduplicator.js +46 -0
  111. package/dist/services/chunking/chunk-deduplicator.js.map +1 -0
  112. package/dist/services/chunking/chunk-merger.d.ts +26 -0
  113. package/dist/services/chunking/chunk-merger.d.ts.map +1 -0
  114. package/dist/services/chunking/chunk-merger.js +94 -0
  115. package/dist/services/chunking/chunk-merger.js.map +1 -0
  116. package/dist/services/chunking/chunker.d.ts +62 -0
  117. package/dist/services/chunking/chunker.d.ts.map +1 -0
  118. package/dist/services/chunking/chunker.js +566 -0
  119. package/dist/services/chunking/chunker.js.map +1 -0
  120. package/dist/services/chunking/heading-normalizer.d.ts +33 -0
  121. package/dist/services/chunking/heading-normalizer.d.ts.map +1 -0
  122. package/dist/services/chunking/heading-normalizer.js +101 -0
  123. package/dist/services/chunking/heading-normalizer.js.map +1 -0
  124. package/dist/services/chunking/json-block-analyzer.d.ts +163 -0
  125. package/dist/services/chunking/json-block-analyzer.d.ts.map +1 -0
  126. package/dist/services/chunking/json-block-analyzer.js +1033 -0
  127. package/dist/services/chunking/json-block-analyzer.js.map +1 -0
  128. package/dist/services/chunking/markdown-parser.d.ts +75 -0
  129. package/dist/services/chunking/markdown-parser.d.ts.map +1 -0
  130. package/dist/services/chunking/markdown-parser.js +428 -0
  131. package/dist/services/chunking/markdown-parser.js.map +1 -0
  132. package/dist/services/chunking/text-normalizer.d.ts +20 -0
  133. package/dist/services/chunking/text-normalizer.d.ts.map +1 -0
  134. package/dist/services/chunking/text-normalizer.js +36 -0
  135. package/dist/services/chunking/text-normalizer.js.map +1 -0
  136. package/dist/services/clm/contract-schemas.d.ts +36 -0
  137. package/dist/services/clm/contract-schemas.d.ts.map +1 -0
  138. package/dist/services/clm/contract-schemas.js +92 -0
  139. package/dist/services/clm/contract-schemas.js.map +1 -0
  140. package/dist/services/clm/summarization.d.ts +46 -0
  141. package/dist/services/clm/summarization.d.ts.map +1 -0
  142. package/dist/services/clm/summarization.js +61 -0
  143. package/dist/services/clm/summarization.js.map +1 -0
  144. package/dist/services/clustering/clustering-service.d.ts +58 -0
  145. package/dist/services/clustering/clustering-service.d.ts.map +1 -0
  146. package/dist/services/clustering/clustering-service.js +467 -0
  147. package/dist/services/clustering/clustering-service.js.map +1 -0
  148. package/dist/services/comparison/diff-service.d.ts +41 -0
  149. package/dist/services/comparison/diff-service.d.ts.map +1 -0
  150. package/dist/services/comparison/diff-service.js +120 -0
  151. package/dist/services/comparison/diff-service.js.map +1 -0
  152. package/dist/services/embedding/embedder.d.ts +55 -0
  153. package/dist/services/embedding/embedder.d.ts.map +1 -0
  154. package/dist/services/embedding/embedder.js +202 -0
  155. package/dist/services/embedding/embedder.js.map +1 -0
  156. package/dist/services/embedding/nomic.d.ts +67 -0
  157. package/dist/services/embedding/nomic.d.ts.map +1 -0
  158. package/dist/services/embedding/nomic.js +280 -0
  159. package/dist/services/embedding/nomic.js.map +1 -0
  160. package/dist/services/gemini/circuit-breaker.d.ts +106 -0
  161. package/dist/services/gemini/circuit-breaker.d.ts.map +1 -0
  162. package/dist/services/gemini/circuit-breaker.js +237 -0
  163. package/dist/services/gemini/circuit-breaker.js.map +1 -0
  164. package/dist/services/gemini/client.d.ts +173 -0
  165. package/dist/services/gemini/client.d.ts.map +1 -0
  166. package/dist/services/gemini/client.js +483 -0
  167. package/dist/services/gemini/client.js.map +1 -0
  168. package/dist/services/gemini/config.d.ts +116 -0
  169. package/dist/services/gemini/config.d.ts.map +1 -0
  170. package/dist/services/gemini/config.js +118 -0
  171. package/dist/services/gemini/config.js.map +1 -0
  172. package/dist/services/gemini/index.d.ts +9 -0
  173. package/dist/services/gemini/index.d.ts.map +1 -0
  174. package/dist/services/gemini/index.js +13 -0
  175. package/dist/services/gemini/index.js.map +1 -0
  176. package/dist/services/gemini/rate-limiter.d.ts +62 -0
  177. package/dist/services/gemini/rate-limiter.d.ts.map +1 -0
  178. package/dist/services/gemini/rate-limiter.js +120 -0
  179. package/dist/services/gemini/rate-limiter.js.map +1 -0
  180. package/dist/services/images/extractor.d.ts +88 -0
  181. package/dist/services/images/extractor.d.ts.map +1 -0
  182. package/dist/services/images/extractor.js +340 -0
  183. package/dist/services/images/extractor.js.map +1 -0
  184. package/dist/services/images/optimizer.d.ts +130 -0
  185. package/dist/services/images/optimizer.d.ts.map +1 -0
  186. package/dist/services/images/optimizer.js +228 -0
  187. package/dist/services/images/optimizer.js.map +1 -0
  188. package/dist/services/ocr/datalab.d.ts +64 -0
  189. package/dist/services/ocr/datalab.d.ts.map +1 -0
  190. package/dist/services/ocr/datalab.js +425 -0
  191. package/dist/services/ocr/datalab.js.map +1 -0
  192. package/dist/services/ocr/errors.d.ts +38 -0
  193. package/dist/services/ocr/errors.d.ts.map +1 -0
  194. package/dist/services/ocr/errors.js +83 -0
  195. package/dist/services/ocr/errors.js.map +1 -0
  196. package/dist/services/ocr/file-manager.d.ts +76 -0
  197. package/dist/services/ocr/file-manager.d.ts.map +1 -0
  198. package/dist/services/ocr/file-manager.js +238 -0
  199. package/dist/services/ocr/file-manager.js.map +1 -0
  200. package/dist/services/ocr/form-fill.d.ts +48 -0
  201. package/dist/services/ocr/form-fill.d.ts.map +1 -0
  202. package/dist/services/ocr/form-fill.js +213 -0
  203. package/dist/services/ocr/form-fill.js.map +1 -0
  204. package/dist/services/ocr/processor.d.ts +95 -0
  205. package/dist/services/ocr/processor.d.ts.map +1 -0
  206. package/dist/services/ocr/processor.js +259 -0
  207. package/dist/services/ocr/processor.js.map +1 -0
  208. package/dist/services/provenance/agent-metadata.d.ts +82 -0
  209. package/dist/services/provenance/agent-metadata.d.ts.map +1 -0
  210. package/dist/services/provenance/agent-metadata.js +106 -0
  211. package/dist/services/provenance/agent-metadata.js.map +1 -0
  212. package/dist/services/provenance/chain-hash.d.ts +57 -0
  213. package/dist/services/provenance/chain-hash.d.ts.map +1 -0
  214. package/dist/services/provenance/chain-hash.js +131 -0
  215. package/dist/services/provenance/chain-hash.js.map +1 -0
  216. package/dist/services/provenance/exporter.d.ts +202 -0
  217. package/dist/services/provenance/exporter.d.ts.map +1 -0
  218. package/dist/services/provenance/exporter.js +457 -0
  219. package/dist/services/provenance/exporter.js.map +1 -0
  220. package/dist/services/provenance/index.d.ts +15 -0
  221. package/dist/services/provenance/index.d.ts.map +1 -0
  222. package/dist/services/provenance/index.js +17 -0
  223. package/dist/services/provenance/index.js.map +1 -0
  224. package/dist/services/provenance/tracker.d.ts +138 -0
  225. package/dist/services/provenance/tracker.d.ts.map +1 -0
  226. package/dist/services/provenance/tracker.js +293 -0
  227. package/dist/services/provenance/tracker.js.map +1 -0
  228. package/dist/services/provenance/verifier.d.ts +153 -0
  229. package/dist/services/provenance/verifier.d.ts.map +1 -0
  230. package/dist/services/provenance/verifier.js +536 -0
  231. package/dist/services/provenance/verifier.js.map +1 -0
  232. package/dist/services/python-pool.d.ts +70 -0
  233. package/dist/services/python-pool.d.ts.map +1 -0
  234. package/dist/services/python-pool.js +265 -0
  235. package/dist/services/python-pool.js.map +1 -0
  236. package/dist/services/search/bm25.d.ts +180 -0
  237. package/dist/services/search/bm25.d.ts.map +1 -0
  238. package/dist/services/search/bm25.js +656 -0
  239. package/dist/services/search/bm25.js.map +1 -0
  240. package/dist/services/search/fusion.d.ts +103 -0
  241. package/dist/services/search/fusion.d.ts.map +1 -0
  242. package/dist/services/search/fusion.js +122 -0
  243. package/dist/services/search/fusion.js.map +1 -0
  244. package/dist/services/search/local-reranker.d.ts +30 -0
  245. package/dist/services/search/local-reranker.d.ts.map +1 -0
  246. package/dist/services/search/local-reranker.js +123 -0
  247. package/dist/services/search/local-reranker.js.map +1 -0
  248. package/dist/services/search/quality.d.ts +11 -0
  249. package/dist/services/search/quality.d.ts.map +1 -0
  250. package/dist/services/search/quality.js +17 -0
  251. package/dist/services/search/quality.js.map +1 -0
  252. package/dist/services/search/query-classifier.d.ts +34 -0
  253. package/dist/services/search/query-classifier.d.ts.map +1 -0
  254. package/dist/services/search/query-classifier.js +114 -0
  255. package/dist/services/search/query-classifier.js.map +1 -0
  256. package/dist/services/search/query-expander.d.ts +73 -0
  257. package/dist/services/search/query-expander.d.ts.map +1 -0
  258. package/dist/services/search/query-expander.js +281 -0
  259. package/dist/services/search/query-expander.js.map +1 -0
  260. package/dist/services/search/reranker.d.ts +44 -0
  261. package/dist/services/search/reranker.d.ts.map +1 -0
  262. package/dist/services/search/reranker.js +101 -0
  263. package/dist/services/search/reranker.js.map +1 -0
  264. package/dist/services/storage/database/annotation-operations.d.ts +113 -0
  265. package/dist/services/storage/database/annotation-operations.d.ts.map +1 -0
  266. package/dist/services/storage/database/annotation-operations.js +177 -0
  267. package/dist/services/storage/database/annotation-operations.js.map +1 -0
  268. package/dist/services/storage/database/approval-operations.d.ts +132 -0
  269. package/dist/services/storage/database/approval-operations.d.ts.map +1 -0
  270. package/dist/services/storage/database/approval-operations.js +206 -0
  271. package/dist/services/storage/database/approval-operations.js.map +1 -0
  272. package/dist/services/storage/database/chunk-operations.d.ts +132 -0
  273. package/dist/services/storage/database/chunk-operations.d.ts.map +1 -0
  274. package/dist/services/storage/database/chunk-operations.js +306 -0
  275. package/dist/services/storage/database/chunk-operations.js.map +1 -0
  276. package/dist/services/storage/database/cluster-operations.d.ts +97 -0
  277. package/dist/services/storage/database/cluster-operations.d.ts.map +1 -0
  278. package/dist/services/storage/database/cluster-operations.js +258 -0
  279. package/dist/services/storage/database/cluster-operations.js.map +1 -0
  280. package/dist/services/storage/database/comparison-operations.d.ts +41 -0
  281. package/dist/services/storage/database/comparison-operations.d.ts.map +1 -0
  282. package/dist/services/storage/database/comparison-operations.js +65 -0
  283. package/dist/services/storage/database/comparison-operations.js.map +1 -0
  284. package/dist/services/storage/database/converters.d.ts +36 -0
  285. package/dist/services/storage/database/converters.d.ts.map +1 -0
  286. package/dist/services/storage/database/converters.js +244 -0
  287. package/dist/services/storage/database/converters.js.map +1 -0
  288. package/dist/services/storage/database/document-operations.d.ts +145 -0
  289. package/dist/services/storage/database/document-operations.d.ts.map +1 -0
  290. package/dist/services/storage/database/document-operations.js +498 -0
  291. package/dist/services/storage/database/document-operations.js.map +1 -0
  292. package/dist/services/storage/database/embedding-operations.d.ts +130 -0
  293. package/dist/services/storage/database/embedding-operations.d.ts.map +1 -0
  294. package/dist/services/storage/database/embedding-operations.js +315 -0
  295. package/dist/services/storage/database/embedding-operations.js.map +1 -0
  296. package/dist/services/storage/database/extraction-operations.d.ts +47 -0
  297. package/dist/services/storage/database/extraction-operations.d.ts.map +1 -0
  298. package/dist/services/storage/database/extraction-operations.js +85 -0
  299. package/dist/services/storage/database/extraction-operations.js.map +1 -0
  300. package/dist/services/storage/database/form-fill-operations.d.ts +58 -0
  301. package/dist/services/storage/database/form-fill-operations.d.ts.map +1 -0
  302. package/dist/services/storage/database/form-fill-operations.js +116 -0
  303. package/dist/services/storage/database/form-fill-operations.js.map +1 -0
  304. package/dist/services/storage/database/helpers.d.ts +29 -0
  305. package/dist/services/storage/database/helpers.d.ts.map +1 -0
  306. package/dist/services/storage/database/helpers.js +55 -0
  307. package/dist/services/storage/database/helpers.js.map +1 -0
  308. package/dist/services/storage/database/image-operations.d.ts +202 -0
  309. package/dist/services/storage/database/image-operations.d.ts.map +1 -0
  310. package/dist/services/storage/database/image-operations.js +484 -0
  311. package/dist/services/storage/database/image-operations.js.map +1 -0
  312. package/dist/services/storage/database/index.d.ts +13 -0
  313. package/dist/services/storage/database/index.d.ts.map +1 -0
  314. package/dist/services/storage/database/index.js +16 -0
  315. package/dist/services/storage/database/index.js.map +1 -0
  316. package/dist/services/storage/database/lock-operations.d.ts +59 -0
  317. package/dist/services/storage/database/lock-operations.d.ts.map +1 -0
  318. package/dist/services/storage/database/lock-operations.js +89 -0
  319. package/dist/services/storage/database/lock-operations.js.map +1 -0
  320. package/dist/services/storage/database/obligation-operations.d.ts +88 -0
  321. package/dist/services/storage/database/obligation-operations.d.ts.map +1 -0
  322. package/dist/services/storage/database/obligation-operations.js +206 -0
  323. package/dist/services/storage/database/obligation-operations.js.map +1 -0
  324. package/dist/services/storage/database/ocr-operations.d.ts +33 -0
  325. package/dist/services/storage/database/ocr-operations.d.ts.map +1 -0
  326. package/dist/services/storage/database/ocr-operations.js +70 -0
  327. package/dist/services/storage/database/ocr-operations.js.map +1 -0
  328. package/dist/services/storage/database/playbook-operations.d.ts +72 -0
  329. package/dist/services/storage/database/playbook-operations.d.ts.map +1 -0
  330. package/dist/services/storage/database/playbook-operations.js +247 -0
  331. package/dist/services/storage/database/playbook-operations.js.map +1 -0
  332. package/dist/services/storage/database/provenance-operations.d.ts +112 -0
  333. package/dist/services/storage/database/provenance-operations.d.ts.map +1 -0
  334. package/dist/services/storage/database/provenance-operations.js +251 -0
  335. package/dist/services/storage/database/provenance-operations.js.map +1 -0
  336. package/dist/services/storage/database/service.d.ts +142 -0
  337. package/dist/services/storage/database/service.d.ts.map +1 -0
  338. package/dist/services/storage/database/service.js +310 -0
  339. package/dist/services/storage/database/service.js.map +1 -0
  340. package/dist/services/storage/database/static-operations.d.ts +30 -0
  341. package/dist/services/storage/database/static-operations.d.ts.map +1 -0
  342. package/dist/services/storage/database/static-operations.js +218 -0
  343. package/dist/services/storage/database/static-operations.js.map +1 -0
  344. package/dist/services/storage/database/stats-operations.d.ts +101 -0
  345. package/dist/services/storage/database/stats-operations.d.ts.map +1 -0
  346. package/dist/services/storage/database/stats-operations.js +394 -0
  347. package/dist/services/storage/database/stats-operations.js.map +1 -0
  348. package/dist/services/storage/database/tag-operations.d.ts +76 -0
  349. package/dist/services/storage/database/tag-operations.d.ts.map +1 -0
  350. package/dist/services/storage/database/tag-operations.js +178 -0
  351. package/dist/services/storage/database/tag-operations.js.map +1 -0
  352. package/dist/services/storage/database/types.d.ts +286 -0
  353. package/dist/services/storage/database/types.d.ts.map +1 -0
  354. package/dist/services/storage/database/types.js +39 -0
  355. package/dist/services/storage/database/types.js.map +1 -0
  356. package/dist/services/storage/database/upload-operations.d.ts +71 -0
  357. package/dist/services/storage/database/upload-operations.d.ts.map +1 -0
  358. package/dist/services/storage/database/upload-operations.js +124 -0
  359. package/dist/services/storage/database/upload-operations.js.map +1 -0
  360. package/dist/services/storage/database/user-operations.d.ts +102 -0
  361. package/dist/services/storage/database/user-operations.d.ts.map +1 -0
  362. package/dist/services/storage/database/user-operations.js +151 -0
  363. package/dist/services/storage/database/user-operations.js.map +1 -0
  364. package/dist/services/storage/database/workflow-operations.d.ts +98 -0
  365. package/dist/services/storage/database/workflow-operations.d.ts.map +1 -0
  366. package/dist/services/storage/database/workflow-operations.js +157 -0
  367. package/dist/services/storage/database/workflow-operations.js.map +1 -0
  368. package/dist/services/storage/database.d.ts +16 -0
  369. package/dist/services/storage/database.d.ts.map +1 -0
  370. package/dist/services/storage/database.js +15 -0
  371. package/dist/services/storage/database.js.map +1 -0
  372. package/dist/services/storage/index.d.ts +10 -0
  373. package/dist/services/storage/index.d.ts.map +1 -0
  374. package/dist/services/storage/index.js +10 -0
  375. package/dist/services/storage/index.js.map +1 -0
  376. package/dist/services/storage/migrations/index.d.ts +16 -0
  377. package/dist/services/storage/migrations/index.d.ts.map +1 -0
  378. package/dist/services/storage/migrations/index.js +20 -0
  379. package/dist/services/storage/migrations/index.js.map +1 -0
  380. package/dist/services/storage/migrations/operations.d.ts +40 -0
  381. package/dist/services/storage/migrations/operations.d.ts.map +1 -0
  382. package/dist/services/storage/migrations/operations.js +2910 -0
  383. package/dist/services/storage/migrations/operations.js.map +1 -0
  384. package/dist/services/storage/migrations/schema-definitions.d.ts +306 -0
  385. package/dist/services/storage/migrations/schema-definitions.d.ts.map +1 -0
  386. package/dist/services/storage/migrations/schema-definitions.js +1006 -0
  387. package/dist/services/storage/migrations/schema-definitions.js.map +1 -0
  388. package/dist/services/storage/migrations/schema-helpers.d.ts +50 -0
  389. package/dist/services/storage/migrations/schema-helpers.d.ts.map +1 -0
  390. package/dist/services/storage/migrations/schema-helpers.js +176 -0
  391. package/dist/services/storage/migrations/schema-helpers.js.map +1 -0
  392. package/dist/services/storage/migrations/types.d.ts +15 -0
  393. package/dist/services/storage/migrations/types.d.ts.map +1 -0
  394. package/dist/services/storage/migrations/types.js +21 -0
  395. package/dist/services/storage/migrations/types.js.map +1 -0
  396. package/dist/services/storage/migrations/verification.d.ts +20 -0
  397. package/dist/services/storage/migrations/verification.d.ts.map +1 -0
  398. package/dist/services/storage/migrations/verification.js +78 -0
  399. package/dist/services/storage/migrations/verification.js.map +1 -0
  400. package/dist/services/storage/migrations.d.ts +16 -0
  401. package/dist/services/storage/migrations.d.ts.map +1 -0
  402. package/dist/services/storage/migrations.js +17 -0
  403. package/dist/services/storage/migrations.js.map +1 -0
  404. package/dist/services/storage/types.d.ts +12 -0
  405. package/dist/services/storage/types.d.ts.map +1 -0
  406. package/dist/services/storage/types.js +5 -0
  407. package/dist/services/storage/types.js.map +1 -0
  408. package/dist/services/storage/vector.d.ts +208 -0
  409. package/dist/services/storage/vector.d.ts.map +1 -0
  410. package/dist/services/storage/vector.js +526 -0
  411. package/dist/services/storage/vector.js.map +1 -0
  412. package/dist/services/vlm/pipeline.d.ts +194 -0
  413. package/dist/services/vlm/pipeline.d.ts.map +1 -0
  414. package/dist/services/vlm/pipeline.js +800 -0
  415. package/dist/services/vlm/pipeline.js.map +1 -0
  416. package/dist/services/vlm/prompts.d.ts +171 -0
  417. package/dist/services/vlm/prompts.d.ts.map +1 -0
  418. package/dist/services/vlm/prompts.js +229 -0
  419. package/dist/services/vlm/prompts.js.map +1 -0
  420. package/dist/services/vlm/service.d.ts +174 -0
  421. package/dist/services/vlm/service.d.ts.map +1 -0
  422. package/dist/services/vlm/service.js +256 -0
  423. package/dist/services/vlm/service.js.map +1 -0
  424. package/dist/services/webhook-delivery.d.ts +4 -0
  425. package/dist/services/webhook-delivery.d.ts.map +1 -0
  426. package/dist/services/webhook-delivery.js +140 -0
  427. package/dist/services/webhook-delivery.js.map +1 -0
  428. package/dist/tools/chunks.d.ts +19 -0
  429. package/dist/tools/chunks.d.ts.map +1 -0
  430. package/dist/tools/chunks.js +392 -0
  431. package/dist/tools/chunks.js.map +1 -0
  432. package/dist/tools/clm.d.ts +16 -0
  433. package/dist/tools/clm.d.ts.map +1 -0
  434. package/dist/tools/clm.js +668 -0
  435. package/dist/tools/clm.js.map +1 -0
  436. package/dist/tools/clustering.d.ts +13 -0
  437. package/dist/tools/clustering.d.ts.map +1 -0
  438. package/dist/tools/clustering.js +498 -0
  439. package/dist/tools/clustering.js.map +1 -0
  440. package/dist/tools/collaboration.d.ts +15 -0
  441. package/dist/tools/collaboration.d.ts.map +1 -0
  442. package/dist/tools/collaboration.js +516 -0
  443. package/dist/tools/collaboration.js.map +1 -0
  444. package/dist/tools/comparison.d.ts +13 -0
  445. package/dist/tools/comparison.d.ts.map +1 -0
  446. package/dist/tools/comparison.js +735 -0
  447. package/dist/tools/comparison.js.map +1 -0
  448. package/dist/tools/compliance.d.ts +15 -0
  449. package/dist/tools/compliance.d.ts.map +1 -0
  450. package/dist/tools/compliance.js +640 -0
  451. package/dist/tools/compliance.js.map +1 -0
  452. package/dist/tools/config.d.ts +19 -0
  453. package/dist/tools/config.d.ts.map +1 -0
  454. package/dist/tools/config.js +213 -0
  455. package/dist/tools/config.js.map +1 -0
  456. package/dist/tools/database.d.ts +62 -0
  457. package/dist/tools/database.d.ts.map +1 -0
  458. package/dist/tools/database.js +288 -0
  459. package/dist/tools/database.js.map +1 -0
  460. package/dist/tools/documents.d.ts +61 -0
  461. package/dist/tools/documents.d.ts.map +1 -0
  462. package/dist/tools/documents.js +1624 -0
  463. package/dist/tools/documents.js.map +1 -0
  464. package/dist/tools/embeddings.d.ts +14 -0
  465. package/dist/tools/embeddings.d.ts.map +1 -0
  466. package/dist/tools/embeddings.js +626 -0
  467. package/dist/tools/embeddings.js.map +1 -0
  468. package/dist/tools/evaluation.d.ts +25 -0
  469. package/dist/tools/evaluation.d.ts.map +1 -0
  470. package/dist/tools/evaluation.js +523 -0
  471. package/dist/tools/evaluation.js.map +1 -0
  472. package/dist/tools/events.d.ts +16 -0
  473. package/dist/tools/events.d.ts.map +1 -0
  474. package/dist/tools/events.js +493 -0
  475. package/dist/tools/events.js.map +1 -0
  476. package/dist/tools/extraction-structured.d.ts +13 -0
  477. package/dist/tools/extraction-structured.d.ts.map +1 -0
  478. package/dist/tools/extraction-structured.js +390 -0
  479. package/dist/tools/extraction-structured.js.map +1 -0
  480. package/dist/tools/extraction.d.ts +24 -0
  481. package/dist/tools/extraction.d.ts.map +1 -0
  482. package/dist/tools/extraction.js +424 -0
  483. package/dist/tools/extraction.js.map +1 -0
  484. package/dist/tools/file-management.d.ts +14 -0
  485. package/dist/tools/file-management.d.ts.map +1 -0
  486. package/dist/tools/file-management.js +523 -0
  487. package/dist/tools/file-management.js.map +1 -0
  488. package/dist/tools/form-fill.d.ts +13 -0
  489. package/dist/tools/form-fill.d.ts.map +1 -0
  490. package/dist/tools/form-fill.js +250 -0
  491. package/dist/tools/form-fill.js.map +1 -0
  492. package/dist/tools/health.d.ts +19 -0
  493. package/dist/tools/health.d.ts.map +1 -0
  494. package/dist/tools/health.js +229 -0
  495. package/dist/tools/health.js.map +1 -0
  496. package/dist/tools/images.d.ts +54 -0
  497. package/dist/tools/images.d.ts.map +1 -0
  498. package/dist/tools/images.js +787 -0
  499. package/dist/tools/images.js.map +1 -0
  500. package/dist/tools/ingestion.d.ts +94 -0
  501. package/dist/tools/ingestion.d.ts.map +1 -0
  502. package/dist/tools/ingestion.js +1659 -0
  503. package/dist/tools/ingestion.js.map +1 -0
  504. package/dist/tools/intelligence.d.ts +18 -0
  505. package/dist/tools/intelligence.d.ts.map +1 -0
  506. package/dist/tools/intelligence.js +1039 -0
  507. package/dist/tools/intelligence.js.map +1 -0
  508. package/dist/tools/provenance.d.ts +51 -0
  509. package/dist/tools/provenance.d.ts.map +1 -0
  510. package/dist/tools/provenance.js +691 -0
  511. package/dist/tools/provenance.js.map +1 -0
  512. package/dist/tools/reports.d.ts +41 -0
  513. package/dist/tools/reports.d.ts.map +1 -0
  514. package/dist/tools/reports.js +1394 -0
  515. package/dist/tools/reports.js.map +1 -0
  516. package/dist/tools/search.d.ts +35 -0
  517. package/dist/tools/search.d.ts.map +1 -0
  518. package/dist/tools/search.js +2528 -0
  519. package/dist/tools/search.js.map +1 -0
  520. package/dist/tools/shared.d.ts +52 -0
  521. package/dist/tools/shared.d.ts.map +1 -0
  522. package/dist/tools/shared.js +54 -0
  523. package/dist/tools/shared.js.map +1 -0
  524. package/dist/tools/tags.d.ts +15 -0
  525. package/dist/tools/tags.d.ts.map +1 -0
  526. package/dist/tools/tags.js +287 -0
  527. package/dist/tools/tags.js.map +1 -0
  528. package/dist/tools/timeline.d.ts +15 -0
  529. package/dist/tools/timeline.d.ts.map +1 -0
  530. package/dist/tools/timeline.js +14 -0
  531. package/dist/tools/timeline.js.map +1 -0
  532. package/dist/tools/users.d.ts +14 -0
  533. package/dist/tools/users.d.ts.map +1 -0
  534. package/dist/tools/users.js +257 -0
  535. package/dist/tools/users.js.map +1 -0
  536. package/dist/tools/vlm.d.ts +40 -0
  537. package/dist/tools/vlm.d.ts.map +1 -0
  538. package/dist/tools/vlm.js +475 -0
  539. package/dist/tools/vlm.js.map +1 -0
  540. package/dist/tools/workflow.d.ts +16 -0
  541. package/dist/tools/workflow.d.ts.map +1 -0
  542. package/dist/tools/workflow.js +495 -0
  543. package/dist/tools/workflow.js.map +1 -0
  544. package/dist/utils/backoff.d.ts +53 -0
  545. package/dist/utils/backoff.d.ts.map +1 -0
  546. package/dist/utils/backoff.js +78 -0
  547. package/dist/utils/backoff.js.map +1 -0
  548. package/dist/utils/config-persistence.d.ts +33 -0
  549. package/dist/utils/config-persistence.d.ts.map +1 -0
  550. package/dist/utils/config-persistence.js +61 -0
  551. package/dist/utils/config-persistence.js.map +1 -0
  552. package/dist/utils/hash.d.ts +65 -0
  553. package/dist/utils/hash.d.ts.map +1 -0
  554. package/dist/utils/hash.js +146 -0
  555. package/dist/utils/hash.js.map +1 -0
  556. package/dist/utils/math.d.ts +21 -0
  557. package/dist/utils/math.d.ts.map +1 -0
  558. package/dist/utils/math.js +39 -0
  559. package/dist/utils/math.js.map +1 -0
  560. package/dist/utils/validation.d.ts +697 -0
  561. package/dist/utils/validation.d.ts.map +1 -0
  562. package/dist/utils/validation.js +529 -0
  563. package/dist/utils/validation.js.map +1 -0
  564. package/package.json +96 -0
  565. package/python/.gitkeep +0 -0
  566. package/python/__init__.py +104 -0
  567. package/python/clustering_worker.py +440 -0
  568. package/python/docx_image_extractor.py +524 -0
  569. package/python/embedding_worker.py +552 -0
  570. package/python/file_manager_worker.py +564 -0
  571. package/python/form_fill_worker.py +399 -0
  572. package/python/gpu_utils.py +582 -0
  573. package/python/image_extractor.py +317 -0
  574. package/python/image_optimizer.py +444 -0
  575. package/python/ocr_worker.py +712 -0
  576. package/python/pyproject.toml +76 -0
  577. package/python/requirements.txt +51 -0
  578. package/python/reranker_worker.py +87 -0
@@ -0,0 +1 @@
1
+ {"version":3,"file":"converters.js","sourceRoot":"","sources":["../../../../src/services/storage/database/converters.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAqBH,yDAAyD;AACzD,MAAM,uBAAuB,GAA8B,CAAC,SAAS,EAAE,YAAY,EAAE,UAAU,EAAE,QAAQ,CAAC,CAAC;AAE3G,yDAAyD;AACzD,MAAM,sBAAsB,GAAsB;IAChD,UAAU,EAAE,YAAY,EAAE,OAAO,EAAE,OAAO,EAAE,iBAAiB;IAC7D,YAAY,EAAE,WAAW,EAAE,YAAY,EAAE,YAAY,EAAE,WAAW;CACnE,CAAC;AAEF,oDAAoD;AACpD,MAAM,kBAAkB,GAAyB,CAAC,SAAS,EAAE,YAAY,EAAE,UAAU,EAAE,QAAQ,CAAC,CAAC;AAEjG;;;GAGG;AACH,SAAS,YAAY,CAAmB,KAAa,EAAE,WAAyB,EAAE,SAAiB,EAAE,EAAU;IAC7G,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,KAAU,CAAC,EAAE,CAAC;QACtC,MAAM,IAAI,KAAK,CAAC,WAAW,SAAS,KAAK,KAAK,eAAe,EAAE,mBAAmB,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAC9G,CAAC;IACD,OAAO,KAAU,CAAC;AACpB,CAAC;AAED;;GAEG;AACH,SAAS,qBAAqB,CAAC,EAAU,EAAE,GAAW;IACpD,IAAI,CAAC;QACH,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAA4B,CAAC;IACpD,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,CAAC,KAAK,CACX,wDAAwD,EAAE,KAAK,GAAG,GAAG,EACrE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CACvD,CAAC;QACF,OAAO,EAAE,YAAY,EAAE,IAAI,EAAE,IAAI,EAAE,GAAG,EAAE,CAAC;IAC3C,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,SAAS,aAAa,CAAC,EAAU,EAAE,GAAW;IAC5C,IAAI,CAAC;QACH,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAuB,CAAC;IAC/C,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,CAAC,KAAK,CACX,+CAA+C,EAAE,KAAK,GAAG,GAAG,EAC5D,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CACvD,CAAC;QACF,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,SAAS,sBAAsB,CAAC,EAAU,EAAE,GAAW;IACrD,IAAI,CAAC;QACH,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAsB,CAAC;IAC9C,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,CAAC,KAAK,CACX,qDAAqD,EAAE,KAAK,GAAG,GAAG,EAClE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CACvD,CAAC;QACF,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,aAAa,CAAC,GAAgB;IAC5C,OAAO;QACL,EAAE,EAAE,GAAG,CAAC,EAAE;QACV,SAAS,EAAE,GAAG,CAAC,SAAS;QACxB,SAAS,EAAE,GAAG,CAAC,SAAS;QACxB,SAAS,EAAE,GAAG,CAAC,SAAS;QACxB,SAAS,EAAE,G
AAG,CAAC,SAAS;QACxB,SAAS,EAAE,GAAG,CAAC,SAAS;QACxB,MAAM,EAAE,YAAY,CAAC,GAAG,CAAC,MAAM,EAAE,uBAAuB,EAAE,gBAAgB,EAAE,GAAG,CAAC,EAAE,CAAC;QACnF,UAAU,EAAE,GAAG,CAAC,UAAU;QAC1B,aAAa,EAAE,GAAG,CAAC,aAAa;QAChC,UAAU,EAAE,GAAG,CAAC,UAAU;QAC1B,WAAW,EAAE,GAAG,CAAC,WAAW;QAC5B,gBAAgB,EAAE,GAAG,CAAC,gBAAgB;QACtC,aAAa,EAAE,GAAG,CAAC,aAAa;QAChC,SAAS,EAAE,GAAG,CAAC,SAAS,IAAI,IAAI;QAChC,UAAU,EAAE,GAAG,CAAC,UAAU,IAAI,IAAI;QAClC,WAAW,EAAE,GAAG,CAAC,WAAW,IAAI,IAAI;QACpC,eAAe,EAAE,GAAG,CAAC,eAAe,IAAI,IAAI;KAC7C,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,cAAc,CAAC,GAAiB;IAC9C,OAAO;QACL,EAAE,EAAE,GAAG,CAAC,EAAE;QACV,aAAa,EAAE,GAAG,CAAC,aAAa;QAChC,WAAW,EAAE,GAAG,CAAC,WAAW;QAC5B,cAAc,EAAE,GAAG,CAAC,cAAc;QAClC,WAAW,EAAE,GAAG,CAAC,WAAW;QAC5B,kBAAkB,EAAE,GAAG,CAAC,kBAAkB;QAC1C,YAAY,EAAE,GAAG,CAAC,YAAgD;QAClE,mBAAmB,EAAE,GAAG,CAAC,mBAAmB;QAC5C,UAAU,EAAE,GAAG,CAAC,UAAU;QAC1B,UAAU,EAAE,GAAG,CAAC,UAAU;QAC1B,YAAY,EAAE,GAAG,CAAC,YAAY;QAC9B,qBAAqB,EAAE,GAAG,CAAC,qBAAqB;QAChD,uBAAuB,EAAE,GAAG,CAAC,uBAAuB;QACpD,sBAAsB,EAAE,GAAG,CAAC,sBAAsB;QAClD,WAAW,EAAE,GAAG,CAAC,WAAW,IAAI,IAAI;QACpC,WAAW,EAAE,GAAG,CAAC,WAAW,IAAI,IAAI;KACrC,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,UAAU,CAAC,GAAa;IACtC,OAAO;QACL,EAAE,EAAE,GAAG,CAAC,EAAE;QACV,WAAW,EAAE,GAAG,CAAC,WAAW;QAC5B,aAAa,EAAE,GAAG,CAAC,aAAa;QAChC,IAAI,EAAE,GAAG,CAAC,IAAI;QACd,SAAS,EAAE,GAAG,CAAC,SAAS;QACxB,WAAW,EAAE,GAAG,CAAC,WAAW;QAC5B,eAAe,EAAE,GAAG,CAAC,eAAe;QACpC,aAAa,EAAE,GAAG,CAAC,aAAa;QAChC,WAAW,EAAE,GAAG,CAAC,WAAW;QAC5B,UAAU,EAAE,GAAG,CAAC,UAAU;QAC1B,gBAAgB,EAAE,GAAG,CAAC,gBAAgB;QACtC,YAAY,EAAE,GAAG,CAAC,YAAY;QAC9B,aAAa,EAAE,GAAG,CAAC,aAAa;QAChC,UAAU,EAAE,GAAG,CAAC,UAAU;QAC1B,gBAAgB,EAAE,GAAG,CAAC,gBAAqD;QAC3E,WAAW,EAAE,GAAG,CAAC,WAAW;QAC5B,iBAAiB,EAAE,GAAG,CAAC,iBAAiB,IAAI,IAAI;QAChD,eAAe,EAAE,GAAG,CAAC,eAAe;QACpC,aAAa,EAAE,GAAG,CAAC,aAAa;QAChC,YAAY,EAAE,GAAG,CAAC,YAAY;QAC9B,aAAa,EAAE,GAAG,CAAC,aAAa;QAChC,SAAS,EAAE,GAAG,CAAC,SAAS;QACxB,iBAAiB,EAAE,GAAG,CAAC,iBAAiB;KACzC,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,cAAc,CAAC,GAAiB;IAC9C,OAAO;QACL,EAAE,EAAE,GAAG
,CAAC,EAAE;QACV,QAAQ,EAAE,GAAG,CAAC,QAAQ;QACtB,QAAQ,EAAE,GAAG,CAAC,QAAQ;QACtB,aAAa,EAAE,GAAG,CAAC,aAAa;QAChC,WAAW,EAAE,GAAG,CAAC,WAAW;QAC5B,aAAa,EAAE,GAAG,CAAC,aAAa;QAChC,oBAAoB,EAAE,GAAG,CAAC,oBAAoB;QAC9C,gBAAgB,EAAE,GAAG,CAAC,gBAAgB;QACtC,gBAAgB,EAAE,GAAG,CAAC,gBAAgB;QACtC,gBAAgB,EAAE,GAAG,CAAC,gBAAgB;QACtC,WAAW,EAAE,GAAG,CAAC,WAAW;QAC5B,UAAU,EAAE,GAAG,CAAC,UAAU;QAC1B,eAAe,EAAE,GAAG,CAAC,eAAe;QACpC,aAAa,EAAE,GAAG,CAAC,aAAa;QAChC,WAAW,EAAE,GAAG,CAAC,WAAW;QAC5B,YAAY,EAAE,GAAG,CAAC,YAAY;QAC9B,UAAU,EAAE,GAAG,CAAC,UAAU;QAC1B,aAAa,EAAE,GAAG,CAAC,aAAa;QAChC,SAAS,EAAE,GAAG,CAAC,SAA+C;QAC9D,cAAc,EAAE,GAAG,CAAC,cAAyB;QAC7C,UAAU,EAAE,GAAG,CAAC,UAAU,IAAI,EAAE;QAChC,aAAa,EAAE,GAAG,CAAC,aAAa;QAChC,YAAY,EAAE,GAAG,CAAC,YAAY;QAC9B,UAAU,EAAE,GAAG,CAAC,UAAU;QAC1B,sBAAsB,EAAE,GAAG,CAAC,sBAAsB;KACnD,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,eAAe,CAAC,GAAkB;IAChD,OAAO;QACL,EAAE,EAAE,GAAG,CAAC,EAAE;QACV,IAAI,EAAE,YAAY,CAAC,GAAG,CAAC,IAAI,EAAE,sBAAsB,EAAE,gBAAgB,EAAE,GAAG,CAAC,EAAE,CAAmB;QAChG,UAAU,EAAE,GAAG,CAAC,UAAU;QAC1B,YAAY,EAAE,GAAG,CAAC,YAAY;QAC9B,sBAAsB,EAAE,GAAG,CAAC,sBAAsB;QAClD,uBAAuB,EAAE,GAAG,CAAC,uBAAuB;QACpD,WAAW,EAAE,GAAG,CAAC,WAAyB;QAC1C,WAAW,EAAE,GAAG,CAAC,WAAW;QAC5B,SAAS,EAAE,GAAG,CAAC,SAAS;QACxB,gBAAgB,EAAE,GAAG,CAAC,gBAAgB;QACtC,QAAQ,EAAE,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,aAAa,CAAC,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI;QACnE,YAAY,EAAE,GAAG,CAAC,YAAY;QAC9B,UAAU,EAAE,GAAG,CAAC,UAAU;QAC1B,SAAS,EAAE,GAAG,CAAC,SAAS;QACxB,SAAS,EAAE,GAAG,CAAC,SAAS;QACxB,iBAAiB,EAAE,GAAG,CAAC,iBAAiB;QACxC,iBAAiB,EAAE,qBAAqB,CAAC,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,iBAAiB,CAAC;QACvE,sBAAsB,EAAE,GAAG,CAAC,sBAAsB;QAClD,wBAAwB,EAAE,GAAG,CAAC,wBAAwB;QACtD,SAAS,EAAE,GAAG,CAAC,SAAS;QACxB,UAAU,EAAE,GAAG,CAAC,UAAU;QAC1B,WAAW,EAAE,GAAG,CAAC,WAAW;QAC5B,UAAU,EAAE,GAAG,CAAC,UAAU;KAC3B,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,UAAU,CAAC,GAAa;IACtC,OAAO;QACL,EAAE,EAAE,GAAG,CAAC,EAAE;QACV,WAAW,EAAE,GAAG,CAAC,WAAW;QAC5B,aAAa,EAAE,GAAG,CAAC,aAAa;QAChC,WAAW,EAAE,GAAG,CAAC,WAAW;QAC5B,YAAY,EAAE;YACZ,CA
AC,EAAE,GAAG,CAAC,MAAM;YACb,CAAC,EAAE,GAAG,CAAC,MAAM;YACb,KAAK,EAAE,GAAG,CAAC,UAAU;YACrB,MAAM,EAAE,GAAG,CAAC,WAAW;SACxB;QACD,WAAW,EAAE,GAAG,CAAC,WAAW;QAC5B,MAAM,EAAE,GAAG,CAAC,MAAM;QAClB,UAAU,EAAE;YACV,KAAK,EAAE,GAAG,CAAC,KAAK;YAChB,MAAM,EAAE,GAAG,CAAC,MAAM;SACnB;QACD,cAAc,EAAE,GAAG,CAAC,cAAc;QAClC,SAAS,EAAE,GAAG,CAAC,SAAS;QACxB,UAAU,EAAE,YAAY,CAAC,GAAG,CAAC,UAAU,EAAE,kBAAkB,EAAE,WAAW,EAAE,GAAG,CAAC,EAAE,CAAC;QACjF,eAAe,EAAE,GAAG,CAAC,eAAe;QACpC,mBAAmB,EAAE,GAAG,CAAC,mBAAmB;YAC1C,CAAC,CAAC,sBAAsB,CAAC,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,mBAAmB,CAAC;YACzD,CAAC,CAAC,IAAI;QACR,gBAAgB,EAAE,GAAG,CAAC,gBAAgB;QACtC,SAAS,EAAE,GAAG,CAAC,SAAS;QACxB,cAAc,EAAE,GAAG,CAAC,cAAc;QAClC,gBAAgB,EAAE,GAAG,CAAC,gBAAgB;QACtC,eAAe,EAAE,GAAG,CAAC,eAAe;QACpC,YAAY,EAAE,GAAG,CAAC,YAAY;QAC9B,aAAa,EAAE,GAAG,CAAC,aAAa;QAChC,UAAU,EAAE,GAAG,CAAC,UAAU;QAC1B,aAAa,EAAE,GAAG,CAAC,aAAa;QAChC,UAAU,EAAE,GAAG,CAAC,UAAU,IAAI,IAAI;QAClC,gBAAgB,EAAE,OAAO,CAAC,GAAG,CAAC,gBAAgB,CAAC;QAC/C,YAAY,EAAE,GAAG,CAAC,YAAY,IAAI,IAAI;KACvC,CAAC;AACJ,CAAC"}
@@ -0,0 +1,145 @@
1
+ /**
2
+ * Document operations for DatabaseService
3
+ *
4
+ * Handles all CRUD operations for documents including
5
+ * insert, get, list, update, and delete with cascade.
6
+ */
7
+ import Database from 'better-sqlite3';
8
+ import { Document, DocumentStatus } from '../../../models/document.js';
9
+ import { ListDocumentsOptions } from './types.js';
10
+ /**
11
+ * Encode a cursor from a (created_at, id) tuple.
12
+ * The tuple is serialized as JSON (`{ created_at, id }`) and base64url-encoded for URL-safe transport.
13
+ *
14
+ * @param createdAt - ISO 8601 timestamp
15
+ * @param id - Document UUID
16
+ * @returns Base64url-encoded cursor string
17
+ */
18
+ export declare function encodeCursor(createdAt: string, id: string): string;
19
+ /**
20
+ * Decode a cursor produced by encodeCursor back to a (created_at, id) tuple.
21
+ *
22
+ * @param cursor - Base64url-encoded cursor string
23
+ * @returns Decoded cursor with created_at and id (both checked to be strings)
24
+ * @throws Error (message prefixed with "Invalid cursor: ") if cursor is invalid or malformed
25
+ */
26
+ export declare function decodeCursor(cursor: string): {
27
+ created_at: string;
28
+ id: string;
29
+ };
30
+ /**
31
+ * Insert a new document
32
+ *
33
+ * @param db - Database connection
34
+ * @param doc - Document data (created_at is generated at insert time from the current clock)
35
+ * @param updateMetadataCounts - Callback to update metadata counts; invoked after the insert
36
+ * @returns string - The document ID (doc.id, as supplied by the caller)
37
+ */
38
+ export declare function insertDocument(db: Database.Database, doc: Omit<Document, 'created_at'>, updateMetadataCounts: () => void): string;
39
+ /**
40
+ * Get a single document by its ID
41
+ *
42
+ * @param db - Database connection
43
+ * @param id - Document ID (matched exactly against documents.id)
44
+ * @returns Document | null - The converted document row, or null if not found
45
+ */
46
+ export declare function getDocument(db: Database.Database, id: string): Document | null;
47
+ /**
48
+ * Get a single document by its file path
49
+ *
50
+ * @param db - Database connection
51
+ * @param filePath - Full file path (matched exactly against documents.file_path)
52
+ * @returns Document | null - The converted document row, or null if not found
53
+ */
54
+ export declare function getDocumentByPath(db: Database.Database, filePath: string): Document | null;
55
+ /**
56
+ * Get a single document by its file hash
57
+ *
58
+ * @param db - Database connection
59
+ * @param fileHash - SHA-256 file hash (matched exactly against documents.file_hash)
60
+ * @returns Document | null - The converted document row, or null if not found
61
+ */
62
+ export declare function getDocumentByHash(db: Database.Database, fileHash: string): Document | null;
63
+ /**
64
+ * Result from listDocumentsWithCursor: one page of documents plus the cursor for the following page
65
+ */
66
+ export interface ListDocumentsResult {
67
+ documents: Document[];
68
+ /** Cursor for the next page, built by encodeCursor from the last document on this page (null if no more results) */
69
+ next_cursor: string | null;
70
+ }
71
+ /**
72
+ * List documents with optional filtering (delegates to listDocumentsWithCursor and returns only its documents).
73
+ *
74
+ * Supports both offset-based and cursor-based pagination:
75
+ * - When `cursor` is provided, uses keyset pagination (WHERE created_at < cursor.created_at
76
+ * OR (created_at = cursor.created_at AND id < cursor.id)) and ignores offset.
77
+ * - When `cursor` is absent, uses traditional LIMIT/OFFSET.
78
+ *
79
+ * @param db - Database connection
80
+ * @param options - Optional filter options (status, limit, offset, cursor)
81
+ * @returns Document[] - Array of documents (backward-compatible; the next-page cursor is dropped, use listDocumentsWithCursor to get it)
82
+ */
83
+ export declare function listDocuments(db: Database.Database, options?: ListDocumentsOptions): Document[];
84
+ /**
85
+ * List documents with cursor-based pagination support, ordered by created_at DESC then id DESC.
86
+ *
87
+ * Returns both the documents and a next_cursor for fetching the next page.
88
+ *
89
+ * @param db - Database connection
90
+ * @param options - Optional filter options (status, limit, offset, cursor); limit defaults to 10000 and offset is ignored when cursor is set
91
+ * @returns ListDocumentsResult with documents and next_cursor
92
+ */
93
+ export declare function listDocumentsWithCursor(db: Database.Database, options?: ListDocumentsOptions): ListDocumentsResult;
94
+ /**
95
+ * Update document status, error_message and modified_at. Throws DatabaseError (DOCUMENT_NOT_FOUND) when no document has this ID.
96
+ *
97
+ * @param db - Database connection
98
+ * @param id - Document ID
99
+ * @param status - New status
100
+ * @param errorMessage - Optional error message (for 'failed' status); stored as NULL when omitted
101
+ * @param updateMetadataModified - Callback to update metadata modified timestamp; invoked only after a row was updated
102
+ */
103
+ export declare function updateDocumentStatus(db: Database.Database, id: string, status: DocumentStatus, errorMessage: string | undefined, updateMetadataModified: () => void): void;
104
+ /**
105
+ * Update document when OCR completes: stores page count and completion time and sets status to 'processing'. Throws DatabaseError (DOCUMENT_NOT_FOUND) when no document has this ID.
106
+ *
107
+ * @param db - Database connection
108
+ * @param id - Document ID
109
+ * @param pageCount - Number of pages processed
110
+ * @param ocrCompletedAt - ISO 8601 completion timestamp
111
+ * @param updateMetadataModified - Callback to update metadata modified timestamp; invoked only after a row was updated
112
+ */
113
+ export declare function updateDocumentOCRComplete(db: Database.Database, id: string, pageCount: number, ocrCompletedAt: string, updateMetadataModified: () => void): void;
114
+ /**
115
+ * Update document metadata (title, author, subject) from OCR extraction; modified_at is refreshed as well
116
+ *
117
+ * @param db - Database connection
118
+ * @param id - Document ID (an ID that matches no row is not treated as an error)
119
+ * @param metadata - Metadata fields to update (null or omitted values are ignored via COALESCE)
120
+ * @param updateMetadataModified - Callback to update metadata modified timestamp; invoked only when a row was updated
121
+ */
122
+ export declare function updateDocumentMetadata(db: Database.Database, id: string, metadata: {
123
+ docTitle?: string | null;
124
+ docAuthor?: string | null;
125
+ docSubject?: string | null;
126
+ }, updateMetadataModified: () => void): void;
127
+ /**
128
+ * Delete a document and all related data (CASCADE DELETE)
129
+ *
130
+ * @param db - Database connection
131
+ * @param id - Document ID to delete
132
+ * @param updateMetadataCounts - Callback to update metadata counts
133
+ */
134
+ export declare function deleteDocument(db: Database.Database, id: string, updateMetadataCounts: () => void): void;
135
+ /**
136
+ * Clean all derived data for a document, keeping the document record and its DOCUMENT-level provenance.
137
+ *
138
+ * Deletes: vec_embeddings, embeddings, images, chunks, ocr_results, and non-root provenance records.
139
+ * This is used by retry_failed to reset a document to a clean "pending" state.
140
+ *
141
+ * @param db - Database connection
142
+ * @param documentId - Document ID to clean
143
+ */
144
+ export declare function cleanDocumentDerivedData(db: Database.Database, documentId: string): void;
145
+ //# sourceMappingURL=document-operations.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"document-operations.d.ts","sourceRoot":"","sources":["../../../../src/services/storage/database/document-operations.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,QAAQ,MAAM,gBAAgB,CAAC;AAEtC,OAAO,EAAE,QAAQ,EAAE,cAAc,EAAE,MAAM,6BAA6B,CAAC;AACvE,OAAO,EAAiD,oBAAoB,EAAE,MAAM,YAAY,CAAC;AASjG;;;;;;;GAOG;AACH,wBAAgB,YAAY,CAAC,SAAS,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,GAAG,MAAM,CAElE;AAED;;;;;;GAMG;AACH,wBAAgB,YAAY,CAAC,MAAM,EAAE,MAAM,GAAG;IAAE,UAAU,EAAE,MAAM,CAAC;IAAC,EAAE,EAAE,MAAM,CAAA;CAAE,CAU/E;AAED;;;;;;;GAOG;AACH,wBAAgB,cAAc,CAC5B,EAAE,EAAE,QAAQ,CAAC,QAAQ,EACrB,GAAG,EAAE,IAAI,CAAC,QAAQ,EAAE,YAAY,CAAC,EACjC,oBAAoB,EAAE,MAAM,IAAI,GAC/B,MAAM,CAiCR;AAED;;;;;;GAMG;AACH,wBAAgB,WAAW,CAAC,EAAE,EAAE,QAAQ,CAAC,QAAQ,EAAE,EAAE,EAAE,MAAM,GAAG,QAAQ,GAAG,IAAI,CAI9E;AAED;;;;;;GAMG;AACH,wBAAgB,iBAAiB,CAAC,EAAE,EAAE,QAAQ,CAAC,QAAQ,EAAE,QAAQ,EAAE,MAAM,GAAG,QAAQ,GAAG,IAAI,CAI1F;AAED;;;;;;GAMG;AACH,wBAAgB,iBAAiB,CAAC,EAAE,EAAE,QAAQ,CAAC,QAAQ,EAAE,QAAQ,EAAE,MAAM,GAAG,QAAQ,GAAG,IAAI,CAI1F;AAED;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC,SAAS,EAAE,QAAQ,EAAE,CAAC;IACtB,yDAAyD;IACzD,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;CAC5B;AAED;;;;;;;;;;;GAWG;AACH,wBAAgB,aAAa,CAAC,EAAE,EAAE,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,EAAE,oBAAoB,GAAG,QAAQ,EAAE,CAG/F;AAED;;;;;;;;GAQG;AACH,wBAAgB,uBAAuB,CACrC,EAAE,EAAE,QAAQ,CAAC,QAAQ,EACrB,OAAO,CAAC,EAAE,oBAAoB,GAC7B,mBAAmB,CA6CrB;AAED;;;;;;;;GAQG;AACH,wBAAgB,oBAAoB,CAClC,EAAE,EAAE,QAAQ,CAAC,QAAQ,EACrB,EAAE,EAAE,MAAM,EACV,MAAM,EAAE,cAAc,EACtB,YAAY,EAAE,MAAM,GAAG,SAAS,EAChC,sBAAsB,EAAE,MAAM,IAAI,GACjC,IAAI,CAgBN;AAED;;;;;;;;GAQG;AACH,wBAAgB,yBAAyB,CACvC,EAAE,EAAE,QAAQ,CAAC,QAAQ,EACrB,EAAE,EAAE,MAAM,EACV,SAAS,EAAE,MAAM,EACjB,cAAc,EAAE,MAAM,EACtB,sBAAsB,EAAE,MAAM,IAAI,GACjC,IAAI,CAgBN;AAED;;;;;;;GAOG;AACH,wBAAgB,sBAAsB,CACpC,EAAE,EAAE,QAAQ,CAAC,QAAQ,EACrB,EAAE,EAAE,MAAM,EACV,QAAQ,EAAE;IAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAAC,SAAS,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAAC,UAAU,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;CAAE,EAC7F,sBAAsB,EAAE,MAAM,IAAI,GACjC,IAAI,CAkBN;AAmPD;;;;;;GAMG;AACH
,wBAAgB,cAAc,CAC5B,EAAE,EAAE,QAAQ,CAAC,QAAQ,EACrB,EAAE,EAAE,MAAM,EACV,oBAAoB,EAAE,MAAM,IAAI,GAC/B,IAAI,CA4DN;AAED;;;;;;;;GAQG;AACH,wBAAgB,wBAAwB,CAAC,EAAE,EAAE,QAAQ,CAAC,QAAQ,EAAE,UAAU,EAAE,MAAM,GAAG,IAAI,CAuCxF"}
@@ -0,0 +1,498 @@
1
+ /**
2
+ * Document operations for DatabaseService
3
+ *
4
+ * Handles all CRUD operations for documents including
5
+ * insert, get, list, update, and delete with cascade.
6
+ */
7
+ import { v4 as uuidv4 } from 'uuid';
8
+ import { DatabaseError, DatabaseErrorCode } from './types.js';
9
+ import { runWithForeignKeyCheck } from './helpers.js';
10
+ import { rowToDocument } from './converters.js';
11
+ import { computeHash } from '../../../utils/hash.js';
12
+ // ═══════════════════════════════════════════════════════════════════════════════
13
+ // CURSOR-BASED PAGINATION HELPERS
14
+ // ═══════════════════════════════════════════════════════════════════════════════
15
/**
 * Build an opaque pagination cursor from a document's sort key.
 *
 * The (created_at, id) pair is serialized as JSON and then base64url-encoded
 * so the cursor can travel in URLs without escaping.
 *
 * @param createdAt - ISO 8601 timestamp
 * @param id - Document UUID
 * @returns Base64url-encoded cursor string
 */
export function encodeCursor(createdAt, id) {
  const payload = { created_at: createdAt, id };
  const json = JSON.stringify(payload);
  return Buffer.from(json).toString('base64url');
}
26
/**
 * Decode a cursor back to a (created_at, id) tuple.
 *
 * Every failure mode (undecodable input, invalid JSON, a payload that is not
 * an object, or missing / non-string fields) surfaces as one `Error` whose
 * message starts with "Invalid cursor: " and whose `cause` is the underlying
 * error.
 *
 * @param cursor - Base64url-encoded cursor string
 * @returns Decoded cursor with created_at and id (other payload fields are dropped)
 * @throws Error if cursor is invalid or malformed
 */
export function decodeCursor(cursor) {
  try {
    const decoded = JSON.parse(Buffer.from(cursor, 'base64url').toString('utf-8'));
    // Valid JSON such as `null` is still not a cursor payload; reject it
    // explicitly rather than leaking a "Cannot read properties of null" message.
    const isPayload = decoded !== null &&
      typeof decoded === 'object' &&
      typeof decoded.created_at === 'string' &&
      typeof decoded.id === 'string';
    if (!isPayload) {
      throw new Error('Invalid cursor format: missing created_at or id');
    }
    return { created_at: decoded.created_at, id: decoded.id };
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    // Keep the original error reachable for debugging.
    throw new Error(`Invalid cursor: ${reason}`, { cause: error });
  }
}
45
/**
 * Insert a new document row.
 *
 * `created_at` is stamped here with the current time; every other column
 * value is taken from `doc` unchanged.
 *
 * @param db - Database connection
 * @param doc - Document data (created_at will be generated)
 * @param updateMetadataCounts - Callback to update metadata counts; invoked after the insert
 * @returns string - The document ID (`doc.id`)
 */
export function insertDocument(db, doc, updateMetadataCounts) {
  const insertedAt = new Date().toISOString();
  const insertStmt = db.prepare(`
    INSERT INTO documents (
      id, file_path, file_name, file_hash, file_size, file_type,
      status, page_count, provenance_id, created_at, modified_at,
      ocr_completed_at, error_message
    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
  `);
  // Bound values, in the same order as the column list above.
  const values = [
    doc.id,
    doc.file_path,
    doc.file_name,
    doc.file_hash,
    doc.file_size,
    doc.file_type,
    doc.status,
    doc.page_count,
    doc.provenance_id,
    insertedAt,
    doc.modified_at,
    doc.ocr_completed_at,
    doc.error_message,
  ];
  // Context string handed to runWithForeignKeyCheck along with the statement.
  const failureContext = `inserting document: provenance_id "${doc.provenance_id}" does not exist`;
  runWithForeignKeyCheck(insertStmt, values, failureContext);
  updateMetadataCounts();
  return doc.id;
}
80
/**
 * Look up one document by its primary key.
 *
 * @param db - Database connection
 * @param id - Document ID
 * @returns Document | null - The converted row, or null when no row matches
 */
export function getDocument(db, id) {
  const row = db.prepare('SELECT * FROM documents WHERE id = ?').get(id);
  if (!row) {
    return null;
  }
  return rowToDocument(row);
}
92
/**
 * Look up one document by the file path stored on it.
 *
 * @param db - Database connection
 * @param filePath - Full file path (compared with `file_path = ?`)
 * @returns Document | null - The converted row, or null when no row matches
 */
export function getDocumentByPath(db, filePath) {
  const row = db.prepare('SELECT * FROM documents WHERE file_path = ?').get(filePath);
  if (!row) {
    return null;
  }
  return rowToDocument(row);
}
104
/**
 * Look up one document by the content hash stored on it.
 *
 * @param db - Database connection
 * @param fileHash - SHA-256 file hash (compared with `file_hash = ?`)
 * @returns Document | null - The converted row, or null when no row matches
 */
export function getDocumentByHash(db, fileHash) {
  const row = db.prepare('SELECT * FROM documents WHERE file_hash = ?').get(fileHash);
  if (!row) {
    return null;
  }
  return rowToDocument(row);
}
116
/**
 * List documents with optional filtering.
 *
 * Delegates to listDocumentsWithCursor and returns only the `documents`
 * array; the next-page cursor is discarded. Pagination therefore behaves
 * exactly as it does there:
 * - with `cursor`: keyset pagination (WHERE created_at < cursor.created_at
 *   OR (created_at = cursor.created_at AND id < cursor.id)), offset ignored;
 * - without `cursor`: traditional LIMIT/OFFSET.
 *
 * @param db - Database connection
 * @param options - Optional filter options (status, limit, offset, cursor)
 * @returns Document[] - Array of documents (backward-compatible)
 */
export function listDocuments(db, options) {
  const { documents } = listDocumentsWithCursor(db, options);
  return documents;
}
132
/**
 * List documents with cursor-based pagination support.
 *
 * Rows are ordered by `created_at DESC, id DESC`. The result carries the page
 * of documents plus a `next_cursor` that is non-null only when at least one
 * more row exists after this page. To detect that, the query asks for one row
 * more than `limit` and drops the extra row before returning.
 *
 * @param db - Database connection
 * @param options - Optional filter options (status, limit, offset, cursor).
 *   `limit` defaults to 10000; `offset` is ignored when `cursor` is set.
 * @returns ListDocumentsResult with documents and next_cursor
 * @throws Error if `options.cursor` cannot be decoded (raised by decodeCursor)
 */
export function listDocumentsWithCursor(db, options) {
  const conditions = [];
  const params = [];
  if (options?.status) {
    conditions.push('status = ?');
    params.push(options.status);
  }
  // Keyset filter: rows strictly after the cursor row in the sort order below.
  if (options?.cursor) {
    const decoded = decodeCursor(options.cursor);
    conditions.push('(created_at < ? OR (created_at = ? AND id < ?))');
    params.push(decoded.created_at, decoded.created_at, decoded.id);
  }
  let query = 'SELECT * FROM documents';
  if (conditions.length > 0) {
    query += ` WHERE ${conditions.join(' AND ')}`;
  }
  query += ' ORDER BY created_at DESC, id DESC';
  const limit = options?.limit ?? 10000;
  // Probe one row past the page so a full final page does not yield a cursor
  // that leads to an empty page. Only done for ordinary non-negative integer
  // limits; any other value is passed through to SQLite untouched.
  const canProbe = Number.isSafeInteger(limit) && limit >= 0;
  query += ' LIMIT ?';
  params.push(canProbe ? limit + 1 : limit);
  // Only apply OFFSET when NOT using cursor-based pagination
  if (!options?.cursor && options?.offset !== undefined) {
    query += ' OFFSET ?';
    params.push(options.offset);
  }
  const rows = db.prepare(query).all(...params);
  const hasMore = canProbe && rows.length > limit;
  const pageRows = hasMore ? rows.slice(0, limit) : rows;
  const documents = pageRows.map((row) => rowToDocument(row));
  // The cursor points at the last row actually returned to the caller.
  let next_cursor = null;
  if (hasMore && documents.length > 0) {
    const lastDoc = documents[documents.length - 1];
    next_cursor = encodeCursor(lastDoc.created_at, lastDoc.id);
  }
  return { documents, next_cursor };
}
178
/**
 * Set a document's status and error message, refreshing modified_at.
 *
 * @param db - Database connection
 * @param id - Document ID
 * @param status - New status
 * @param errorMessage - Optional error message (for 'failed' status); stored as NULL when omitted
 * @param updateMetadataModified - Callback to update metadata modified timestamp;
 *   invoked only after a row was updated
 * @throws DatabaseError with code DOCUMENT_NOT_FOUND when no row has this ID
 */
export function updateDocumentStatus(db, id, status, errorMessage, updateMetadataModified) {
  const modifiedAt = new Date().toISOString();
  const updateStmt = db.prepare(`
    UPDATE documents
    SET status = ?, error_message = ?, modified_at = ?
    WHERE id = ?
  `);
  const { changes } = updateStmt.run(status, errorMessage ?? null, modifiedAt, id);
  if (changes === 0) {
    throw new DatabaseError(`Document "${id}" not found`, DatabaseErrorCode.DOCUMENT_NOT_FOUND);
  }
  updateMetadataModified();
}
200
/**
 * Record OCR completion on a document.
 *
 * Stores the page count and completion timestamp, refreshes modified_at and
 * sets status to 'processing'.
 *
 * @param db - Database connection
 * @param id - Document ID
 * @param pageCount - Number of pages processed
 * @param ocrCompletedAt - ISO 8601 completion timestamp
 * @param updateMetadataModified - Callback to update metadata modified timestamp;
 *   invoked only after a row was updated
 * @throws DatabaseError with code DOCUMENT_NOT_FOUND when no row has this ID
 */
export function updateDocumentOCRComplete(db, id, pageCount, ocrCompletedAt, updateMetadataModified) {
  const modifiedAt = new Date().toISOString();
  const updateStmt = db.prepare(`
    UPDATE documents
    SET status = 'processing', page_count = ?, ocr_completed_at = ?, modified_at = ?
    WHERE id = ?
  `);
  const { changes } = updateStmt.run(pageCount, ocrCompletedAt, modifiedAt, id);
  if (changes === 0) {
    throw new DatabaseError(`Document "${id}" not found`, DatabaseErrorCode.DOCUMENT_NOT_FOUND);
  }
  updateMetadataModified();
}
222
/**
 * Update document metadata (title, author, subject) from OCR extraction.
 *
 * Each field is written through COALESCE, so a null or omitted field leaves
 * the stored value unchanged. modified_at is refreshed whenever the row
 * exists. An ID that matches no row is not an error: nothing is updated and
 * the callback is not invoked.
 *
 * @param db - Database connection
 * @param id - Document ID
 * @param metadata - Metadata fields to update (null values are ignored via COALESCE)
 * @param updateMetadataModified - Callback to update metadata modified timestamp
 */
export function updateDocumentMetadata(db, id, metadata, updateMetadataModified) {
  const modifiedAt = new Date().toISOString();
  const updateStmt = db.prepare(`
    UPDATE documents
    SET doc_title = COALESCE(?, doc_title),
        doc_author = COALESCE(?, doc_author),
        doc_subject = COALESCE(?, doc_subject),
        modified_at = ?
    WHERE id = ?
  `);
  const outcome = updateStmt.run(
    metadata.docTitle ?? null,
    metadata.docAuthor ?? null,
    metadata.docSubject ?? null,
    modifiedAt,
    id,
  );
  if (outcome.changes > 0) {
    updateMetadataModified();
  }
}
244
/**
 * Run `action`, tolerating only SQLite "no such table" failures.
 *
 * Some tables are absent in older schemas. When `action` fails with a message
 * containing "no such table", the failure is logged with `missingTableLog`
 * and swallowed; any other error is rethrown unchanged.
 *
 * @param action - Work to attempt (statement preparation must happen inside it)
 * @param missingTableLog - Log prefix used when the table is missing
 */
function runUnlessTableMissing(action, missingTableLog) {
  try {
    action();
  } catch (e) {
    const msg = e instanceof Error ? e.message : String(e);
    if (!msg.includes('no such table')) {
      throw e;
    }
    console.error(missingTableLog, msg);
  }
}

/**
 * Shared cleanup: delete all derived records for a document.
 *
 * Order of operations (FK-safe):
 *  1. Count the document's embeddings (this becomes the return value)
 *  2. vec_embeddings rows for those embeddings
 *  3. NULL images.vlm_embedding_id on this document's images
 *     (breaks the circular FK with embeddings)
 *  4. Re-queue other documents' images that point at this document's
 *     embeddings (VLM dedup): vlm_embedding_id = NULL, vlm_status = 'pending'
 *  5. entity_tags for the document and its chunks / images / extractions
 *  6. embeddings
 *  7. images
 *  8. Decrement clusters.document_count, then delete document_clusters rows
 *  9. comparisons referencing the document on either side
 * 10. form_fills whose source_file_hash equals the document's file_hash
 * 11. chunks
 * 12. extractions (before ocr_results: extractions.ocr_result_id -> ocr_results)
 * 13. ocr_results
 * 14. uploaded_files tied to provenance rows rooted at the document's provenance_id
 * 15. fts_index_metadata counts for ids 1, 2 and 3
 *
 * Steps 5, 10, 14 and 15 are skipped (with a log line) when they fail with a
 * "no such table" error. No statement here deletes from the documents or
 * provenance tables.
 *
 * @param db - Database connection
 * @param documentId - ID of the document whose derived data is removed
 * @param caller - Label included in the re-queue warning log
 * @returns The number of embeddings counted for the document before deletion (for logging)
 */
function deleteDerivedRecords(db, documentId, caller) {
  // M-3: Count embeddings first, then use subquery DELETE instead of loading all IDs
  const embeddingCount = db
    .prepare('SELECT COUNT(*) as cnt FROM embeddings WHERE document_id = ?')
    .get(documentId).cnt;

  db.prepare('DELETE FROM vec_embeddings WHERE embedding_id IN (SELECT id FROM embeddings WHERE document_id = ?)').run(documentId);

  // Break circular FK: images.vlm_embedding_id -> embeddings <-> embeddings.image_id -> images.
  // NULL out vlm_embedding_id on THIS document's images so embeddings can be deleted.
  db.prepare('UPDATE images SET vlm_embedding_id = NULL WHERE document_id = ?').run(documentId);

  // Re-queue OTHER documents' images that shared embeddings via VLM dedup.
  // Setting vlm_status='pending' ensures they get re-processed instead of
  // silently remaining 'complete' but invisible to search (orphaned).
  const orphanedImages = db
    .prepare(`
      SELECT id, document_id FROM images
      WHERE vlm_embedding_id IN (SELECT id FROM embeddings WHERE document_id = ?)
        AND document_id != ?
    `)
    .all(documentId, documentId);
  if (orphanedImages.length > 0) {
    console.error(`[WARN] ${caller} "${documentId}": re-queuing ${orphanedImages.length} images from other documents ` +
      `that shared VLM embeddings (document_ids: ${[...new Set(orphanedImages.map((i) => i.document_id))].join(', ')})`);
    db.prepare(`
      UPDATE images SET vlm_embedding_id = NULL, vlm_status = 'pending'
      WHERE vlm_embedding_id IN (SELECT id FROM embeddings WHERE document_id = ?)
        AND document_id != ?
    `).run(documentId, documentId);
  }

  // entity_tags uses a polymorphic (entity_type, entity_id) reference with no
  // CASCADE, so tags must be removed while the referenced rows still exist.
  runUnlessTableMissing(() => {
    db.prepare(`
      DELETE FROM entity_tags WHERE
        (entity_type = 'document' AND entity_id = ?)
        OR (entity_type = 'chunk' AND entity_id IN (SELECT id FROM chunks WHERE document_id = ?))
        OR (entity_type = 'image' AND entity_id IN (SELECT id FROM images WHERE document_id = ?))
        OR (entity_type = 'extraction' AND entity_id IN (SELECT id FROM extractions WHERE document_id = ?))
    `).run(documentId, documentId, documentId, documentId);
  }, '[document-operations] entity_tags table not found, skipping:');

  // Safe: images.vlm_embedding_id already NULLed.
  db.prepare('DELETE FROM embeddings WHERE document_id = ?').run(documentId);
  // Safe: embeddings.image_id references are gone.
  db.prepare('DELETE FROM images WHERE document_id = ?').run(documentId);

  // Decrement cluster document_count before removing the assignments it is derived from.
  db.prepare(`UPDATE clusters SET document_count = document_count - 1
    WHERE id IN (SELECT cluster_id FROM document_clusters WHERE document_id = ? AND cluster_id IS NOT NULL)`).run(documentId);
  db.prepare('DELETE FROM document_clusters WHERE document_id = ?').run(documentId);

  db.prepare('DELETE FROM comparisons WHERE document_id_1 = ? OR document_id_2 = ?').run(documentId, documentId);

  // form_fills has no document_id FK; it is linked through source_file_hash.
  runUnlessTableMissing(() => {
    const docRow = db.prepare('SELECT file_hash FROM documents WHERE id = ?').get(documentId);
    if (docRow) {
      db.prepare('DELETE FROM form_fills WHERE source_file_hash = ?').run(docRow.file_hash);
    }
  }, '[document-operations] form_fills table not found, skipping:');

  db.prepare('DELETE FROM chunks WHERE document_id = ?').run(documentId);
  // BEFORE ocr_results: extractions.ocr_result_id REFERENCES ocr_results(id).
  db.prepare('DELETE FROM extractions WHERE document_id = ?').run(documentId);
  db.prepare('DELETE FROM ocr_results WHERE document_id = ?').run(documentId);

  // uploaded_files.provenance_id is NOT NULL REFERENCES provenance(id), so these
  // rows must go before callers clean up provenance after this function returns.
  runUnlessTableMissing(() => {
    const docForProv = db.prepare('SELECT provenance_id FROM documents WHERE id = ?').get(documentId);
    if (docForProv) {
      db.prepare('DELETE FROM uploaded_files WHERE provenance_id IN (SELECT id FROM provenance WHERE root_document_id = ?)').run(docForProv.provenance_id);
    }
  }, '[document-operations] uploaded_files table not found, skipping:');

  // Refresh FTS metadata counts: row id 1 = chunks, 2 = VLM embeddings, 3 = extractions.
  // The table is missing in older schemas (pre-v4); only that case is ignored.
  const ftsCounters = [
    [1, 'SELECT COUNT(*) as cnt FROM chunks'],
    [2, 'SELECT COUNT(*) as cnt FROM embeddings WHERE image_id IS NOT NULL'],
    [3, 'SELECT COUNT(*) as cnt FROM extractions'],
  ];
  runUnlessTableMissing(() => {
    for (const [metadataId, countSql] of ftsCounters) {
      const indexed = db.prepare(countSql).get().cnt;
      db.prepare(`
        UPDATE fts_index_metadata SET chunks_indexed = ?, last_rebuild_at = ?
        WHERE id = ?
      `).run(indexed, new Date().toISOString(), metadataId);
    }
  }, '[document-operations] fts_index_metadata table not found, skipping FTS update:');

  return embeddingCount;
}
380
/**
 * Resolve the synthetic ORPHANED_ROOT provenance record, inserting it on first use.
 *
 * The record serves as a replacement parent for provenance rows whose original
 * document has been deleted while surviving clusters still reference them (P1.4).
 *
 * @param db - Database connection
 * @returns The ID of the ORPHANED_ROOT provenance record
 */
function getOrCreateOrphanedRoot(db) {
    const lookupStmt = db.prepare("SELECT id FROM provenance WHERE root_document_id = 'ORPHANED_ROOT' AND type = 'DOCUMENT' LIMIT 1");
    const found = lookupStmt.get();
    if (!found) {
        // Nothing stored yet: build the synthetic root row and persist it.
        const newId = uuidv4();
        const timestamp = new Date().toISOString();
        const rootHash = computeHash('ORPHANED_ROOT');
        // Values are listed in the same order as the column list below.
        const rowValues = [
            newId, // id
            'DOCUMENT', // type
            timestamp, // created_at
            timestamp, // processed_at
            'FILE', // source_type
            null, // source_id
            'ORPHANED_ROOT', // root_document_id
            rootHash, // content_hash
            null, // input_hash
            'system', // processor
            '1.0.0', // processor_version
            '{}', // processing_params
            null, // parent_id
            '[]', // parent_ids
            0, // chain_depth
            '["DOCUMENT"]', // chain_path
        ];
        const insertStmt = db.prepare(`
    INSERT INTO provenance (
      id, type, created_at, processed_at, source_type, source_id,
      root_document_id, content_hash, input_hash, processor,
      processor_version, processing_params, parent_id, parent_ids,
      chain_depth, chain_path
    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
  `);
        insertStmt.run(...rowValues);
        return newId;
    }
    return found.id;
}
409
/**
 * Delete a document and all related data (CASCADE DELETE).
 *
 * All writes run inside a single transaction. Provenance records that are still
 * referenced by clusters are not deleted; they are re-parented to the synthetic
 * ORPHANED_ROOT record, which is only looked up / created when such a record is
 * actually encountered.
 *
 * @param db - Database connection
 * @param id - Document ID to delete
 * @param updateMetadataCounts - Callback to update metadata counts (invoked inside the transaction)
 * @throws {DatabaseError} DOCUMENT_NOT_FOUND when no document with this ID exists
 */
export function deleteDocument(db, id, updateMetadataCounts) {
    // First check document exists (outside transaction - read-only)
    const doc = getDocument(db, id);
    if (!doc) {
        throw new DatabaseError(`Document "${id}" not found`, DatabaseErrorCode.DOCUMENT_NOT_FOUND);
    }
    // H-5: Wrap entire cascade delete in a transaction so a crash mid-sequence
    // cannot leave the database in an inconsistent state.
    const runInTransaction = db.transaction(() => {
        deleteDerivedRecords(db, id, 'deleteDocument');
        // Delete the document itself BEFORE provenance
        // (document has FK to provenance via provenance_id)
        db.prepare('DELETE FROM documents WHERE id = ?').run(id);
        // Delete from provenance - must delete in reverse chain_depth order
        // due to self-referential FKs on source_id and parent_id
        // NOTE: root_document_id stores the document's provenance_id, NOT document id
        const provenanceIds = db
            .prepare('SELECT id FROM provenance WHERE root_document_id = ? ORDER BY chain_depth DESC')
            .all(doc.provenance_id);
        // Pre-clear self-referencing FKs (parent_id, source_id) on provenance records being deleted.
        // Within the same chain_depth, parent provenance may appear before child provenance in the
        // iteration order, causing FK violations. NULLing these first breaks the circular references.
        const clearSelfRefStmt = db.prepare('UPDATE provenance SET parent_id = NULL, source_id = NULL WHERE id = ?');
        for (const { id: provId } of provenanceIds) {
            clearSelfRefStmt.run(provId);
        }
        const deleteProvStmt = db.prepare('DELETE FROM provenance WHERE id = ?');
        const clusterRefCheck = db.prepare('SELECT COUNT(*) as cnt FROM clusters WHERE provenance_id = ?');
        const reparentProvStmt = db.prepare('UPDATE provenance SET source_id = NULL, parent_id = ?, root_document_id = ? WHERE id = ?');
        // P1.4: The orphaned root is resolved lazily so that deletes which have nothing
        // to re-parent do not insert a synthetic provenance row as a side effect.
        let orphanedRootId = null;
        for (const { id: provId } of provenanceIds) {
            // Skip CLUSTERING provenance still referenced by clusters (NOT NULL FK).
            // Re-parent to orphaned root so provenance chain is preserved (P1.4).
            // These are cleaned up when the cluster run is deleted.
            const clusterRefs = clusterRefCheck.get(provId).cnt;
            if (clusterRefs > 0) {
                if (orphanedRootId === null) {
                    orphanedRootId = getOrCreateOrphanedRoot(db);
                }
                reparentProvStmt.run(orphanedRootId, 'ORPHANED_ROOT', provId);
                continue;
            }
            deleteProvStmt.run(provId);
        }
        // Update metadata counts inside transaction for atomicity
        updateMetadataCounts();
    });
    runInTransaction();
}
463
/**
 * Clean all derived data for a document, keeping the document record and its DOCUMENT-level provenance.
 *
 * Deletes: everything removed by deleteDerivedRecords (vec_embeddings, embeddings, images, chunks,
 * extractions, ocr_results, uploaded_files) and non-root provenance records.
 * Non-root provenance still referenced by a cluster is kept and re-anchored to the document's
 * root provenance instead of being deleted.
 * This is used by retry_failed to reset a document to a clean "pending" state.
 *
 * @param db - Database connection
 * @param documentId - Document ID to clean
 * @throws {DatabaseError} DOCUMENT_NOT_FOUND when no document with this ID exists
 */
export function cleanDocumentDerivedData(db, documentId) {
    // Validate document exists (outside transaction - read-only)
    const doc = getDocument(db, documentId);
    if (!doc) {
        throw new DatabaseError(`Document "${documentId}" not found`, DatabaseErrorCode.DOCUMENT_NOT_FOUND);
    }
    // H-5: Wrap cleanup in a transaction so partial deletes cannot leave
    // the database in an inconsistent state.
    let embeddingCount = 0;
    let provenanceCount = 0;
    const runInTransaction = db.transaction(() => {
        embeddingCount = deleteDerivedRecords(db, documentId, 'cleanDocumentDerivedData');
        // Select non-root provenance records (keep DOCUMENT-level provenance at chain_depth=0)
        // root_document_id stores the document's provenance_id, NOT document id
        const nonRootProvIds = db
            .prepare('SELECT id FROM provenance WHERE root_document_id = ? AND chain_depth > 0 ORDER BY chain_depth DESC')
            .all(doc.provenance_id);
        // clusters.provenance_id is a NOT NULL FK (see deleteDocument), so provenance that a
        // cluster still points at cannot be deleted. Schemas without a clusters table have
        // no such references and skip the check.
        let clusterRefCheck = null;
        try {
            clusterRefCheck = db.prepare('SELECT COUNT(*) as cnt FROM clusters WHERE provenance_id = ?');
        }
        catch (e) {
            const msg = e instanceof Error ? e.message : String(e);
            if (!msg.includes('no such table')) {
                throw e;
            }
            console.error('[document-operations] clusters table not found, skipping cluster reference check:', msg);
        }
        const removableIds = [];
        const retainedIds = [];
        for (const { id: provId } of nonRootProvIds) {
            const isClusterReferenced = clusterRefCheck !== null && clusterRefCheck.get(provId).cnt > 0;
            (isClusterReferenced ? retainedIds : removableIds).push(provId);
        }
        // Retained records may point at rows that are about to be deleted; anchor them to the
        // document's root provenance, which survives this cleanup.
        const reanchorStmt = db.prepare('UPDATE provenance SET source_id = NULL, parent_id = ? WHERE id = ?');
        for (const provId of retainedIds) {
            reanchorStmt.run(doc.provenance_id, provId);
        }
        // Pre-clear self-referencing FKs (parent_id, source_id) on the records being deleted so
        // that deletion order within the same chain_depth cannot cause FK violations.
        const clearSelfRefStmt = db.prepare('UPDATE provenance SET parent_id = NULL, source_id = NULL WHERE id = ?');
        for (const provId of removableIds) {
            clearSelfRefStmt.run(provId);
        }
        const deleteProvStmt = db.prepare('DELETE FROM provenance WHERE id = ?');
        for (const provId of removableIds) {
            deleteProvStmt.run(provId);
        }
        provenanceCount = removableIds.length;
    });
    runInTransaction();
    console.error(`[INFO] Cleaned derived data for document ${documentId}: ${embeddingCount} embeddings, ${provenanceCount} provenance records removed`);
}
498
+ //# sourceMappingURL=document-operations.js.map