ocr-provenance-mcp 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ocr-provenance-mcp might be problematic. Click here for more details.

Files changed (578) hide show
  1. package/.env.example +55 -0
  2. package/LICENSE +78 -0
  3. package/README.md +1154 -0
  4. package/dist/bin-http.d.ts +24 -0
  5. package/dist/bin-http.d.ts.map +1 -0
  6. package/dist/bin-http.js +275 -0
  7. package/dist/bin-http.js.map +1 -0
  8. package/dist/bin-setup.d.ts +11 -0
  9. package/dist/bin-setup.d.ts.map +1 -0
  10. package/dist/bin-setup.js +610 -0
  11. package/dist/bin-setup.js.map +1 -0
  12. package/dist/bin.d.ts +16 -0
  13. package/dist/bin.d.ts.map +1 -0
  14. package/dist/bin.js +16 -0
  15. package/dist/bin.js.map +1 -0
  16. package/dist/index.d.ts +13 -0
  17. package/dist/index.d.ts.map +1 -0
  18. package/dist/index.js +90 -0
  19. package/dist/index.js.map +1 -0
  20. package/dist/models/chunk.d.ts +136 -0
  21. package/dist/models/chunk.d.ts.map +1 -0
  22. package/dist/models/chunk.js +27 -0
  23. package/dist/models/chunk.js.map +1 -0
  24. package/dist/models/cluster.d.ts +79 -0
  25. package/dist/models/cluster.d.ts.map +1 -0
  26. package/dist/models/cluster.js +10 -0
  27. package/dist/models/cluster.js.map +1 -0
  28. package/dist/models/comparison.d.ts +62 -0
  29. package/dist/models/comparison.d.ts.map +1 -0
  30. package/dist/models/comparison.js +8 -0
  31. package/dist/models/comparison.js.map +1 -0
  32. package/dist/models/document.d.ts +104 -0
  33. package/dist/models/document.d.ts.map +1 -0
  34. package/dist/models/document.js +15 -0
  35. package/dist/models/document.js.map +1 -0
  36. package/dist/models/embedding.d.ts +87 -0
  37. package/dist/models/embedding.d.ts.map +1 -0
  38. package/dist/models/embedding.js +23 -0
  39. package/dist/models/embedding.js.map +1 -0
  40. package/dist/models/extraction.d.ts +15 -0
  41. package/dist/models/extraction.d.ts.map +1 -0
  42. package/dist/models/extraction.js +2 -0
  43. package/dist/models/extraction.js.map +1 -0
  44. package/dist/models/form-fill.d.ts +23 -0
  45. package/dist/models/form-fill.d.ts.map +1 -0
  46. package/dist/models/form-fill.js +2 -0
  47. package/dist/models/form-fill.js.map +1 -0
  48. package/dist/models/image.d.ts +177 -0
  49. package/dist/models/image.d.ts.map +1 -0
  50. package/dist/models/image.js +8 -0
  51. package/dist/models/image.js.map +1 -0
  52. package/dist/models/index.d.ts +14 -0
  53. package/dist/models/index.d.ts.map +1 -0
  54. package/dist/models/index.js +22 -0
  55. package/dist/models/index.js.map +1 -0
  56. package/dist/models/provenance.d.ts +174 -0
  57. package/dist/models/provenance.d.ts.map +1 -0
  58. package/dist/models/provenance.js +53 -0
  59. package/dist/models/provenance.js.map +1 -0
  60. package/dist/models/uploaded-file.d.ts +20 -0
  61. package/dist/models/uploaded-file.d.ts.map +1 -0
  62. package/dist/models/uploaded-file.js +2 -0
  63. package/dist/models/uploaded-file.js.map +1 -0
  64. package/dist/server/errors.d.ts +93 -0
  65. package/dist/server/errors.d.ts.map +1 -0
  66. package/dist/server/errors.js +256 -0
  67. package/dist/server/errors.js.map +1 -0
  68. package/dist/server/events.d.ts +36 -0
  69. package/dist/server/events.d.ts.map +1 -0
  70. package/dist/server/events.js +48 -0
  71. package/dist/server/events.js.map +1 -0
  72. package/dist/server/permissions.d.ts +26 -0
  73. package/dist/server/permissions.d.ts.map +1 -0
  74. package/dist/server/permissions.js +194 -0
  75. package/dist/server/permissions.js.map +1 -0
  76. package/dist/server/register-tools.d.ts +25 -0
  77. package/dist/server/register-tools.d.ts.map +1 -0
  78. package/dist/server/register-tools.js +102 -0
  79. package/dist/server/register-tools.js.map +1 -0
  80. package/dist/server/startup.d.ts +16 -0
  81. package/dist/server/startup.d.ts.map +1 -0
  82. package/dist/server/startup.js +37 -0
  83. package/dist/server/startup.js.map +1 -0
  84. package/dist/server/state.d.ts +166 -0
  85. package/dist/server/state.d.ts.map +1 -0
  86. package/dist/server/state.js +424 -0
  87. package/dist/server/state.js.map +1 -0
  88. package/dist/server/transports/http-transport.d.ts +37 -0
  89. package/dist/server/transports/http-transport.d.ts.map +1 -0
  90. package/dist/server/transports/http-transport.js +204 -0
  91. package/dist/server/transports/http-transport.js.map +1 -0
  92. package/dist/server/transports/index.d.ts +9 -0
  93. package/dist/server/transports/index.d.ts.map +1 -0
  94. package/dist/server/transports/index.js +9 -0
  95. package/dist/server/transports/index.js.map +1 -0
  96. package/dist/server/transports/session-manager.d.ts +40 -0
  97. package/dist/server/transports/session-manager.d.ts.map +1 -0
  98. package/dist/server/transports/session-manager.js +74 -0
  99. package/dist/server/transports/session-manager.js.map +1 -0
  100. package/dist/server/types.d.ts +82 -0
  101. package/dist/server/types.d.ts.map +1 -0
  102. package/dist/server/types.js +14 -0
  103. package/dist/server/types.js.map +1 -0
  104. package/dist/services/audit.d.ts +26 -0
  105. package/dist/services/audit.d.ts.map +1 -0
  106. package/dist/services/audit.js +43 -0
  107. package/dist/services/audit.js.map +1 -0
  108. package/dist/services/chunking/chunk-deduplicator.d.ts +33 -0
  109. package/dist/services/chunking/chunk-deduplicator.d.ts.map +1 -0
  110. package/dist/services/chunking/chunk-deduplicator.js +46 -0
  111. package/dist/services/chunking/chunk-deduplicator.js.map +1 -0
  112. package/dist/services/chunking/chunk-merger.d.ts +26 -0
  113. package/dist/services/chunking/chunk-merger.d.ts.map +1 -0
  114. package/dist/services/chunking/chunk-merger.js +94 -0
  115. package/dist/services/chunking/chunk-merger.js.map +1 -0
  116. package/dist/services/chunking/chunker.d.ts +62 -0
  117. package/dist/services/chunking/chunker.d.ts.map +1 -0
  118. package/dist/services/chunking/chunker.js +566 -0
  119. package/dist/services/chunking/chunker.js.map +1 -0
  120. package/dist/services/chunking/heading-normalizer.d.ts +33 -0
  121. package/dist/services/chunking/heading-normalizer.d.ts.map +1 -0
  122. package/dist/services/chunking/heading-normalizer.js +101 -0
  123. package/dist/services/chunking/heading-normalizer.js.map +1 -0
  124. package/dist/services/chunking/json-block-analyzer.d.ts +163 -0
  125. package/dist/services/chunking/json-block-analyzer.d.ts.map +1 -0
  126. package/dist/services/chunking/json-block-analyzer.js +1033 -0
  127. package/dist/services/chunking/json-block-analyzer.js.map +1 -0
  128. package/dist/services/chunking/markdown-parser.d.ts +75 -0
  129. package/dist/services/chunking/markdown-parser.d.ts.map +1 -0
  130. package/dist/services/chunking/markdown-parser.js +428 -0
  131. package/dist/services/chunking/markdown-parser.js.map +1 -0
  132. package/dist/services/chunking/text-normalizer.d.ts +20 -0
  133. package/dist/services/chunking/text-normalizer.d.ts.map +1 -0
  134. package/dist/services/chunking/text-normalizer.js +36 -0
  135. package/dist/services/chunking/text-normalizer.js.map +1 -0
  136. package/dist/services/clm/contract-schemas.d.ts +36 -0
  137. package/dist/services/clm/contract-schemas.d.ts.map +1 -0
  138. package/dist/services/clm/contract-schemas.js +92 -0
  139. package/dist/services/clm/contract-schemas.js.map +1 -0
  140. package/dist/services/clm/summarization.d.ts +46 -0
  141. package/dist/services/clm/summarization.d.ts.map +1 -0
  142. package/dist/services/clm/summarization.js +61 -0
  143. package/dist/services/clm/summarization.js.map +1 -0
  144. package/dist/services/clustering/clustering-service.d.ts +58 -0
  145. package/dist/services/clustering/clustering-service.d.ts.map +1 -0
  146. package/dist/services/clustering/clustering-service.js +467 -0
  147. package/dist/services/clustering/clustering-service.js.map +1 -0
  148. package/dist/services/comparison/diff-service.d.ts +41 -0
  149. package/dist/services/comparison/diff-service.d.ts.map +1 -0
  150. package/dist/services/comparison/diff-service.js +120 -0
  151. package/dist/services/comparison/diff-service.js.map +1 -0
  152. package/dist/services/embedding/embedder.d.ts +55 -0
  153. package/dist/services/embedding/embedder.d.ts.map +1 -0
  154. package/dist/services/embedding/embedder.js +202 -0
  155. package/dist/services/embedding/embedder.js.map +1 -0
  156. package/dist/services/embedding/nomic.d.ts +67 -0
  157. package/dist/services/embedding/nomic.d.ts.map +1 -0
  158. package/dist/services/embedding/nomic.js +280 -0
  159. package/dist/services/embedding/nomic.js.map +1 -0
  160. package/dist/services/gemini/circuit-breaker.d.ts +106 -0
  161. package/dist/services/gemini/circuit-breaker.d.ts.map +1 -0
  162. package/dist/services/gemini/circuit-breaker.js +237 -0
  163. package/dist/services/gemini/circuit-breaker.js.map +1 -0
  164. package/dist/services/gemini/client.d.ts +173 -0
  165. package/dist/services/gemini/client.d.ts.map +1 -0
  166. package/dist/services/gemini/client.js +483 -0
  167. package/dist/services/gemini/client.js.map +1 -0
  168. package/dist/services/gemini/config.d.ts +116 -0
  169. package/dist/services/gemini/config.d.ts.map +1 -0
  170. package/dist/services/gemini/config.js +118 -0
  171. package/dist/services/gemini/config.js.map +1 -0
  172. package/dist/services/gemini/index.d.ts +9 -0
  173. package/dist/services/gemini/index.d.ts.map +1 -0
  174. package/dist/services/gemini/index.js +13 -0
  175. package/dist/services/gemini/index.js.map +1 -0
  176. package/dist/services/gemini/rate-limiter.d.ts +62 -0
  177. package/dist/services/gemini/rate-limiter.d.ts.map +1 -0
  178. package/dist/services/gemini/rate-limiter.js +120 -0
  179. package/dist/services/gemini/rate-limiter.js.map +1 -0
  180. package/dist/services/images/extractor.d.ts +88 -0
  181. package/dist/services/images/extractor.d.ts.map +1 -0
  182. package/dist/services/images/extractor.js +340 -0
  183. package/dist/services/images/extractor.js.map +1 -0
  184. package/dist/services/images/optimizer.d.ts +130 -0
  185. package/dist/services/images/optimizer.d.ts.map +1 -0
  186. package/dist/services/images/optimizer.js +228 -0
  187. package/dist/services/images/optimizer.js.map +1 -0
  188. package/dist/services/ocr/datalab.d.ts +64 -0
  189. package/dist/services/ocr/datalab.d.ts.map +1 -0
  190. package/dist/services/ocr/datalab.js +425 -0
  191. package/dist/services/ocr/datalab.js.map +1 -0
  192. package/dist/services/ocr/errors.d.ts +38 -0
  193. package/dist/services/ocr/errors.d.ts.map +1 -0
  194. package/dist/services/ocr/errors.js +83 -0
  195. package/dist/services/ocr/errors.js.map +1 -0
  196. package/dist/services/ocr/file-manager.d.ts +76 -0
  197. package/dist/services/ocr/file-manager.d.ts.map +1 -0
  198. package/dist/services/ocr/file-manager.js +238 -0
  199. package/dist/services/ocr/file-manager.js.map +1 -0
  200. package/dist/services/ocr/form-fill.d.ts +48 -0
  201. package/dist/services/ocr/form-fill.d.ts.map +1 -0
  202. package/dist/services/ocr/form-fill.js +213 -0
  203. package/dist/services/ocr/form-fill.js.map +1 -0
  204. package/dist/services/ocr/processor.d.ts +95 -0
  205. package/dist/services/ocr/processor.d.ts.map +1 -0
  206. package/dist/services/ocr/processor.js +259 -0
  207. package/dist/services/ocr/processor.js.map +1 -0
  208. package/dist/services/provenance/agent-metadata.d.ts +82 -0
  209. package/dist/services/provenance/agent-metadata.d.ts.map +1 -0
  210. package/dist/services/provenance/agent-metadata.js +106 -0
  211. package/dist/services/provenance/agent-metadata.js.map +1 -0
  212. package/dist/services/provenance/chain-hash.d.ts +57 -0
  213. package/dist/services/provenance/chain-hash.d.ts.map +1 -0
  214. package/dist/services/provenance/chain-hash.js +131 -0
  215. package/dist/services/provenance/chain-hash.js.map +1 -0
  216. package/dist/services/provenance/exporter.d.ts +202 -0
  217. package/dist/services/provenance/exporter.d.ts.map +1 -0
  218. package/dist/services/provenance/exporter.js +457 -0
  219. package/dist/services/provenance/exporter.js.map +1 -0
  220. package/dist/services/provenance/index.d.ts +15 -0
  221. package/dist/services/provenance/index.d.ts.map +1 -0
  222. package/dist/services/provenance/index.js +17 -0
  223. package/dist/services/provenance/index.js.map +1 -0
  224. package/dist/services/provenance/tracker.d.ts +138 -0
  225. package/dist/services/provenance/tracker.d.ts.map +1 -0
  226. package/dist/services/provenance/tracker.js +293 -0
  227. package/dist/services/provenance/tracker.js.map +1 -0
  228. package/dist/services/provenance/verifier.d.ts +153 -0
  229. package/dist/services/provenance/verifier.d.ts.map +1 -0
  230. package/dist/services/provenance/verifier.js +536 -0
  231. package/dist/services/provenance/verifier.js.map +1 -0
  232. package/dist/services/python-pool.d.ts +70 -0
  233. package/dist/services/python-pool.d.ts.map +1 -0
  234. package/dist/services/python-pool.js +265 -0
  235. package/dist/services/python-pool.js.map +1 -0
  236. package/dist/services/search/bm25.d.ts +180 -0
  237. package/dist/services/search/bm25.d.ts.map +1 -0
  238. package/dist/services/search/bm25.js +656 -0
  239. package/dist/services/search/bm25.js.map +1 -0
  240. package/dist/services/search/fusion.d.ts +103 -0
  241. package/dist/services/search/fusion.d.ts.map +1 -0
  242. package/dist/services/search/fusion.js +122 -0
  243. package/dist/services/search/fusion.js.map +1 -0
  244. package/dist/services/search/local-reranker.d.ts +30 -0
  245. package/dist/services/search/local-reranker.d.ts.map +1 -0
  246. package/dist/services/search/local-reranker.js +123 -0
  247. package/dist/services/search/local-reranker.js.map +1 -0
  248. package/dist/services/search/quality.d.ts +11 -0
  249. package/dist/services/search/quality.d.ts.map +1 -0
  250. package/dist/services/search/quality.js +17 -0
  251. package/dist/services/search/quality.js.map +1 -0
  252. package/dist/services/search/query-classifier.d.ts +34 -0
  253. package/dist/services/search/query-classifier.d.ts.map +1 -0
  254. package/dist/services/search/query-classifier.js +114 -0
  255. package/dist/services/search/query-classifier.js.map +1 -0
  256. package/dist/services/search/query-expander.d.ts +73 -0
  257. package/dist/services/search/query-expander.d.ts.map +1 -0
  258. package/dist/services/search/query-expander.js +281 -0
  259. package/dist/services/search/query-expander.js.map +1 -0
  260. package/dist/services/search/reranker.d.ts +44 -0
  261. package/dist/services/search/reranker.d.ts.map +1 -0
  262. package/dist/services/search/reranker.js +101 -0
  263. package/dist/services/search/reranker.js.map +1 -0
  264. package/dist/services/storage/database/annotation-operations.d.ts +113 -0
  265. package/dist/services/storage/database/annotation-operations.d.ts.map +1 -0
  266. package/dist/services/storage/database/annotation-operations.js +177 -0
  267. package/dist/services/storage/database/annotation-operations.js.map +1 -0
  268. package/dist/services/storage/database/approval-operations.d.ts +132 -0
  269. package/dist/services/storage/database/approval-operations.d.ts.map +1 -0
  270. package/dist/services/storage/database/approval-operations.js +206 -0
  271. package/dist/services/storage/database/approval-operations.js.map +1 -0
  272. package/dist/services/storage/database/chunk-operations.d.ts +132 -0
  273. package/dist/services/storage/database/chunk-operations.d.ts.map +1 -0
  274. package/dist/services/storage/database/chunk-operations.js +306 -0
  275. package/dist/services/storage/database/chunk-operations.js.map +1 -0
  276. package/dist/services/storage/database/cluster-operations.d.ts +97 -0
  277. package/dist/services/storage/database/cluster-operations.d.ts.map +1 -0
  278. package/dist/services/storage/database/cluster-operations.js +258 -0
  279. package/dist/services/storage/database/cluster-operations.js.map +1 -0
  280. package/dist/services/storage/database/comparison-operations.d.ts +41 -0
  281. package/dist/services/storage/database/comparison-operations.d.ts.map +1 -0
  282. package/dist/services/storage/database/comparison-operations.js +65 -0
  283. package/dist/services/storage/database/comparison-operations.js.map +1 -0
  284. package/dist/services/storage/database/converters.d.ts +36 -0
  285. package/dist/services/storage/database/converters.d.ts.map +1 -0
  286. package/dist/services/storage/database/converters.js +244 -0
  287. package/dist/services/storage/database/converters.js.map +1 -0
  288. package/dist/services/storage/database/document-operations.d.ts +145 -0
  289. package/dist/services/storage/database/document-operations.d.ts.map +1 -0
  290. package/dist/services/storage/database/document-operations.js +498 -0
  291. package/dist/services/storage/database/document-operations.js.map +1 -0
  292. package/dist/services/storage/database/embedding-operations.d.ts +130 -0
  293. package/dist/services/storage/database/embedding-operations.d.ts.map +1 -0
  294. package/dist/services/storage/database/embedding-operations.js +315 -0
  295. package/dist/services/storage/database/embedding-operations.js.map +1 -0
  296. package/dist/services/storage/database/extraction-operations.d.ts +47 -0
  297. package/dist/services/storage/database/extraction-operations.d.ts.map +1 -0
  298. package/dist/services/storage/database/extraction-operations.js +85 -0
  299. package/dist/services/storage/database/extraction-operations.js.map +1 -0
  300. package/dist/services/storage/database/form-fill-operations.d.ts +58 -0
  301. package/dist/services/storage/database/form-fill-operations.d.ts.map +1 -0
  302. package/dist/services/storage/database/form-fill-operations.js +116 -0
  303. package/dist/services/storage/database/form-fill-operations.js.map +1 -0
  304. package/dist/services/storage/database/helpers.d.ts +29 -0
  305. package/dist/services/storage/database/helpers.d.ts.map +1 -0
  306. package/dist/services/storage/database/helpers.js +55 -0
  307. package/dist/services/storage/database/helpers.js.map +1 -0
  308. package/dist/services/storage/database/image-operations.d.ts +202 -0
  309. package/dist/services/storage/database/image-operations.d.ts.map +1 -0
  310. package/dist/services/storage/database/image-operations.js +484 -0
  311. package/dist/services/storage/database/image-operations.js.map +1 -0
  312. package/dist/services/storage/database/index.d.ts +13 -0
  313. package/dist/services/storage/database/index.d.ts.map +1 -0
  314. package/dist/services/storage/database/index.js +16 -0
  315. package/dist/services/storage/database/index.js.map +1 -0
  316. package/dist/services/storage/database/lock-operations.d.ts +59 -0
  317. package/dist/services/storage/database/lock-operations.d.ts.map +1 -0
  318. package/dist/services/storage/database/lock-operations.js +89 -0
  319. package/dist/services/storage/database/lock-operations.js.map +1 -0
  320. package/dist/services/storage/database/obligation-operations.d.ts +88 -0
  321. package/dist/services/storage/database/obligation-operations.d.ts.map +1 -0
  322. package/dist/services/storage/database/obligation-operations.js +206 -0
  323. package/dist/services/storage/database/obligation-operations.js.map +1 -0
  324. package/dist/services/storage/database/ocr-operations.d.ts +33 -0
  325. package/dist/services/storage/database/ocr-operations.d.ts.map +1 -0
  326. package/dist/services/storage/database/ocr-operations.js +70 -0
  327. package/dist/services/storage/database/ocr-operations.js.map +1 -0
  328. package/dist/services/storage/database/playbook-operations.d.ts +72 -0
  329. package/dist/services/storage/database/playbook-operations.d.ts.map +1 -0
  330. package/dist/services/storage/database/playbook-operations.js +247 -0
  331. package/dist/services/storage/database/playbook-operations.js.map +1 -0
  332. package/dist/services/storage/database/provenance-operations.d.ts +112 -0
  333. package/dist/services/storage/database/provenance-operations.d.ts.map +1 -0
  334. package/dist/services/storage/database/provenance-operations.js +251 -0
  335. package/dist/services/storage/database/provenance-operations.js.map +1 -0
  336. package/dist/services/storage/database/service.d.ts +142 -0
  337. package/dist/services/storage/database/service.d.ts.map +1 -0
  338. package/dist/services/storage/database/service.js +310 -0
  339. package/dist/services/storage/database/service.js.map +1 -0
  340. package/dist/services/storage/database/static-operations.d.ts +30 -0
  341. package/dist/services/storage/database/static-operations.d.ts.map +1 -0
  342. package/dist/services/storage/database/static-operations.js +218 -0
  343. package/dist/services/storage/database/static-operations.js.map +1 -0
  344. package/dist/services/storage/database/stats-operations.d.ts +101 -0
  345. package/dist/services/storage/database/stats-operations.d.ts.map +1 -0
  346. package/dist/services/storage/database/stats-operations.js +394 -0
  347. package/dist/services/storage/database/stats-operations.js.map +1 -0
  348. package/dist/services/storage/database/tag-operations.d.ts +76 -0
  349. package/dist/services/storage/database/tag-operations.d.ts.map +1 -0
  350. package/dist/services/storage/database/tag-operations.js +178 -0
  351. package/dist/services/storage/database/tag-operations.js.map +1 -0
  352. package/dist/services/storage/database/types.d.ts +286 -0
  353. package/dist/services/storage/database/types.d.ts.map +1 -0
  354. package/dist/services/storage/database/types.js +39 -0
  355. package/dist/services/storage/database/types.js.map +1 -0
  356. package/dist/services/storage/database/upload-operations.d.ts +71 -0
  357. package/dist/services/storage/database/upload-operations.d.ts.map +1 -0
  358. package/dist/services/storage/database/upload-operations.js +124 -0
  359. package/dist/services/storage/database/upload-operations.js.map +1 -0
  360. package/dist/services/storage/database/user-operations.d.ts +102 -0
  361. package/dist/services/storage/database/user-operations.d.ts.map +1 -0
  362. package/dist/services/storage/database/user-operations.js +151 -0
  363. package/dist/services/storage/database/user-operations.js.map +1 -0
  364. package/dist/services/storage/database/workflow-operations.d.ts +98 -0
  365. package/dist/services/storage/database/workflow-operations.d.ts.map +1 -0
  366. package/dist/services/storage/database/workflow-operations.js +157 -0
  367. package/dist/services/storage/database/workflow-operations.js.map +1 -0
  368. package/dist/services/storage/database.d.ts +16 -0
  369. package/dist/services/storage/database.d.ts.map +1 -0
  370. package/dist/services/storage/database.js +15 -0
  371. package/dist/services/storage/database.js.map +1 -0
  372. package/dist/services/storage/index.d.ts +10 -0
  373. package/dist/services/storage/index.d.ts.map +1 -0
  374. package/dist/services/storage/index.js +10 -0
  375. package/dist/services/storage/index.js.map +1 -0
  376. package/dist/services/storage/migrations/index.d.ts +16 -0
  377. package/dist/services/storage/migrations/index.d.ts.map +1 -0
  378. package/dist/services/storage/migrations/index.js +20 -0
  379. package/dist/services/storage/migrations/index.js.map +1 -0
  380. package/dist/services/storage/migrations/operations.d.ts +40 -0
  381. package/dist/services/storage/migrations/operations.d.ts.map +1 -0
  382. package/dist/services/storage/migrations/operations.js +2910 -0
  383. package/dist/services/storage/migrations/operations.js.map +1 -0
  384. package/dist/services/storage/migrations/schema-definitions.d.ts +306 -0
  385. package/dist/services/storage/migrations/schema-definitions.d.ts.map +1 -0
  386. package/dist/services/storage/migrations/schema-definitions.js +1006 -0
  387. package/dist/services/storage/migrations/schema-definitions.js.map +1 -0
  388. package/dist/services/storage/migrations/schema-helpers.d.ts +50 -0
  389. package/dist/services/storage/migrations/schema-helpers.d.ts.map +1 -0
  390. package/dist/services/storage/migrations/schema-helpers.js +176 -0
  391. package/dist/services/storage/migrations/schema-helpers.js.map +1 -0
  392. package/dist/services/storage/migrations/types.d.ts +15 -0
  393. package/dist/services/storage/migrations/types.d.ts.map +1 -0
  394. package/dist/services/storage/migrations/types.js +21 -0
  395. package/dist/services/storage/migrations/types.js.map +1 -0
  396. package/dist/services/storage/migrations/verification.d.ts +20 -0
  397. package/dist/services/storage/migrations/verification.d.ts.map +1 -0
  398. package/dist/services/storage/migrations/verification.js +78 -0
  399. package/dist/services/storage/migrations/verification.js.map +1 -0
  400. package/dist/services/storage/migrations.d.ts +16 -0
  401. package/dist/services/storage/migrations.d.ts.map +1 -0
  402. package/dist/services/storage/migrations.js +17 -0
  403. package/dist/services/storage/migrations.js.map +1 -0
  404. package/dist/services/storage/types.d.ts +12 -0
  405. package/dist/services/storage/types.d.ts.map +1 -0
  406. package/dist/services/storage/types.js +5 -0
  407. package/dist/services/storage/types.js.map +1 -0
  408. package/dist/services/storage/vector.d.ts +208 -0
  409. package/dist/services/storage/vector.d.ts.map +1 -0
  410. package/dist/services/storage/vector.js +526 -0
  411. package/dist/services/storage/vector.js.map +1 -0
  412. package/dist/services/vlm/pipeline.d.ts +194 -0
  413. package/dist/services/vlm/pipeline.d.ts.map +1 -0
  414. package/dist/services/vlm/pipeline.js +800 -0
  415. package/dist/services/vlm/pipeline.js.map +1 -0
  416. package/dist/services/vlm/prompts.d.ts +171 -0
  417. package/dist/services/vlm/prompts.d.ts.map +1 -0
  418. package/dist/services/vlm/prompts.js +229 -0
  419. package/dist/services/vlm/prompts.js.map +1 -0
  420. package/dist/services/vlm/service.d.ts +174 -0
  421. package/dist/services/vlm/service.d.ts.map +1 -0
  422. package/dist/services/vlm/service.js +256 -0
  423. package/dist/services/vlm/service.js.map +1 -0
  424. package/dist/services/webhook-delivery.d.ts +4 -0
  425. package/dist/services/webhook-delivery.d.ts.map +1 -0
  426. package/dist/services/webhook-delivery.js +140 -0
  427. package/dist/services/webhook-delivery.js.map +1 -0
  428. package/dist/tools/chunks.d.ts +19 -0
  429. package/dist/tools/chunks.d.ts.map +1 -0
  430. package/dist/tools/chunks.js +392 -0
  431. package/dist/tools/chunks.js.map +1 -0
  432. package/dist/tools/clm.d.ts +16 -0
  433. package/dist/tools/clm.d.ts.map +1 -0
  434. package/dist/tools/clm.js +668 -0
  435. package/dist/tools/clm.js.map +1 -0
  436. package/dist/tools/clustering.d.ts +13 -0
  437. package/dist/tools/clustering.d.ts.map +1 -0
  438. package/dist/tools/clustering.js +498 -0
  439. package/dist/tools/clustering.js.map +1 -0
  440. package/dist/tools/collaboration.d.ts +15 -0
  441. package/dist/tools/collaboration.d.ts.map +1 -0
  442. package/dist/tools/collaboration.js +516 -0
  443. package/dist/tools/collaboration.js.map +1 -0
  444. package/dist/tools/comparison.d.ts +13 -0
  445. package/dist/tools/comparison.d.ts.map +1 -0
  446. package/dist/tools/comparison.js +735 -0
  447. package/dist/tools/comparison.js.map +1 -0
  448. package/dist/tools/compliance.d.ts +15 -0
  449. package/dist/tools/compliance.d.ts.map +1 -0
  450. package/dist/tools/compliance.js +640 -0
  451. package/dist/tools/compliance.js.map +1 -0
  452. package/dist/tools/config.d.ts +19 -0
  453. package/dist/tools/config.d.ts.map +1 -0
  454. package/dist/tools/config.js +213 -0
  455. package/dist/tools/config.js.map +1 -0
  456. package/dist/tools/database.d.ts +62 -0
  457. package/dist/tools/database.d.ts.map +1 -0
  458. package/dist/tools/database.js +288 -0
  459. package/dist/tools/database.js.map +1 -0
  460. package/dist/tools/documents.d.ts +61 -0
  461. package/dist/tools/documents.d.ts.map +1 -0
  462. package/dist/tools/documents.js +1624 -0
  463. package/dist/tools/documents.js.map +1 -0
  464. package/dist/tools/embeddings.d.ts +14 -0
  465. package/dist/tools/embeddings.d.ts.map +1 -0
  466. package/dist/tools/embeddings.js +626 -0
  467. package/dist/tools/embeddings.js.map +1 -0
  468. package/dist/tools/evaluation.d.ts +25 -0
  469. package/dist/tools/evaluation.d.ts.map +1 -0
  470. package/dist/tools/evaluation.js +523 -0
  471. package/dist/tools/evaluation.js.map +1 -0
  472. package/dist/tools/events.d.ts +16 -0
  473. package/dist/tools/events.d.ts.map +1 -0
  474. package/dist/tools/events.js +493 -0
  475. package/dist/tools/events.js.map +1 -0
  476. package/dist/tools/extraction-structured.d.ts +13 -0
  477. package/dist/tools/extraction-structured.d.ts.map +1 -0
  478. package/dist/tools/extraction-structured.js +390 -0
  479. package/dist/tools/extraction-structured.js.map +1 -0
  480. package/dist/tools/extraction.d.ts +24 -0
  481. package/dist/tools/extraction.d.ts.map +1 -0
  482. package/dist/tools/extraction.js +424 -0
  483. package/dist/tools/extraction.js.map +1 -0
  484. package/dist/tools/file-management.d.ts +14 -0
  485. package/dist/tools/file-management.d.ts.map +1 -0
  486. package/dist/tools/file-management.js +523 -0
  487. package/dist/tools/file-management.js.map +1 -0
  488. package/dist/tools/form-fill.d.ts +13 -0
  489. package/dist/tools/form-fill.d.ts.map +1 -0
  490. package/dist/tools/form-fill.js +250 -0
  491. package/dist/tools/form-fill.js.map +1 -0
  492. package/dist/tools/health.d.ts +19 -0
  493. package/dist/tools/health.d.ts.map +1 -0
  494. package/dist/tools/health.js +229 -0
  495. package/dist/tools/health.js.map +1 -0
  496. package/dist/tools/images.d.ts +54 -0
  497. package/dist/tools/images.d.ts.map +1 -0
  498. package/dist/tools/images.js +787 -0
  499. package/dist/tools/images.js.map +1 -0
  500. package/dist/tools/ingestion.d.ts +94 -0
  501. package/dist/tools/ingestion.d.ts.map +1 -0
  502. package/dist/tools/ingestion.js +1659 -0
  503. package/dist/tools/ingestion.js.map +1 -0
  504. package/dist/tools/intelligence.d.ts +18 -0
  505. package/dist/tools/intelligence.d.ts.map +1 -0
  506. package/dist/tools/intelligence.js +1039 -0
  507. package/dist/tools/intelligence.js.map +1 -0
  508. package/dist/tools/provenance.d.ts +51 -0
  509. package/dist/tools/provenance.d.ts.map +1 -0
  510. package/dist/tools/provenance.js +691 -0
  511. package/dist/tools/provenance.js.map +1 -0
  512. package/dist/tools/reports.d.ts +41 -0
  513. package/dist/tools/reports.d.ts.map +1 -0
  514. package/dist/tools/reports.js +1394 -0
  515. package/dist/tools/reports.js.map +1 -0
  516. package/dist/tools/search.d.ts +35 -0
  517. package/dist/tools/search.d.ts.map +1 -0
  518. package/dist/tools/search.js +2528 -0
  519. package/dist/tools/search.js.map +1 -0
  520. package/dist/tools/shared.d.ts +52 -0
  521. package/dist/tools/shared.d.ts.map +1 -0
  522. package/dist/tools/shared.js +54 -0
  523. package/dist/tools/shared.js.map +1 -0
  524. package/dist/tools/tags.d.ts +15 -0
  525. package/dist/tools/tags.d.ts.map +1 -0
  526. package/dist/tools/tags.js +287 -0
  527. package/dist/tools/tags.js.map +1 -0
  528. package/dist/tools/timeline.d.ts +15 -0
  529. package/dist/tools/timeline.d.ts.map +1 -0
  530. package/dist/tools/timeline.js +14 -0
  531. package/dist/tools/timeline.js.map +1 -0
  532. package/dist/tools/users.d.ts +14 -0
  533. package/dist/tools/users.d.ts.map +1 -0
  534. package/dist/tools/users.js +257 -0
  535. package/dist/tools/users.js.map +1 -0
  536. package/dist/tools/vlm.d.ts +40 -0
  537. package/dist/tools/vlm.d.ts.map +1 -0
  538. package/dist/tools/vlm.js +475 -0
  539. package/dist/tools/vlm.js.map +1 -0
  540. package/dist/tools/workflow.d.ts +16 -0
  541. package/dist/tools/workflow.d.ts.map +1 -0
  542. package/dist/tools/workflow.js +495 -0
  543. package/dist/tools/workflow.js.map +1 -0
  544. package/dist/utils/backoff.d.ts +53 -0
  545. package/dist/utils/backoff.d.ts.map +1 -0
  546. package/dist/utils/backoff.js +78 -0
  547. package/dist/utils/backoff.js.map +1 -0
  548. package/dist/utils/config-persistence.d.ts +33 -0
  549. package/dist/utils/config-persistence.d.ts.map +1 -0
  550. package/dist/utils/config-persistence.js +61 -0
  551. package/dist/utils/config-persistence.js.map +1 -0
  552. package/dist/utils/hash.d.ts +65 -0
  553. package/dist/utils/hash.d.ts.map +1 -0
  554. package/dist/utils/hash.js +146 -0
  555. package/dist/utils/hash.js.map +1 -0
  556. package/dist/utils/math.d.ts +21 -0
  557. package/dist/utils/math.d.ts.map +1 -0
  558. package/dist/utils/math.js +39 -0
  559. package/dist/utils/math.js.map +1 -0
  560. package/dist/utils/validation.d.ts +697 -0
  561. package/dist/utils/validation.d.ts.map +1 -0
  562. package/dist/utils/validation.js +529 -0
  563. package/dist/utils/validation.js.map +1 -0
  564. package/package.json +96 -0
  565. package/python/.gitkeep +0 -0
  566. package/python/__init__.py +104 -0
  567. package/python/clustering_worker.py +440 -0
  568. package/python/docx_image_extractor.py +524 -0
  569. package/python/embedding_worker.py +552 -0
  570. package/python/file_manager_worker.py +564 -0
  571. package/python/form_fill_worker.py +399 -0
  572. package/python/gpu_utils.py +582 -0
  573. package/python/image_extractor.py +317 -0
  574. package/python/image_optimizer.py +444 -0
  575. package/python/ocr_worker.py +712 -0
  576. package/python/pyproject.toml +76 -0
  577. package/python/requirements.txt +51 -0
  578. package/python/reranker_worker.py +87 -0
@@ -0,0 +1,787 @@
1
+ /**
2
+ * Image Extraction and Management MCP Tools
3
+ *
4
+ * Tools for extracting images from PDFs and managing image records in the database.
5
+ * Uses PyMuPDF for extraction and integrates with VLM pipeline.
6
+ *
7
+ * CRITICAL: NEVER use console.log() - stdout is reserved for JSON-RPC protocol.
8
+ * Use console.error() for all logging.
9
+ *
10
+ * @module tools/images
11
+ */
12
+ import { z } from 'zod';
13
+ import * as fs from 'fs';
14
+ import { v4 as uuidv4 } from 'uuid';
15
+ import { requireDatabase } from '../server/state.js';
16
+ import { successResult } from '../server/types.js';
17
+ import { MCPError } from '../server/errors.js';
18
+ import { formatResponse, handleError, fetchProvenanceChain } from './shared.js';
19
+ import { validateInput } from '../utils/validation.js';
20
+ import { getImage, getImagesByDocument, getPendingImages, getImageStats, deleteImageCascade, deleteImagesByDocumentCascade, resetFailedImages, resetProcessingImages, updateImageVLMResult, } from '../services/storage/database/image-operations.js';
21
+ import { ProvenanceType } from '../models/provenance.js';
22
+ import { computeHash } from '../utils/hash.js';
23
+ import { getEmbeddingService } from '../services/embedding/embedder.js';
24
+ import { getVLMService } from '../services/vlm/service.js';
25
+ // ===============================================================================
26
+ // VALIDATION SCHEMAS
27
+ // ===============================================================================
28
// Small builders so each schema below reads as a list of field intents.
// Every call returns a fresh zod schema instance.
const requiredId = () => z.string().min(1);
const optionalText = () => z.string().optional();
const offByDefault = () => z.boolean().default(false);
const unitInterval = () => z.number().min(0).max(1);

/** ocr_image_list: one document, optionally narrowed to a single VLM status. */
const ImageListInput = z.object({
    document_id: requiredId(),
    include_descriptions: offByDefault(),
    vlm_status: z.enum(['pending', 'processing', 'complete', 'failed']).optional(),
});

/** ocr_image_get: a single image looked up by ID. */
const ImageGetInput = z.object({
    image_id: requiredId(),
});

/** ocr_image_stats: takes no arguments. */
const ImageStatsInput = z.object({});

/**
 * ocr_image_delete: both IDs are optional at the schema level; the handler
 * enforces that exactly one of them is supplied and that confirm is true.
 */
const ImageDeleteInput = z.object({
    image_id: optionalText(),
    document_id: optionalText(),
    confirm: offByDefault(),
    delete_files: offByDefault(),
});

/** ocr_image_reset_failed: optional document scope. */
const ImageResetFailedInput = z.object({
    document_id: optionalText(),
});

/** ocr_image_pending: page size between 1 and 1000, defaulting to 100. */
const ImagePendingInput = z.object({
    limit: z.number().int().min(1).max(1000).default(100),
});

/** ocr_image_search: one schema covering both keyword and semantic modes. */
const ImageSearchInput = z.object({
    mode: z.enum(['keyword', 'semantic']).default('keyword'),
    // keyword mode params
    image_type: optionalText(),
    block_type: optionalText(),
    min_confidence: unitInterval().optional(),
    document_id: optionalText(),
    exclude_headers_footers: offByDefault(),
    page_number: z.number().int().min(1).optional(),
    vlm_description_query: optionalText(),
    // semantic mode params
    query: optionalText(),
    document_filter: z.array(requiredId()).optional(),
    similarity_threshold: unitInterval().default(0.5),
    include_provenance: offByDefault(),
    // shared
    limit: z.number().int().min(1).max(100).default(50),
});

/** ocr_image_reanalyze: image to re-run, plus optional prompt and thinking flag. */
const ImageReanalyzeInput = z.object({
    image_id: requiredId(),
    custom_prompt: optionalText(),
    use_thinking: offByDefault(),
});
72
+ // ═══════════════════════════════════════════════════════════════════════════════
73
+ // IMAGE TOOL HANDLERS
74
+ // ═══════════════════════════════════════════════════════════════════════════════
75
/**
 * Handle ocr_image_list - enumerate the images recorded for one document.
 *
 * The document must exist; otherwise a DOCUMENT_NOT_FOUND MCPError is raised
 * and routed through handleError like every other failure in this handler.
 *
 * @param {unknown} params - Raw tool arguments, validated against ImageListInput.
 * @returns {Promise<*>} Result of formatResponse on success, of handleError on failure.
 */
export async function handleImageList(params) {
    try {
        const {
            document_id: documentId,
            include_descriptions: includeDescriptionsRaw,
            vlm_status: vlmStatusFilter,
        } = validateInput(ImageListInput, params);
        const includeDescriptions = includeDescriptionsRaw ?? false;
        const { db } = requireDatabase();
        // Refuse to list images for a document that is not in the database
        if (!db.getDocument(documentId)) {
            throw new MCPError('DOCUMENT_NOT_FOUND', `Document not found: ${documentId}`, {
                document_id: documentId,
            });
        }
        const statusFilter = vlmStatusFilter ? { vlmStatus: vlmStatusFilter } : undefined;
        const images = getImagesByDocument(db.getConnection(), documentId, statusFilter);
        const summaries = images.map((img) => {
            const summary = {
                id: img.id,
                page: img.page_number,
                index: img.image_index,
                format: img.format,
                dimensions: img.dimensions,
                vlm_status: img.vlm_status,
                has_vlm: img.vlm_status === 'complete',
                confidence: img.vlm_confidence,
            };
            // The description is only attached on request and when one is stored
            if (includeDescriptions && img.vlm_description) {
                summary.description = img.vlm_description;
            }
            return summary;
        });
        // Suggested follow-up tools differ depending on whether anything was found
        let nextSteps;
        if (images.length === 0) {
            nextSteps = [
                { tool: 'ocr_extract_images', description: 'Extract images from documents first' },
                { tool: 'ocr_document_get', description: 'Check document processing status' },
            ];
        }
        else {
            nextSteps = [
                { tool: 'ocr_image_get', description: 'Get full details for a specific image' },
                { tool: 'ocr_vlm_process', description: 'Run VLM analysis on document images' },
            ];
        }
        return formatResponse(successResult({
            document_id: documentId,
            count: images.length,
            images: summaries,
            next_steps: nextSteps,
        }));
    }
    catch (error) {
        return handleError(error);
    }
}
125
/**
 * Handle ocr_image_get - return the stored record for a single image.
 *
 * VLM fields are exposed under `vlm` only when the image's vlm_status is
 * 'complete'; for any other status `vlm` is null.
 *
 * @param {unknown} params - Raw tool arguments, validated against ImageGetInput.
 * @returns {Promise<*>} Result of formatResponse on success, of handleError on failure.
 */
export async function handleImageGet(params) {
    try {
        const { image_id: imageId } = validateInput(ImageGetInput, params);
        const { db } = requireDatabase();
        const record = getImage(db.getConnection(), imageId);
        if (!record) {
            throw new MCPError('VALIDATION_ERROR', `Image not found: ${imageId}`, { image_id: imageId });
        }
        let vlmDetails = null;
        if (record.vlm_status === 'complete') {
            vlmDetails = {
                description: record.vlm_description,
                structured_data: record.vlm_structured_data,
                model: record.vlm_model,
                confidence: record.vlm_confidence,
                tokens_used: record.vlm_tokens_used,
                processed_at: record.vlm_processed_at,
                embedding_id: record.vlm_embedding_id,
            };
        }
        const image = {
            id: record.id,
            document_id: record.document_id,
            ocr_result_id: record.ocr_result_id,
            page: record.page_number,
            index: record.image_index,
            format: record.format,
            dimensions: record.dimensions,
            bounding_box: record.bounding_box,
            path: record.extracted_path,
            file_size: record.file_size,
            vlm_status: record.vlm_status,
            vlm: vlmDetails,
            error_message: record.error_message,
            created_at: record.created_at,
        };
        const nextSteps = [
            { tool: 'ocr_image_search', description: 'Find similar images (mode=semantic for meaning-based)' },
            { tool: 'ocr_image_reanalyze', description: 'Re-run VLM analysis with custom prompt' },
            { tool: 'ocr_document_page', description: 'View the page containing this image' },
        ];
        return formatResponse(successResult({ image, next_steps: nextSteps }));
    }
    catch (error) {
        return handleError(error);
    }
}
176
/**
 * Handle ocr_image_stats - report image counts by VLM processing state.
 *
 * `processing_rate` is processed/total as a percentage with one decimal
 * place, or the literal '0%' when there are no images at all.
 *
 * @param {unknown} params - Raw tool arguments, validated against ImageStatsInput.
 * @returns {Promise<*>} Result of formatResponse on success, of handleError on failure.
 */
export async function handleImageStats(params) {
    try {
        // The schema has no fields; the validated value itself is not used
        validateInput(ImageStatsInput, params);
        const { db } = requireDatabase();
        const { total, processed, pending, processing, failed } = getImageStats(db.getConnection());
        // Guard the division so an empty database does not produce NaN
        const processingRate = total > 0 ? `${((processed / total) * 100).toFixed(1)}%` : '0%';
        const payload = {
            stats: {
                total,
                processed,
                pending,
                processing,
                failed,
                processing_rate: processingRate,
            },
            next_steps: [
                { tool: 'ocr_vlm_process', description: 'Process pending VLM images' },
                { tool: 'ocr_image_pending', description: 'List images awaiting processing' },
                { tool: 'ocr_image_search', description: 'Search images by type or filter' },
            ],
        };
        return formatResponse(successResult(payload));
    }
    catch (error) {
        return handleError(error);
    }
}
205
/**
 * Remove a file from disk if it is there.
 *
 * Attempts the unlink directly instead of checking existence first, so the
 * answer reflects what actually happened and there is no window between the
 * check and the removal.
 *
 * @param {string|null|undefined} filePath - Path to remove; empty/nullish is a no-op.
 * @returns {boolean} true when a file was removed, false when there was nothing to remove.
 * @throws {Error} Any filesystem error other than "path does not exist".
 */
function removeFileIfPresent(filePath) {
    if (!filePath) {
        return false;
    }
    try {
        fs.unlinkSync(filePath);
        return true;
    }
    catch (error) {
        // A missing file (or missing parent directory) means nothing was deleted
        if (error && (error.code === 'ENOENT' || error.code === 'ENOTDIR')) {
            return false;
        }
        throw error;
    }
}
/**
 * Handle ocr_image_delete - Delete images by image_id (single) or document_id (all for document)
 *
 * Must provide exactly one of image_id or document_id. Requires confirm=true.
 * With delete_files=true the extracted image files are removed from disk
 * before the database records are deleted; `file_deleted` / `files_deleted`
 * report only files that were actually removed.
 *
 * @param {unknown} params - Raw tool arguments, validated against ImageDeleteInput.
 * @returns {Promise<*>} Result of formatResponse on success, of handleError on failure.
 */
export async function handleImageDelete(params) {
    try {
        const input = validateInput(ImageDeleteInput, params);
        const { image_id: imageId, document_id: documentId, confirm, delete_files: deleteFiles } = input;
        if (!imageId && !documentId) {
            throw new MCPError('VALIDATION_ERROR', 'Must provide either image_id or document_id', {});
        }
        if (imageId && documentId) {
            throw new MCPError('VALIDATION_ERROR', 'Provide only one of image_id or document_id, not both', {});
        }
        if (!confirm) {
            throw new MCPError('VALIDATION_ERROR', 'Destructive operation requires confirm=true', {});
        }
        const { db } = requireDatabase();
        if (imageId) {
            // ── Single image delete ──
            const img = getImage(db.getConnection(), imageId);
            if (!img) {
                throw new MCPError('VALIDATION_ERROR', `Image not found: ${imageId}`, { image_id: imageId });
            }
            // Record whether a file was really removed, not merely whether removal was requested
            const fileDeleted = deleteFiles ? removeFileIfPresent(img.extracted_path) : false;
            deleteImageCascade(db.getConnection(), imageId);
            return formatResponse(successResult({
                mode: 'single',
                image_id: imageId,
                deleted: true,
                file_deleted: fileDeleted,
                next_steps: [
                    { tool: 'ocr_image_list', description: 'List remaining images for the document' },
                ],
            }));
        }
        // ── Delete all images for document ──
        let filesDeleted = 0;
        if (deleteFiles) {
            // Paths must be read before the cascade removes the rows that hold them
            const images = getImagesByDocument(db.getConnection(), documentId);
            for (const img of images) {
                if (removeFileIfPresent(img.extracted_path)) {
                    filesDeleted++;
                }
            }
        }
        const count = deleteImagesByDocumentCascade(db.getConnection(), documentId);
        return formatResponse(successResult({
            mode: 'document',
            document_id: documentId,
            images_deleted: count,
            files_deleted: filesDeleted,
            next_steps: [
                { tool: 'ocr_extract_images', description: 'Re-extract images for the document' },
                { tool: 'ocr_document_get', description: 'View the document after image cleanup' },
            ],
        }));
    }
    catch (error) {
        return handleError(error);
    }
}
273
/**
 * Handle ocr_image_reset_failed - put failed and stuck 'processing' images back in the queue.
 *
 * Runs resetFailedImages and then resetProcessingImages, optionally scoped
 * to one document, and reports both counts plus their sum.
 *
 * @param {unknown} params - Raw tool arguments, validated against ImageResetFailedInput.
 * @returns {Promise<*>} Result of formatResponse on success, of handleError on failure.
 */
export async function handleImageResetFailed(params) {
    try {
        const { document_id: documentId } = validateInput(ImageResetFailedInput, params);
        const { db } = requireDatabase();
        const failedReset = resetFailedImages(db.getConnection(), documentId);
        const processingReset = resetProcessingImages(db.getConnection(), documentId);
        const payload = {
            // 'all' marks an unscoped reset in the response
            document_id: documentId ?? 'all',
            images_reset: failedReset + processingReset,
            failed_reset: failedReset,
            processing_reset: processingReset,
            next_steps: [
                { tool: 'ocr_vlm_process', description: 'Process the reset images' },
                { tool: 'ocr_image_pending', description: 'Check pending images after reset' },
            ],
        };
        return formatResponse(successResult(payload));
    }
    catch (error) {
        return handleError(error);
    }
}
298
/**
 * Flatten the VLM structured-data object into the top-level result fields
 * shared by both search modes. Missing list fields become empty arrays and
 * missing scalar fields become null.
 *
 * @param {object} structured - Parsed VLM structured data.
 * @returns {object} Fields to merge into a search result.
 */
function flattenStructuredData(structured) {
    return {
        image_type: structured.imageType ?? null,
        vlm_extracted_text: structured.extractedText ?? [],
        vlm_dates: structured.dates ?? [],
        vlm_names: structured.names ?? [],
        vlm_numbers: structured.numbers ?? [],
        vlm_primary_subject: structured.primarySubject ?? null,
    };
}
/**
 * Count results per image_type, bucketing missing/empty types as 'unknown'.
 *
 * Counting goes through a Map because the type strings come from model
 * output: with a plain object accumulator a type such as "constructor" or
 * "toString" would collide with an inherited Object.prototype member.
 *
 * @param {Array<{image_type?: string|null}>} results - Search results.
 * @returns {Object<string, number>} Count per image type.
 */
function countByImageType(results) {
    const counts = new Map();
    for (const { image_type: imageType } of results) {
        const key = imageType || 'unknown';
        counts.set(key, (counts.get(key) ?? 0) + 1);
    }
    return Object.fromEntries(counts);
}
/**
 * Parse the vlm_structured_data JSON column of a raw images row.
 * A parse failure is logged to stderr and treated as "no structured data".
 *
 * @param {object} row - Raw row from the images table.
 * @returns {*} Parsed value, or null when absent or unparseable.
 */
function parseStructuredColumn(row) {
    if (!row.vlm_structured_data) {
        return null;
    }
    try {
        return JSON.parse(row.vlm_structured_data);
    }
    catch (error) {
        console.error(`[images] Failed to parse vlm_structured_data for image ${row.id}: ${error instanceof Error ? error.message : String(error)}`);
        return null;
    }
}
/**
 * mode=semantic: embed the query, run a vector similarity search and keep
 * the hits that belong to images, up to the requested limit.
 *
 * @param {object} input - Validated ImageSearchInput.
 * @returns {Promise<object>} Response payload for successResult.
 */
async function searchImagesSemantic(input) {
    if (!input.query) {
        throw new MCPError('VALIDATION_ERROR', 'query is required for mode=semantic', {});
    }
    const { db, vector } = requireDatabase();
    const embeddingService = getEmbeddingService();
    const queryVector = await embeddingService.embedSearchQuery(input.query);
    const limit = input.limit;
    // Over-fetch because hits without an image_id are discarded below
    const searchResults = vector.searchSimilar(queryVector, {
        limit: limit * 3,
        threshold: input.similarity_threshold,
        documentFilter: input.document_filter,
    });
    // Several hits commonly share a document; look each one up only once
    const documentCache = new Map();
    const lookupDocument = (documentId) => {
        if (!documentCache.has(documentId)) {
            documentCache.set(documentId, db.getDocument(documentId));
        }
        return documentCache.get(documentId);
    };
    const results = [];
    for (const hit of searchResults) {
        if (results.length >= limit) {
            break;
        }
        if (hit.image_id === null) {
            continue;
        }
        const img = getImage(db.getConnection(), hit.image_id);
        if (!img) {
            continue;
        }
        const doc = lookupDocument(hit.document_id);
        const result = {
            image_id: img.id,
            document_id: img.document_id,
            document_file_path: doc?.file_path ?? null,
            document_file_name: doc?.file_name ?? null,
            extracted_path: img.extracted_path,
            page_number: img.page_number,
            image_index: img.image_index,
            format: img.format,
            dimensions: img.dimensions,
            block_type: img.block_type,
            vlm_description: img.vlm_description,
            vlm_confidence: img.vlm_confidence,
            similarity_score: hit.similarity_score,
            embedding_id: hit.embedding_id,
        };
        if (img.vlm_structured_data) {
            Object.assign(result, flattenStructuredData(img.vlm_structured_data));
        }
        if (input.include_provenance && img.provenance_id) {
            result.provenance_chain = fetchProvenanceChain(db, img.provenance_id, '[image_search_semantic]');
        }
        results.push(result);
    }
    return {
        mode: 'semantic',
        query: input.query,
        total: results.length,
        similarity_threshold: input.similarity_threshold,
        results,
        next_steps: [
            { tool: 'ocr_image_get', description: 'Get full details for a matched image' },
            { tool: 'ocr_document_page', description: 'View the page containing a matched image' },
        ],
    };
}
/**
 * mode=keyword: parameterised SQL over images with vlm_status = 'complete',
 * narrowed by whichever filters were supplied.
 *
 * @param {object} input - Validated ImageSearchInput.
 * @returns {object} Response payload for successResult.
 */
function searchImagesByKeyword(input) {
    const { db } = requireDatabase();
    const conn = db.getConnection();
    const clauses = [];
    const sqlParams = [];
    if (input.image_type) {
        clauses.push(`json_extract(vlm_structured_data, '$.imageType') = ?`);
        sqlParams.push(input.image_type);
    }
    if (input.block_type) {
        clauses.push(`block_type = ?`);
        sqlParams.push(input.block_type);
    }
    if (input.min_confidence !== undefined) {
        clauses.push(`vlm_confidence >= ?`);
        sqlParams.push(input.min_confidence);
    }
    if (input.document_id) {
        clauses.push(`document_id = ?`);
        sqlParams.push(input.document_id);
    }
    if (input.exclude_headers_footers) {
        clauses.push(`is_header_footer = 0`);
    }
    if (input.page_number !== undefined) {
        clauses.push(`page_number = ?`);
        sqlParams.push(input.page_number);
    }
    if (input.vlm_description_query) {
        // The query text is bound as a parameter; % and _ in it still act as LIKE wildcards
        clauses.push(`vlm_description LIKE '%' || ? || '%'`);
        sqlParams.push(input.vlm_description_query);
    }
    let sql = `SELECT id, document_id, page_number, image_index, format, width, height,
                      vlm_confidence, vlm_description, vlm_structured_data, block_type,
                      is_header_footer, extracted_path, file_size
               FROM images WHERE vlm_status = 'complete'`;
    for (const clause of clauses) {
        sql += ` AND ${clause}`;
    }
    sql += ` ORDER BY document_id, page_number, image_index LIMIT ?`;
    sqlParams.push(input.limit);
    const rows = conn.prepare(sql).all(...sqlParams);
    const results = rows.map((row) => {
        const structured = parseStructuredColumn(row);
        const result = {
            id: row.id,
            document_id: row.document_id,
            page_number: row.page_number,
            image_index: row.image_index,
            format: row.format,
            dimensions: { width: row.width, height: row.height },
            vlm_confidence: row.vlm_confidence,
            vlm_description: row.vlm_description,
            vlm_structured_data: structured,
            block_type: row.block_type,
            is_header_footer: row.is_header_footer === 1,
            extracted_path: row.extracted_path,
            file_size: row.file_size,
        };
        if (structured) {
            Object.assign(result, flattenStructuredData(structured));
        }
        return result;
    });
    return {
        mode: 'keyword',
        images: results,
        total: results.length,
        type_distribution: countByImageType(results),
        next_steps: [
            { tool: 'ocr_image_get', description: 'Get full details for a specific image' },
            { tool: 'ocr_image_search', description: 'Try mode=semantic for meaning-based search' },
        ],
    };
}
/**
 * Handle ocr_image_search - Search images by keyword filters or semantic similarity
 *
 * mode=keyword: SQL LIKE search on VLM descriptions and metadata filters
 * mode=semantic: Vector similarity search on VLM embeddings
 *
 * @param {unknown} params - Raw tool arguments, validated against ImageSearchInput.
 * @returns {Promise<*>} Result of formatResponse on success, of handleError on failure.
 */
export async function handleImageSearch(params) {
    try {
        const input = validateInput(ImageSearchInput, params);
        const mode = input.mode ?? 'keyword';
        const payload = mode === 'semantic'
            ? await searchImagesSemantic(input)
            : searchImagesByKeyword(input);
        return formatResponse(successResult(payload));
    }
    catch (error) {
        return handleError(error);
    }
}
468
/**
 * Handle ocr_image_pending - list images that have not been through VLM processing yet.
 *
 * @param {unknown} params - Raw tool arguments, validated against ImagePendingInput.
 * @returns {Promise<*>} Result of formatResponse on success, of handleError on failure.
 */
export async function handleImagePending(params) {
    try {
        const { limit: requestedLimit } = validateInput(ImagePendingInput, params);
        const limit = requestedLimit ?? 100;
        const { db } = requireDatabase();
        const pending = getPendingImages(db.getConnection(), limit);
        // Expose only the identifying fields of each pending image
        const toSummary = (img) => ({
            id: img.id,
            document_id: img.document_id,
            page: img.page_number,
            index: img.image_index,
            format: img.format,
            path: img.extracted_path,
            created_at: img.created_at,
        });
        const payload = {
            count: pending.length,
            limit,
            images: pending.map(toSummary),
            next_steps: [
                { tool: 'ocr_vlm_process', description: 'Process all pending VLM images' },
                { tool: 'ocr_vlm_process', description: 'Process images for a specific document' },
            ],
        };
        return formatResponse(successResult(payload));
    }
    catch (error) {
        return handleError(error);
    }
}
499
/**
 * Return the last segment of a file path.
 *
 * Splits on both '/' and '\' so that a Windows-style path yields just the
 * file name instead of the whole path.
 *
 * @param {string|null|undefined} filePath - Path to take the name from.
 * @param {string} fallback - Value returned when no name can be derived.
 * @returns {string} File name, or `fallback` for an empty/nullish path or empty last segment.
 */
function fileNameFromPath(filePath, fallback) {
    if (typeof filePath !== 'string' || filePath.length === 0) {
        return fallback;
    }
    const lastSegment = filePath.split(/[\\/]/).pop();
    return lastSegment ? lastSegment : fallback;
}
/**
 * Handle ocr_image_reanalyze - Re-run VLM analysis on a specific image with optional custom prompt
 *
 * Steps: load the image record, confirm its file is on disk, run the VLM,
 * embed the new description, record VLM_DESCRIPTION and EMBEDDING provenance
 * (only when the image itself has provenance), insert the embedding row,
 * store the vector, then update the image record.
 *
 * @param {unknown} params - Raw tool arguments, validated against ImageReanalyzeInput.
 * @returns {Promise<*>} Result of formatResponse on success, of handleError on failure.
 */
export async function handleImageReanalyze(params) {
    try {
        const input = validateInput(ImageReanalyzeInput, params);
        const { db, vector } = requireDatabase();
        const conn = db.getConnection();
        // Get image record
        const img = getImage(conn, input.image_id);
        if (!img) {
            throw new MCPError('VALIDATION_ERROR', `Image not found: ${input.image_id}`, { image_id: input.image_id });
        }
        // Verify extracted_path exists on disk
        if (!img.extracted_path || !fs.existsSync(img.extracted_path)) {
            throw new MCPError('PATH_NOT_FOUND', `Image file not found on disk: ${img.extracted_path ?? '(no path)'}`, {
                image_id: input.image_id,
                extracted_path: img.extracted_path,
            });
        }
        // Keep the old description so the response can show before/after
        const previousDescription = img.vlm_description;
        // Run VLM analysis
        const vlm = getVLMService();
        const startMs = Date.now();
        let vlmResult;
        if (input.use_thinking) {
            // NOTE(review): custom_prompt is not forwarded on this path, so it is
            // ignored whenever use_thinking is true — confirm that is intended.
            vlmResult = await vlm.analyzeDeep(img.extracted_path);
        }
        else {
            // The custom prompt, when given, is passed as context text
            const describeOptions = input.custom_prompt
                ? { contextText: input.custom_prompt, highResolution: true }
                : { highResolution: true };
            vlmResult = await vlm.describeImage(img.extracted_path, describeOptions);
        }
        const processingDurationMs = Date.now() - startMs;
        // Generate new embedding for the VLM description
        const { getEmbeddingClient, MODEL_NAME: EMBEDDING_MODEL } = await import('../services/embedding/nomic.js');
        const embeddingClient = getEmbeddingClient();
        const vectors = await embeddingClient.embedChunks([vlmResult.description], 1);
        if (vectors.length === 0) {
            throw new MCPError('EMBEDDING_FAILED', 'Failed to generate embedding for VLM description', {
                image_id: input.image_id,
            });
        }
        const embId = uuidv4();
        const now = new Date().toISOString();
        const descriptionHash = computeHash(vlmResult.description);
        // Build provenance chain
        let vlmDescProvId = null;
        let embProvId = null;
        if (img.provenance_id) {
            const imageProv = db.getProvenance(img.provenance_id);
            if (imageProv) {
                const imageParentIds = JSON.parse(imageProv.parent_ids);
                // Create VLM_DESCRIPTION provenance (depth 3)
                vlmDescProvId = uuidv4();
                const vlmParentIds = [...imageParentIds, img.provenance_id];
                db.insertProvenance({
                    id: vlmDescProvId,
                    type: ProvenanceType.VLM_DESCRIPTION,
                    created_at: now,
                    processed_at: now,
                    source_file_created_at: null,
                    source_file_modified_at: null,
                    source_type: 'VLM',
                    source_path: img.extracted_path,
                    source_id: img.provenance_id,
                    root_document_id: imageProv.root_document_id,
                    location: {
                        page_number: img.page_number,
                        chunk_index: img.image_index,
                    },
                    content_hash: descriptionHash,
                    input_hash: imageProv.content_hash,
                    file_hash: imageProv.file_hash,
                    processor: 'gemini-vlm:reanalyze',
                    processor_version: '3.0',
                    processing_params: {
                        type: 'vlm_reanalyze',
                        use_thinking: input.use_thinking,
                        // Only whether a prompt was given is recorded, not its text
                        custom_prompt: !!input.custom_prompt,
                    },
                    processing_duration_ms: processingDurationMs,
                    processing_quality_score: vlmResult.analysis?.confidence ?? null,
                    parent_id: img.provenance_id,
                    parent_ids: JSON.stringify(vlmParentIds),
                    chain_depth: 3,
                    chain_path: JSON.stringify(['DOCUMENT', 'OCR_RESULT', 'IMAGE', 'VLM_DESCRIPTION']),
                });
                // Create EMBEDDING provenance (depth 4)
                embProvId = uuidv4();
                const embParentIds = [...vlmParentIds, vlmDescProvId];
                db.insertProvenance({
                    id: embProvId,
                    type: ProvenanceType.EMBEDDING,
                    created_at: now,
                    processed_at: now,
                    source_file_created_at: null,
                    source_file_modified_at: null,
                    source_type: 'EMBEDDING',
                    source_path: null,
                    source_id: vlmDescProvId,
                    root_document_id: imageProv.root_document_id,
                    location: {
                        page_number: img.page_number,
                        chunk_index: img.image_index,
                    },
                    content_hash: descriptionHash,
                    input_hash: descriptionHash,
                    file_hash: imageProv.file_hash,
                    processor: EMBEDDING_MODEL,
                    processor_version: '1.5.0',
                    processing_params: { task_type: 'search_document', dimensions: 768 },
                    processing_duration_ms: null,
                    processing_quality_score: null,
                    parent_id: vlmDescProvId,
                    parent_ids: JSON.stringify(embParentIds),
                    chain_depth: 4,
                    chain_path: JSON.stringify(['DOCUMENT', 'OCR_RESULT', 'IMAGE', 'VLM_DESCRIPTION', 'EMBEDDING']),
                });
            }
        }
        // Insert embedding record
        // NOTE(review): when the image has no provenance, the fallback uuidv4()
        // below is never inserted as a provenance row, so the embedding then
        // carries a provenance_id that resolves to nothing — confirm intended.
        // NOTE(review): nothing here removes the image's previous embedding;
        // verify whether insertEmbedding/updateImageVLMResult handle that.
        db.insertEmbedding({
            id: embId,
            chunk_id: null,
            image_id: img.id,
            extraction_id: null,
            document_id: img.document_id,
            original_text: vlmResult.description,
            original_text_length: vlmResult.description.length,
            source_file_path: img.extracted_path ?? 'unknown',
            source_file_name: fileNameFromPath(img.extracted_path, 'vlm_description'),
            source_file_hash: 'vlm_generated',
            page_number: img.page_number,
            page_range: null,
            character_start: 0,
            character_end: vlmResult.description.length,
            chunk_index: img.image_index,
            total_chunks: 1,
            model_name: EMBEDDING_MODEL,
            model_version: '1.5.0',
            task_type: 'search_document',
            inference_mode: 'local',
            gpu_device: 'cuda:0',
            provenance_id: embProvId ?? uuidv4(),
            content_hash: descriptionHash,
            generation_duration_ms: null,
        });
        // Store vector
        vector.storeVector(embId, vectors[0]);
        // Update image record with new VLM results
        updateImageVLMResult(conn, img.id, {
            description: vlmResult.description,
            structuredData: { ...vlmResult.analysis, imageType: vlmResult.analysis?.imageType ?? 'unknown' },
            embeddingId: embId,
            model: vlmResult.model,
            confidence: vlmResult.analysis?.confidence ?? 0,
            tokensUsed: vlmResult.tokensUsed,
        });
        return formatResponse(successResult({
            image_id: img.id,
            extracted_path: img.extracted_path,
            previous_description: previousDescription,
            new_description: vlmResult.description,
            new_confidence: vlmResult.analysis?.confidence ?? null,
            new_embedding_id: embId,
            provenance_id: vlmDescProvId,
            processing_time_ms: processingDurationMs,
            tokens_used: vlmResult.tokensUsed,
            next_steps: [
                { tool: 'ocr_image_get', description: 'View the updated image details' },
                { tool: 'ocr_image_search', description: 'Search for similar images (mode=semantic for meaning-based)' },
            ],
        }));
    }
    catch (error) {
        return handleError(error);
    }
}
686
+ // ═══════════════════════════════════════════════════════════════════════════════
687
+ // TOOL DEFINITIONS FOR MCP REGISTRATION
688
+ // ═══════════════════════════════════════════════════════════════════════════════
689
/**
 * Image tools collection for MCP server registration.
 *
 * Each entry pairs a tool name with its description, its zod input shape and
 * the handler that serves it. The descriptions are what a client sees when
 * choosing a tool, so they state only what the handlers in this module
 * actually return.
 */
export const imageTools = {
    ocr_image_list: {
        description: '[ANALYSIS] Use to list all images from a document with optional VLM status filter. Returns image metadata and optionally descriptions.',
        inputSchema: {
            document_id: z.string().min(1).describe('Document ID'),
            include_descriptions: z.boolean().default(false).describe('Include VLM descriptions'),
            vlm_status: z
                .enum(['pending', 'processing', 'complete', 'failed'])
                .optional()
                .describe('Filter by VLM status'),
        },
        handler: handleImageList,
    },
    ocr_image_get: {
        // The handler returns no provenance data, so it is not advertised here
        description: '[ANALYSIS] Use to get full details for a single image (path, dimensions, bounding box, VLM description, confidence). Returns complete image record.',
        inputSchema: {
            image_id: z.string().min(1).describe('Image ID'),
        },
        handler: handleImageGet,
    },
    ocr_image_stats: {
        // The handler reports per-status counts and a processing rate, not a by-type breakdown
        description: '[STATUS] Use to get image processing statistics (total, counts by VLM status, processing rate). Returns aggregate counts across all documents.',
        inputSchema: {},
        handler: handleImageStats,
    },
    ocr_image_delete: {
        description: '[DESTRUCTIVE] Use to delete images. Pass image_id for one image, or document_id for all document images. Requires confirm=true.',
        inputSchema: {
            image_id: z.string().optional().describe('Image ID (for single image delete)'),
            document_id: z.string().optional().describe('Document ID (to delete all images for document)'),
            confirm: z.boolean().default(false).describe('Must be true to confirm deletion'),
            delete_files: z.boolean().default(false).describe('Also delete the extracted image files from disk'),
        },
        handler: handleImageDelete,
    },
    ocr_image_reset_failed: {
        // The handler also resets images stuck in 'processing', not only failed ones
        description: '[PROCESSING] Use to reset failed and stuck processing VLM images back to pending for retry. Returns reset counts. Follow with ocr_vlm_process.',
        inputSchema: {
            document_id: z.string().optional().describe('Document ID (omit for all documents)'),
        },
        handler: handleImageResetFailed,
    },
    ocr_image_pending: {
        description: '[STATUS] Use to list images that still need VLM processing. Returns pending image IDs and metadata. Check before running ocr_vlm_process.',
        inputSchema: {
            limit: z.number().int().min(1).max(1000).default(100).describe('Maximum images to return'),
        },
        handler: handleImagePending,
    },
    ocr_image_search: {
        description: '[SEARCH] Use to find images by keyword in descriptions (mode=keyword) or by semantic similarity (mode=semantic). Returns image metadata with VLM data.',
        inputSchema: {
            mode: z.enum(['keyword', 'semantic']).default('keyword')
                .describe('Search mode: keyword for SQL filters, semantic for vector similarity'),
            // keyword mode params
            image_type: z.string().optional()
                .describe('Filter by VLM image type (keyword mode, e.g., "chart", "diagram", "photograph")'),
            block_type: z.string().optional()
                .describe('Filter by Datalab block type (keyword mode)'),
            min_confidence: z.number().min(0).max(1).optional()
                .describe('Minimum VLM confidence score (keyword mode)'),
            document_id: z.string().optional()
                .describe('Filter to specific document (keyword mode)'),
            exclude_headers_footers: z.boolean().default(false)
                .describe('Exclude header/footer images (keyword mode)'),
            page_number: z.number().int().min(1).optional()
                .describe('Filter to specific page (keyword mode)'),
            vlm_description_query: z.string().optional()
                .describe('Filter by VLM description text LIKE match (keyword mode)'),
            // semantic mode params
            query: z.string().optional()
                .describe('Search query (required for semantic mode)'),
            document_filter: z.array(z.string().min(1)).optional()
                .describe('Filter to specific document IDs (semantic mode)'),
            similarity_threshold: z.number().min(0).max(1).default(0.5)
                .describe('Minimum similarity score (semantic mode)'),
            include_provenance: z.boolean().default(false)
                .describe('Include provenance chain (semantic mode)'),
            // shared
            limit: z.number().int().min(1).max(100).default(50).describe('Maximum results'),
        },
        handler: handleImageSearch,
    },
    ocr_image_reanalyze: {
        description: '[PROCESSING] Use to re-run VLM analysis on a specific image with optional custom prompt. Returns new description while preserving audit trail.',
        inputSchema: {
            image_id: z.string().min(1).describe('Image ID to reanalyze'),
            // The handler does not forward the prompt on the use_thinking path
            custom_prompt: z.string().optional()
                .describe('Custom context/prompt for the VLM analysis (ignored when use_thinking=true)'),
            use_thinking: z.boolean().default(false)
                .describe('Use extended reasoning (thinking mode) for deeper analysis'),
        },
        handler: handleImageReanalyze,
    },
};
787
+ //# sourceMappingURL=images.js.map