ocr-provenance-mcp 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ocr-provenance-mcp might be problematic. Click here for more details.

Files changed (578) hide show
  1. package/.env.example +55 -0
  2. package/LICENSE +78 -0
  3. package/README.md +1154 -0
  4. package/dist/bin-http.d.ts +24 -0
  5. package/dist/bin-http.d.ts.map +1 -0
  6. package/dist/bin-http.js +275 -0
  7. package/dist/bin-http.js.map +1 -0
  8. package/dist/bin-setup.d.ts +11 -0
  9. package/dist/bin-setup.d.ts.map +1 -0
  10. package/dist/bin-setup.js +610 -0
  11. package/dist/bin-setup.js.map +1 -0
  12. package/dist/bin.d.ts +16 -0
  13. package/dist/bin.d.ts.map +1 -0
  14. package/dist/bin.js +16 -0
  15. package/dist/bin.js.map +1 -0
  16. package/dist/index.d.ts +13 -0
  17. package/dist/index.d.ts.map +1 -0
  18. package/dist/index.js +90 -0
  19. package/dist/index.js.map +1 -0
  20. package/dist/models/chunk.d.ts +136 -0
  21. package/dist/models/chunk.d.ts.map +1 -0
  22. package/dist/models/chunk.js +27 -0
  23. package/dist/models/chunk.js.map +1 -0
  24. package/dist/models/cluster.d.ts +79 -0
  25. package/dist/models/cluster.d.ts.map +1 -0
  26. package/dist/models/cluster.js +10 -0
  27. package/dist/models/cluster.js.map +1 -0
  28. package/dist/models/comparison.d.ts +62 -0
  29. package/dist/models/comparison.d.ts.map +1 -0
  30. package/dist/models/comparison.js +8 -0
  31. package/dist/models/comparison.js.map +1 -0
  32. package/dist/models/document.d.ts +104 -0
  33. package/dist/models/document.d.ts.map +1 -0
  34. package/dist/models/document.js +15 -0
  35. package/dist/models/document.js.map +1 -0
  36. package/dist/models/embedding.d.ts +87 -0
  37. package/dist/models/embedding.d.ts.map +1 -0
  38. package/dist/models/embedding.js +23 -0
  39. package/dist/models/embedding.js.map +1 -0
  40. package/dist/models/extraction.d.ts +15 -0
  41. package/dist/models/extraction.d.ts.map +1 -0
  42. package/dist/models/extraction.js +2 -0
  43. package/dist/models/extraction.js.map +1 -0
  44. package/dist/models/form-fill.d.ts +23 -0
  45. package/dist/models/form-fill.d.ts.map +1 -0
  46. package/dist/models/form-fill.js +2 -0
  47. package/dist/models/form-fill.js.map +1 -0
  48. package/dist/models/image.d.ts +177 -0
  49. package/dist/models/image.d.ts.map +1 -0
  50. package/dist/models/image.js +8 -0
  51. package/dist/models/image.js.map +1 -0
  52. package/dist/models/index.d.ts +14 -0
  53. package/dist/models/index.d.ts.map +1 -0
  54. package/dist/models/index.js +22 -0
  55. package/dist/models/index.js.map +1 -0
  56. package/dist/models/provenance.d.ts +174 -0
  57. package/dist/models/provenance.d.ts.map +1 -0
  58. package/dist/models/provenance.js +53 -0
  59. package/dist/models/provenance.js.map +1 -0
  60. package/dist/models/uploaded-file.d.ts +20 -0
  61. package/dist/models/uploaded-file.d.ts.map +1 -0
  62. package/dist/models/uploaded-file.js +2 -0
  63. package/dist/models/uploaded-file.js.map +1 -0
  64. package/dist/server/errors.d.ts +93 -0
  65. package/dist/server/errors.d.ts.map +1 -0
  66. package/dist/server/errors.js +256 -0
  67. package/dist/server/errors.js.map +1 -0
  68. package/dist/server/events.d.ts +36 -0
  69. package/dist/server/events.d.ts.map +1 -0
  70. package/dist/server/events.js +48 -0
  71. package/dist/server/events.js.map +1 -0
  72. package/dist/server/permissions.d.ts +26 -0
  73. package/dist/server/permissions.d.ts.map +1 -0
  74. package/dist/server/permissions.js +194 -0
  75. package/dist/server/permissions.js.map +1 -0
  76. package/dist/server/register-tools.d.ts +25 -0
  77. package/dist/server/register-tools.d.ts.map +1 -0
  78. package/dist/server/register-tools.js +102 -0
  79. package/dist/server/register-tools.js.map +1 -0
  80. package/dist/server/startup.d.ts +16 -0
  81. package/dist/server/startup.d.ts.map +1 -0
  82. package/dist/server/startup.js +37 -0
  83. package/dist/server/startup.js.map +1 -0
  84. package/dist/server/state.d.ts +166 -0
  85. package/dist/server/state.d.ts.map +1 -0
  86. package/dist/server/state.js +424 -0
  87. package/dist/server/state.js.map +1 -0
  88. package/dist/server/transports/http-transport.d.ts +37 -0
  89. package/dist/server/transports/http-transport.d.ts.map +1 -0
  90. package/dist/server/transports/http-transport.js +204 -0
  91. package/dist/server/transports/http-transport.js.map +1 -0
  92. package/dist/server/transports/index.d.ts +9 -0
  93. package/dist/server/transports/index.d.ts.map +1 -0
  94. package/dist/server/transports/index.js +9 -0
  95. package/dist/server/transports/index.js.map +1 -0
  96. package/dist/server/transports/session-manager.d.ts +40 -0
  97. package/dist/server/transports/session-manager.d.ts.map +1 -0
  98. package/dist/server/transports/session-manager.js +74 -0
  99. package/dist/server/transports/session-manager.js.map +1 -0
  100. package/dist/server/types.d.ts +82 -0
  101. package/dist/server/types.d.ts.map +1 -0
  102. package/dist/server/types.js +14 -0
  103. package/dist/server/types.js.map +1 -0
  104. package/dist/services/audit.d.ts +26 -0
  105. package/dist/services/audit.d.ts.map +1 -0
  106. package/dist/services/audit.js +43 -0
  107. package/dist/services/audit.js.map +1 -0
  108. package/dist/services/chunking/chunk-deduplicator.d.ts +33 -0
  109. package/dist/services/chunking/chunk-deduplicator.d.ts.map +1 -0
  110. package/dist/services/chunking/chunk-deduplicator.js +46 -0
  111. package/dist/services/chunking/chunk-deduplicator.js.map +1 -0
  112. package/dist/services/chunking/chunk-merger.d.ts +26 -0
  113. package/dist/services/chunking/chunk-merger.d.ts.map +1 -0
  114. package/dist/services/chunking/chunk-merger.js +94 -0
  115. package/dist/services/chunking/chunk-merger.js.map +1 -0
  116. package/dist/services/chunking/chunker.d.ts +62 -0
  117. package/dist/services/chunking/chunker.d.ts.map +1 -0
  118. package/dist/services/chunking/chunker.js +566 -0
  119. package/dist/services/chunking/chunker.js.map +1 -0
  120. package/dist/services/chunking/heading-normalizer.d.ts +33 -0
  121. package/dist/services/chunking/heading-normalizer.d.ts.map +1 -0
  122. package/dist/services/chunking/heading-normalizer.js +101 -0
  123. package/dist/services/chunking/heading-normalizer.js.map +1 -0
  124. package/dist/services/chunking/json-block-analyzer.d.ts +163 -0
  125. package/dist/services/chunking/json-block-analyzer.d.ts.map +1 -0
  126. package/dist/services/chunking/json-block-analyzer.js +1033 -0
  127. package/dist/services/chunking/json-block-analyzer.js.map +1 -0
  128. package/dist/services/chunking/markdown-parser.d.ts +75 -0
  129. package/dist/services/chunking/markdown-parser.d.ts.map +1 -0
  130. package/dist/services/chunking/markdown-parser.js +428 -0
  131. package/dist/services/chunking/markdown-parser.js.map +1 -0
  132. package/dist/services/chunking/text-normalizer.d.ts +20 -0
  133. package/dist/services/chunking/text-normalizer.d.ts.map +1 -0
  134. package/dist/services/chunking/text-normalizer.js +36 -0
  135. package/dist/services/chunking/text-normalizer.js.map +1 -0
  136. package/dist/services/clm/contract-schemas.d.ts +36 -0
  137. package/dist/services/clm/contract-schemas.d.ts.map +1 -0
  138. package/dist/services/clm/contract-schemas.js +92 -0
  139. package/dist/services/clm/contract-schemas.js.map +1 -0
  140. package/dist/services/clm/summarization.d.ts +46 -0
  141. package/dist/services/clm/summarization.d.ts.map +1 -0
  142. package/dist/services/clm/summarization.js +61 -0
  143. package/dist/services/clm/summarization.js.map +1 -0
  144. package/dist/services/clustering/clustering-service.d.ts +58 -0
  145. package/dist/services/clustering/clustering-service.d.ts.map +1 -0
  146. package/dist/services/clustering/clustering-service.js +467 -0
  147. package/dist/services/clustering/clustering-service.js.map +1 -0
  148. package/dist/services/comparison/diff-service.d.ts +41 -0
  149. package/dist/services/comparison/diff-service.d.ts.map +1 -0
  150. package/dist/services/comparison/diff-service.js +120 -0
  151. package/dist/services/comparison/diff-service.js.map +1 -0
  152. package/dist/services/embedding/embedder.d.ts +55 -0
  153. package/dist/services/embedding/embedder.d.ts.map +1 -0
  154. package/dist/services/embedding/embedder.js +202 -0
  155. package/dist/services/embedding/embedder.js.map +1 -0
  156. package/dist/services/embedding/nomic.d.ts +67 -0
  157. package/dist/services/embedding/nomic.d.ts.map +1 -0
  158. package/dist/services/embedding/nomic.js +280 -0
  159. package/dist/services/embedding/nomic.js.map +1 -0
  160. package/dist/services/gemini/circuit-breaker.d.ts +106 -0
  161. package/dist/services/gemini/circuit-breaker.d.ts.map +1 -0
  162. package/dist/services/gemini/circuit-breaker.js +237 -0
  163. package/dist/services/gemini/circuit-breaker.js.map +1 -0
  164. package/dist/services/gemini/client.d.ts +173 -0
  165. package/dist/services/gemini/client.d.ts.map +1 -0
  166. package/dist/services/gemini/client.js +483 -0
  167. package/dist/services/gemini/client.js.map +1 -0
  168. package/dist/services/gemini/config.d.ts +116 -0
  169. package/dist/services/gemini/config.d.ts.map +1 -0
  170. package/dist/services/gemini/config.js +118 -0
  171. package/dist/services/gemini/config.js.map +1 -0
  172. package/dist/services/gemini/index.d.ts +9 -0
  173. package/dist/services/gemini/index.d.ts.map +1 -0
  174. package/dist/services/gemini/index.js +13 -0
  175. package/dist/services/gemini/index.js.map +1 -0
  176. package/dist/services/gemini/rate-limiter.d.ts +62 -0
  177. package/dist/services/gemini/rate-limiter.d.ts.map +1 -0
  178. package/dist/services/gemini/rate-limiter.js +120 -0
  179. package/dist/services/gemini/rate-limiter.js.map +1 -0
  180. package/dist/services/images/extractor.d.ts +88 -0
  181. package/dist/services/images/extractor.d.ts.map +1 -0
  182. package/dist/services/images/extractor.js +340 -0
  183. package/dist/services/images/extractor.js.map +1 -0
  184. package/dist/services/images/optimizer.d.ts +130 -0
  185. package/dist/services/images/optimizer.d.ts.map +1 -0
  186. package/dist/services/images/optimizer.js +228 -0
  187. package/dist/services/images/optimizer.js.map +1 -0
  188. package/dist/services/ocr/datalab.d.ts +64 -0
  189. package/dist/services/ocr/datalab.d.ts.map +1 -0
  190. package/dist/services/ocr/datalab.js +425 -0
  191. package/dist/services/ocr/datalab.js.map +1 -0
  192. package/dist/services/ocr/errors.d.ts +38 -0
  193. package/dist/services/ocr/errors.d.ts.map +1 -0
  194. package/dist/services/ocr/errors.js +83 -0
  195. package/dist/services/ocr/errors.js.map +1 -0
  196. package/dist/services/ocr/file-manager.d.ts +76 -0
  197. package/dist/services/ocr/file-manager.d.ts.map +1 -0
  198. package/dist/services/ocr/file-manager.js +238 -0
  199. package/dist/services/ocr/file-manager.js.map +1 -0
  200. package/dist/services/ocr/form-fill.d.ts +48 -0
  201. package/dist/services/ocr/form-fill.d.ts.map +1 -0
  202. package/dist/services/ocr/form-fill.js +213 -0
  203. package/dist/services/ocr/form-fill.js.map +1 -0
  204. package/dist/services/ocr/processor.d.ts +95 -0
  205. package/dist/services/ocr/processor.d.ts.map +1 -0
  206. package/dist/services/ocr/processor.js +259 -0
  207. package/dist/services/ocr/processor.js.map +1 -0
  208. package/dist/services/provenance/agent-metadata.d.ts +82 -0
  209. package/dist/services/provenance/agent-metadata.d.ts.map +1 -0
  210. package/dist/services/provenance/agent-metadata.js +106 -0
  211. package/dist/services/provenance/agent-metadata.js.map +1 -0
  212. package/dist/services/provenance/chain-hash.d.ts +57 -0
  213. package/dist/services/provenance/chain-hash.d.ts.map +1 -0
  214. package/dist/services/provenance/chain-hash.js +131 -0
  215. package/dist/services/provenance/chain-hash.js.map +1 -0
  216. package/dist/services/provenance/exporter.d.ts +202 -0
  217. package/dist/services/provenance/exporter.d.ts.map +1 -0
  218. package/dist/services/provenance/exporter.js +457 -0
  219. package/dist/services/provenance/exporter.js.map +1 -0
  220. package/dist/services/provenance/index.d.ts +15 -0
  221. package/dist/services/provenance/index.d.ts.map +1 -0
  222. package/dist/services/provenance/index.js +17 -0
  223. package/dist/services/provenance/index.js.map +1 -0
  224. package/dist/services/provenance/tracker.d.ts +138 -0
  225. package/dist/services/provenance/tracker.d.ts.map +1 -0
  226. package/dist/services/provenance/tracker.js +293 -0
  227. package/dist/services/provenance/tracker.js.map +1 -0
  228. package/dist/services/provenance/verifier.d.ts +153 -0
  229. package/dist/services/provenance/verifier.d.ts.map +1 -0
  230. package/dist/services/provenance/verifier.js +536 -0
  231. package/dist/services/provenance/verifier.js.map +1 -0
  232. package/dist/services/python-pool.d.ts +70 -0
  233. package/dist/services/python-pool.d.ts.map +1 -0
  234. package/dist/services/python-pool.js +265 -0
  235. package/dist/services/python-pool.js.map +1 -0
  236. package/dist/services/search/bm25.d.ts +180 -0
  237. package/dist/services/search/bm25.d.ts.map +1 -0
  238. package/dist/services/search/bm25.js +656 -0
  239. package/dist/services/search/bm25.js.map +1 -0
  240. package/dist/services/search/fusion.d.ts +103 -0
  241. package/dist/services/search/fusion.d.ts.map +1 -0
  242. package/dist/services/search/fusion.js +122 -0
  243. package/dist/services/search/fusion.js.map +1 -0
  244. package/dist/services/search/local-reranker.d.ts +30 -0
  245. package/dist/services/search/local-reranker.d.ts.map +1 -0
  246. package/dist/services/search/local-reranker.js +123 -0
  247. package/dist/services/search/local-reranker.js.map +1 -0
  248. package/dist/services/search/quality.d.ts +11 -0
  249. package/dist/services/search/quality.d.ts.map +1 -0
  250. package/dist/services/search/quality.js +17 -0
  251. package/dist/services/search/quality.js.map +1 -0
  252. package/dist/services/search/query-classifier.d.ts +34 -0
  253. package/dist/services/search/query-classifier.d.ts.map +1 -0
  254. package/dist/services/search/query-classifier.js +114 -0
  255. package/dist/services/search/query-classifier.js.map +1 -0
  256. package/dist/services/search/query-expander.d.ts +73 -0
  257. package/dist/services/search/query-expander.d.ts.map +1 -0
  258. package/dist/services/search/query-expander.js +281 -0
  259. package/dist/services/search/query-expander.js.map +1 -0
  260. package/dist/services/search/reranker.d.ts +44 -0
  261. package/dist/services/search/reranker.d.ts.map +1 -0
  262. package/dist/services/search/reranker.js +101 -0
  263. package/dist/services/search/reranker.js.map +1 -0
  264. package/dist/services/storage/database/annotation-operations.d.ts +113 -0
  265. package/dist/services/storage/database/annotation-operations.d.ts.map +1 -0
  266. package/dist/services/storage/database/annotation-operations.js +177 -0
  267. package/dist/services/storage/database/annotation-operations.js.map +1 -0
  268. package/dist/services/storage/database/approval-operations.d.ts +132 -0
  269. package/dist/services/storage/database/approval-operations.d.ts.map +1 -0
  270. package/dist/services/storage/database/approval-operations.js +206 -0
  271. package/dist/services/storage/database/approval-operations.js.map +1 -0
  272. package/dist/services/storage/database/chunk-operations.d.ts +132 -0
  273. package/dist/services/storage/database/chunk-operations.d.ts.map +1 -0
  274. package/dist/services/storage/database/chunk-operations.js +306 -0
  275. package/dist/services/storage/database/chunk-operations.js.map +1 -0
  276. package/dist/services/storage/database/cluster-operations.d.ts +97 -0
  277. package/dist/services/storage/database/cluster-operations.d.ts.map +1 -0
  278. package/dist/services/storage/database/cluster-operations.js +258 -0
  279. package/dist/services/storage/database/cluster-operations.js.map +1 -0
  280. package/dist/services/storage/database/comparison-operations.d.ts +41 -0
  281. package/dist/services/storage/database/comparison-operations.d.ts.map +1 -0
  282. package/dist/services/storage/database/comparison-operations.js +65 -0
  283. package/dist/services/storage/database/comparison-operations.js.map +1 -0
  284. package/dist/services/storage/database/converters.d.ts +36 -0
  285. package/dist/services/storage/database/converters.d.ts.map +1 -0
  286. package/dist/services/storage/database/converters.js +244 -0
  287. package/dist/services/storage/database/converters.js.map +1 -0
  288. package/dist/services/storage/database/document-operations.d.ts +145 -0
  289. package/dist/services/storage/database/document-operations.d.ts.map +1 -0
  290. package/dist/services/storage/database/document-operations.js +498 -0
  291. package/dist/services/storage/database/document-operations.js.map +1 -0
  292. package/dist/services/storage/database/embedding-operations.d.ts +130 -0
  293. package/dist/services/storage/database/embedding-operations.d.ts.map +1 -0
  294. package/dist/services/storage/database/embedding-operations.js +315 -0
  295. package/dist/services/storage/database/embedding-operations.js.map +1 -0
  296. package/dist/services/storage/database/extraction-operations.d.ts +47 -0
  297. package/dist/services/storage/database/extraction-operations.d.ts.map +1 -0
  298. package/dist/services/storage/database/extraction-operations.js +85 -0
  299. package/dist/services/storage/database/extraction-operations.js.map +1 -0
  300. package/dist/services/storage/database/form-fill-operations.d.ts +58 -0
  301. package/dist/services/storage/database/form-fill-operations.d.ts.map +1 -0
  302. package/dist/services/storage/database/form-fill-operations.js +116 -0
  303. package/dist/services/storage/database/form-fill-operations.js.map +1 -0
  304. package/dist/services/storage/database/helpers.d.ts +29 -0
  305. package/dist/services/storage/database/helpers.d.ts.map +1 -0
  306. package/dist/services/storage/database/helpers.js +55 -0
  307. package/dist/services/storage/database/helpers.js.map +1 -0
  308. package/dist/services/storage/database/image-operations.d.ts +202 -0
  309. package/dist/services/storage/database/image-operations.d.ts.map +1 -0
  310. package/dist/services/storage/database/image-operations.js +484 -0
  311. package/dist/services/storage/database/image-operations.js.map +1 -0
  312. package/dist/services/storage/database/index.d.ts +13 -0
  313. package/dist/services/storage/database/index.d.ts.map +1 -0
  314. package/dist/services/storage/database/index.js +16 -0
  315. package/dist/services/storage/database/index.js.map +1 -0
  316. package/dist/services/storage/database/lock-operations.d.ts +59 -0
  317. package/dist/services/storage/database/lock-operations.d.ts.map +1 -0
  318. package/dist/services/storage/database/lock-operations.js +89 -0
  319. package/dist/services/storage/database/lock-operations.js.map +1 -0
  320. package/dist/services/storage/database/obligation-operations.d.ts +88 -0
  321. package/dist/services/storage/database/obligation-operations.d.ts.map +1 -0
  322. package/dist/services/storage/database/obligation-operations.js +206 -0
  323. package/dist/services/storage/database/obligation-operations.js.map +1 -0
  324. package/dist/services/storage/database/ocr-operations.d.ts +33 -0
  325. package/dist/services/storage/database/ocr-operations.d.ts.map +1 -0
  326. package/dist/services/storage/database/ocr-operations.js +70 -0
  327. package/dist/services/storage/database/ocr-operations.js.map +1 -0
  328. package/dist/services/storage/database/playbook-operations.d.ts +72 -0
  329. package/dist/services/storage/database/playbook-operations.d.ts.map +1 -0
  330. package/dist/services/storage/database/playbook-operations.js +247 -0
  331. package/dist/services/storage/database/playbook-operations.js.map +1 -0
  332. package/dist/services/storage/database/provenance-operations.d.ts +112 -0
  333. package/dist/services/storage/database/provenance-operations.d.ts.map +1 -0
  334. package/dist/services/storage/database/provenance-operations.js +251 -0
  335. package/dist/services/storage/database/provenance-operations.js.map +1 -0
  336. package/dist/services/storage/database/service.d.ts +142 -0
  337. package/dist/services/storage/database/service.d.ts.map +1 -0
  338. package/dist/services/storage/database/service.js +310 -0
  339. package/dist/services/storage/database/service.js.map +1 -0
  340. package/dist/services/storage/database/static-operations.d.ts +30 -0
  341. package/dist/services/storage/database/static-operations.d.ts.map +1 -0
  342. package/dist/services/storage/database/static-operations.js +218 -0
  343. package/dist/services/storage/database/static-operations.js.map +1 -0
  344. package/dist/services/storage/database/stats-operations.d.ts +101 -0
  345. package/dist/services/storage/database/stats-operations.d.ts.map +1 -0
  346. package/dist/services/storage/database/stats-operations.js +394 -0
  347. package/dist/services/storage/database/stats-operations.js.map +1 -0
  348. package/dist/services/storage/database/tag-operations.d.ts +76 -0
  349. package/dist/services/storage/database/tag-operations.d.ts.map +1 -0
  350. package/dist/services/storage/database/tag-operations.js +178 -0
  351. package/dist/services/storage/database/tag-operations.js.map +1 -0
  352. package/dist/services/storage/database/types.d.ts +286 -0
  353. package/dist/services/storage/database/types.d.ts.map +1 -0
  354. package/dist/services/storage/database/types.js +39 -0
  355. package/dist/services/storage/database/types.js.map +1 -0
  356. package/dist/services/storage/database/upload-operations.d.ts +71 -0
  357. package/dist/services/storage/database/upload-operations.d.ts.map +1 -0
  358. package/dist/services/storage/database/upload-operations.js +124 -0
  359. package/dist/services/storage/database/upload-operations.js.map +1 -0
  360. package/dist/services/storage/database/user-operations.d.ts +102 -0
  361. package/dist/services/storage/database/user-operations.d.ts.map +1 -0
  362. package/dist/services/storage/database/user-operations.js +151 -0
  363. package/dist/services/storage/database/user-operations.js.map +1 -0
  364. package/dist/services/storage/database/workflow-operations.d.ts +98 -0
  365. package/dist/services/storage/database/workflow-operations.d.ts.map +1 -0
  366. package/dist/services/storage/database/workflow-operations.js +157 -0
  367. package/dist/services/storage/database/workflow-operations.js.map +1 -0
  368. package/dist/services/storage/database.d.ts +16 -0
  369. package/dist/services/storage/database.d.ts.map +1 -0
  370. package/dist/services/storage/database.js +15 -0
  371. package/dist/services/storage/database.js.map +1 -0
  372. package/dist/services/storage/index.d.ts +10 -0
  373. package/dist/services/storage/index.d.ts.map +1 -0
  374. package/dist/services/storage/index.js +10 -0
  375. package/dist/services/storage/index.js.map +1 -0
  376. package/dist/services/storage/migrations/index.d.ts +16 -0
  377. package/dist/services/storage/migrations/index.d.ts.map +1 -0
  378. package/dist/services/storage/migrations/index.js +20 -0
  379. package/dist/services/storage/migrations/index.js.map +1 -0
  380. package/dist/services/storage/migrations/operations.d.ts +40 -0
  381. package/dist/services/storage/migrations/operations.d.ts.map +1 -0
  382. package/dist/services/storage/migrations/operations.js +2910 -0
  383. package/dist/services/storage/migrations/operations.js.map +1 -0
  384. package/dist/services/storage/migrations/schema-definitions.d.ts +306 -0
  385. package/dist/services/storage/migrations/schema-definitions.d.ts.map +1 -0
  386. package/dist/services/storage/migrations/schema-definitions.js +1006 -0
  387. package/dist/services/storage/migrations/schema-definitions.js.map +1 -0
  388. package/dist/services/storage/migrations/schema-helpers.d.ts +50 -0
  389. package/dist/services/storage/migrations/schema-helpers.d.ts.map +1 -0
  390. package/dist/services/storage/migrations/schema-helpers.js +176 -0
  391. package/dist/services/storage/migrations/schema-helpers.js.map +1 -0
  392. package/dist/services/storage/migrations/types.d.ts +15 -0
  393. package/dist/services/storage/migrations/types.d.ts.map +1 -0
  394. package/dist/services/storage/migrations/types.js +21 -0
  395. package/dist/services/storage/migrations/types.js.map +1 -0
  396. package/dist/services/storage/migrations/verification.d.ts +20 -0
  397. package/dist/services/storage/migrations/verification.d.ts.map +1 -0
  398. package/dist/services/storage/migrations/verification.js +78 -0
  399. package/dist/services/storage/migrations/verification.js.map +1 -0
  400. package/dist/services/storage/migrations.d.ts +16 -0
  401. package/dist/services/storage/migrations.d.ts.map +1 -0
  402. package/dist/services/storage/migrations.js +17 -0
  403. package/dist/services/storage/migrations.js.map +1 -0
  404. package/dist/services/storage/types.d.ts +12 -0
  405. package/dist/services/storage/types.d.ts.map +1 -0
  406. package/dist/services/storage/types.js +5 -0
  407. package/dist/services/storage/types.js.map +1 -0
  408. package/dist/services/storage/vector.d.ts +208 -0
  409. package/dist/services/storage/vector.d.ts.map +1 -0
  410. package/dist/services/storage/vector.js +526 -0
  411. package/dist/services/storage/vector.js.map +1 -0
  412. package/dist/services/vlm/pipeline.d.ts +194 -0
  413. package/dist/services/vlm/pipeline.d.ts.map +1 -0
  414. package/dist/services/vlm/pipeline.js +800 -0
  415. package/dist/services/vlm/pipeline.js.map +1 -0
  416. package/dist/services/vlm/prompts.d.ts +171 -0
  417. package/dist/services/vlm/prompts.d.ts.map +1 -0
  418. package/dist/services/vlm/prompts.js +229 -0
  419. package/dist/services/vlm/prompts.js.map +1 -0
  420. package/dist/services/vlm/service.d.ts +174 -0
  421. package/dist/services/vlm/service.d.ts.map +1 -0
  422. package/dist/services/vlm/service.js +256 -0
  423. package/dist/services/vlm/service.js.map +1 -0
  424. package/dist/services/webhook-delivery.d.ts +4 -0
  425. package/dist/services/webhook-delivery.d.ts.map +1 -0
  426. package/dist/services/webhook-delivery.js +140 -0
  427. package/dist/services/webhook-delivery.js.map +1 -0
  428. package/dist/tools/chunks.d.ts +19 -0
  429. package/dist/tools/chunks.d.ts.map +1 -0
  430. package/dist/tools/chunks.js +392 -0
  431. package/dist/tools/chunks.js.map +1 -0
  432. package/dist/tools/clm.d.ts +16 -0
  433. package/dist/tools/clm.d.ts.map +1 -0
  434. package/dist/tools/clm.js +668 -0
  435. package/dist/tools/clm.js.map +1 -0
  436. package/dist/tools/clustering.d.ts +13 -0
  437. package/dist/tools/clustering.d.ts.map +1 -0
  438. package/dist/tools/clustering.js +498 -0
  439. package/dist/tools/clustering.js.map +1 -0
  440. package/dist/tools/collaboration.d.ts +15 -0
  441. package/dist/tools/collaboration.d.ts.map +1 -0
  442. package/dist/tools/collaboration.js +516 -0
  443. package/dist/tools/collaboration.js.map +1 -0
  444. package/dist/tools/comparison.d.ts +13 -0
  445. package/dist/tools/comparison.d.ts.map +1 -0
  446. package/dist/tools/comparison.js +735 -0
  447. package/dist/tools/comparison.js.map +1 -0
  448. package/dist/tools/compliance.d.ts +15 -0
  449. package/dist/tools/compliance.d.ts.map +1 -0
  450. package/dist/tools/compliance.js +640 -0
  451. package/dist/tools/compliance.js.map +1 -0
  452. package/dist/tools/config.d.ts +19 -0
  453. package/dist/tools/config.d.ts.map +1 -0
  454. package/dist/tools/config.js +213 -0
  455. package/dist/tools/config.js.map +1 -0
  456. package/dist/tools/database.d.ts +62 -0
  457. package/dist/tools/database.d.ts.map +1 -0
  458. package/dist/tools/database.js +288 -0
  459. package/dist/tools/database.js.map +1 -0
  460. package/dist/tools/documents.d.ts +61 -0
  461. package/dist/tools/documents.d.ts.map +1 -0
  462. package/dist/tools/documents.js +1624 -0
  463. package/dist/tools/documents.js.map +1 -0
  464. package/dist/tools/embeddings.d.ts +14 -0
  465. package/dist/tools/embeddings.d.ts.map +1 -0
  466. package/dist/tools/embeddings.js +626 -0
  467. package/dist/tools/embeddings.js.map +1 -0
  468. package/dist/tools/evaluation.d.ts +25 -0
  469. package/dist/tools/evaluation.d.ts.map +1 -0
  470. package/dist/tools/evaluation.js +523 -0
  471. package/dist/tools/evaluation.js.map +1 -0
  472. package/dist/tools/events.d.ts +16 -0
  473. package/dist/tools/events.d.ts.map +1 -0
  474. package/dist/tools/events.js +493 -0
  475. package/dist/tools/events.js.map +1 -0
  476. package/dist/tools/extraction-structured.d.ts +13 -0
  477. package/dist/tools/extraction-structured.d.ts.map +1 -0
  478. package/dist/tools/extraction-structured.js +390 -0
  479. package/dist/tools/extraction-structured.js.map +1 -0
  480. package/dist/tools/extraction.d.ts +24 -0
  481. package/dist/tools/extraction.d.ts.map +1 -0
  482. package/dist/tools/extraction.js +424 -0
  483. package/dist/tools/extraction.js.map +1 -0
  484. package/dist/tools/file-management.d.ts +14 -0
  485. package/dist/tools/file-management.d.ts.map +1 -0
  486. package/dist/tools/file-management.js +523 -0
  487. package/dist/tools/file-management.js.map +1 -0
  488. package/dist/tools/form-fill.d.ts +13 -0
  489. package/dist/tools/form-fill.d.ts.map +1 -0
  490. package/dist/tools/form-fill.js +250 -0
  491. package/dist/tools/form-fill.js.map +1 -0
  492. package/dist/tools/health.d.ts +19 -0
  493. package/dist/tools/health.d.ts.map +1 -0
  494. package/dist/tools/health.js +229 -0
  495. package/dist/tools/health.js.map +1 -0
  496. package/dist/tools/images.d.ts +54 -0
  497. package/dist/tools/images.d.ts.map +1 -0
  498. package/dist/tools/images.js +787 -0
  499. package/dist/tools/images.js.map +1 -0
  500. package/dist/tools/ingestion.d.ts +94 -0
  501. package/dist/tools/ingestion.d.ts.map +1 -0
  502. package/dist/tools/ingestion.js +1659 -0
  503. package/dist/tools/ingestion.js.map +1 -0
  504. package/dist/tools/intelligence.d.ts +18 -0
  505. package/dist/tools/intelligence.d.ts.map +1 -0
  506. package/dist/tools/intelligence.js +1039 -0
  507. package/dist/tools/intelligence.js.map +1 -0
  508. package/dist/tools/provenance.d.ts +51 -0
  509. package/dist/tools/provenance.d.ts.map +1 -0
  510. package/dist/tools/provenance.js +691 -0
  511. package/dist/tools/provenance.js.map +1 -0
  512. package/dist/tools/reports.d.ts +41 -0
  513. package/dist/tools/reports.d.ts.map +1 -0
  514. package/dist/tools/reports.js +1394 -0
  515. package/dist/tools/reports.js.map +1 -0
  516. package/dist/tools/search.d.ts +35 -0
  517. package/dist/tools/search.d.ts.map +1 -0
  518. package/dist/tools/search.js +2528 -0
  519. package/dist/tools/search.js.map +1 -0
  520. package/dist/tools/shared.d.ts +52 -0
  521. package/dist/tools/shared.d.ts.map +1 -0
  522. package/dist/tools/shared.js +54 -0
  523. package/dist/tools/shared.js.map +1 -0
  524. package/dist/tools/tags.d.ts +15 -0
  525. package/dist/tools/tags.d.ts.map +1 -0
  526. package/dist/tools/tags.js +287 -0
  527. package/dist/tools/tags.js.map +1 -0
  528. package/dist/tools/timeline.d.ts +15 -0
  529. package/dist/tools/timeline.d.ts.map +1 -0
  530. package/dist/tools/timeline.js +14 -0
  531. package/dist/tools/timeline.js.map +1 -0
  532. package/dist/tools/users.d.ts +14 -0
  533. package/dist/tools/users.d.ts.map +1 -0
  534. package/dist/tools/users.js +257 -0
  535. package/dist/tools/users.js.map +1 -0
  536. package/dist/tools/vlm.d.ts +40 -0
  537. package/dist/tools/vlm.d.ts.map +1 -0
  538. package/dist/tools/vlm.js +475 -0
  539. package/dist/tools/vlm.js.map +1 -0
  540. package/dist/tools/workflow.d.ts +16 -0
  541. package/dist/tools/workflow.d.ts.map +1 -0
  542. package/dist/tools/workflow.js +495 -0
  543. package/dist/tools/workflow.js.map +1 -0
  544. package/dist/utils/backoff.d.ts +53 -0
  545. package/dist/utils/backoff.d.ts.map +1 -0
  546. package/dist/utils/backoff.js +78 -0
  547. package/dist/utils/backoff.js.map +1 -0
  548. package/dist/utils/config-persistence.d.ts +33 -0
  549. package/dist/utils/config-persistence.d.ts.map +1 -0
  550. package/dist/utils/config-persistence.js +61 -0
  551. package/dist/utils/config-persistence.js.map +1 -0
  552. package/dist/utils/hash.d.ts +65 -0
  553. package/dist/utils/hash.d.ts.map +1 -0
  554. package/dist/utils/hash.js +146 -0
  555. package/dist/utils/hash.js.map +1 -0
  556. package/dist/utils/math.d.ts +21 -0
  557. package/dist/utils/math.d.ts.map +1 -0
  558. package/dist/utils/math.js +39 -0
  559. package/dist/utils/math.js.map +1 -0
  560. package/dist/utils/validation.d.ts +697 -0
  561. package/dist/utils/validation.d.ts.map +1 -0
  562. package/dist/utils/validation.js +529 -0
  563. package/dist/utils/validation.js.map +1 -0
  564. package/package.json +96 -0
  565. package/python/.gitkeep +0 -0
  566. package/python/__init__.py +104 -0
  567. package/python/clustering_worker.py +440 -0
  568. package/python/docx_image_extractor.py +524 -0
  569. package/python/embedding_worker.py +552 -0
  570. package/python/file_manager_worker.py +564 -0
  571. package/python/form_fill_worker.py +399 -0
  572. package/python/gpu_utils.py +582 -0
  573. package/python/image_extractor.py +317 -0
  574. package/python/image_optimizer.py +444 -0
  575. package/python/ocr_worker.py +712 -0
  576. package/python/pyproject.toml +76 -0
  577. package/python/requirements.txt +51 -0
  578. package/python/reranker_worker.py +87 -0
@@ -0,0 +1,691 @@
1
+ /**
2
+ * Provenance Management MCP Tools
3
+ *
4
+ * Extracted from src/index.ts Task 22.
5
+ * Tools: ocr_provenance_get, ocr_provenance_verify, ocr_provenance_export,
6
+ * ocr_provenance_query, ocr_provenance_timeline, ocr_provenance_processor_stats
7
+ *
8
+ * CRITICAL: NEVER use console.log() - stdout is reserved for JSON-RPC protocol.
9
+ * Use console.error() for all logging.
10
+ *
11
+ * @module tools/provenance
12
+ */
13
+ import { z } from 'zod';
14
+ import { requireDatabase } from '../server/state.js';
15
+ import { successResult } from '../server/types.js';
16
+ import { validateInput, ProvenanceGetInput, ProvenanceVerifyInput, ProvenanceExportInput, } from '../utils/validation.js';
17
+ import { provenanceNotFoundError, validationError, documentNotFoundError, } from '../server/errors.js';
18
+ import { formatResponse, handleError } from './shared.js';
19
+ import { getImage } from '../services/storage/database/image-operations.js';
20
+ import { getOCRResult } from '../services/storage/database/ocr-operations.js';
21
+ import { ProvenanceVerifier } from '../services/provenance/verifier.js';
22
+ import { ProvenanceTracker } from '../services/provenance/tracker.js';
23
+ // ═══════════════════════════════════════════════════════════════════════════════
24
+ // HELPER FUNCTIONS
25
+ // ═══════════════════════════════════════════════════════════════════════════════
26
/**
 * Render a single value as one CSV field.
 *
 * `null` and `undefined` become the empty string. Any other value is
 * stringified; if the text contains a comma, a double quote, a line feed or a
 * carriage return, it is wrapped in double quotes and every embedded double
 * quote is doubled.
 *
 * @param {unknown} field - Value to render.
 * @returns {string} Text that can be joined with commas into a CSV row.
 */
function csvEscape(field) {
    // `== null` matches exactly null and undefined.
    if (field == null) {
        return '';
    }
    const text = String(field);
    const needsQuoting = /[",\r\n]/.test(text);
    return needsQuoting ? `"${text.split('"').join('""')}"` : text;
}
40
/**
 * Resolve an arbitrary item ID to its provenance ID.
 *
 * The ID is probed against each known item kind in a fixed order: document,
 * chunk, embedding, image, OCR result, comparison, cluster, form fill,
 * extraction, and finally the provenance records themselves. The first match
 * wins.
 *
 * @param {object} db - Database service exposing getDocument, getChunk,
 *   getEmbedding, getConnection and getProvenance.
 * @param {string} itemId - ID to look up.
 * @returns {{ provenanceId: string, itemType: string } | null} The provenance
 *   ID with the detected item type, or null when nothing matched.
 */
function findProvenanceId(db, itemId) {
    const match = (provenanceId, itemType) => ({ provenanceId, itemType });

    // Lookups exposed as methods on the database service.
    const serviceLookups = [
        ['getDocument', 'document'],
        ['getChunk', 'chunk'],
        ['getEmbedding', 'embedding'],
    ];
    for (const [method, itemType] of serviceLookups) {
        const row = db[method](itemId);
        if (row) {
            return match(row.provenance_id, itemType);
        }
    }

    const connection = db.getConnection();

    // Images and OCR results only count when they carry a provenance ID.
    const imageRow = getImage(connection, itemId);
    if (imageRow?.provenance_id) {
        return match(imageRow.provenance_id, 'image');
    }
    const ocrRow = getOCRResult(connection, itemId);
    if (ocrRow?.provenance_id) {
        return match(ocrRow.provenance_id, 'ocr_result');
    }

    // Remaining kinds are read straight from their tables. The table names come
    // from this fixed list only; the item ID is always a bound parameter.
    const tableLookups = [
        ['comparisons', 'comparison'],
        ['clusters', 'clustering'],
        ['form_fills', 'form_fill'],
        ['extractions', 'extraction'],
    ];
    for (const [table, itemType] of tableLookups) {
        const row = connection
            .prepare(`SELECT provenance_id FROM ${table} WHERE id = ?`)
            .get(itemId);
        if (row) {
            return match(row.provenance_id, itemType);
        }
    }

    // Last resort: the ID may itself be a provenance record ID.
    const provenanceRow = db.getProvenance(itemId);
    return provenanceRow ? match(provenanceRow.id, 'provenance') : null;
}
90
+ // ═══════════════════════════════════════════════════════════════════════════════
91
+ // PROVENANCE TOOL HANDLERS
92
+ // ═══════════════════════════════════════════════════════════════════════════════
93
/**
 * Handle ocr_provenance_get - Return the provenance chain for an item
 *
 * Resolves `item_id` through findProvenanceId, loads the chain with
 * db.getProvenanceChain and returns a trimmed view of each chain record.
 * A missing provenance ID or an empty chain is reported as a
 * provenance-not-found error. Errors are converted by handleError.
 *
 * @param {object} params - Raw tool arguments (validated against ProvenanceGetInput).
 * @returns {Promise<object>} Formatted tool response.
 */
export async function handleProvenanceGet(params) {
    try {
        const input = validateInput(ProvenanceGetInput, params);
        const { db } = requireDatabase();

        const located = findProvenanceId(db, input.item_id);
        if (!located?.provenanceId) {
            throw provenanceNotFoundError(input.item_id);
        }
        // Prefer the detected type; fall back to the caller's hint, then 'auto'.
        const itemType = located.itemType ?? input.item_type ?? 'auto';

        const chain = db.getProvenanceChain(located.provenanceId);
        if (chain.length === 0) {
            throw provenanceNotFoundError(input.item_id);
        }

        // Expose only the summary fields of each chain record.
        const summarize = ({ id, type, chain_depth, processor, processor_version, content_hash, created_at, parent_id, }) => ({
            id,
            type,
            chain_depth,
            processor,
            processor_version,
            content_hash,
            created_at,
            parent_id,
        });

        const payload = {
            item_id: input.item_id,
            item_type: itemType,
            chain: chain.map((record) => summarize(record)),
            root_document_id: chain[0].root_document_id,
            next_steps: [
                { tool: 'ocr_provenance_verify', description: 'Verify content integrity of this provenance chain' },
                { tool: 'ocr_document_get', description: 'Get details for the root document' },
            ],
        };
        return formatResponse(successResult(payload));
    }
    catch (error) {
        return handleError(error);
    }
}
132
/**
 * Handle ocr_provenance_verify - Verify the integrity of an item through its provenance chain
 *
 * Uses ProvenanceVerifier.verifyChain for the content-hash result and
 * db.getProvenanceChain for the per-step structural checks.
 * Constitution CP-003: SHA-256 hashes at every processing step enable tamper detection.
 *
 * Response fields:
 *  - `content_integrity`: true when `verify_content` is false, otherwise
 *    whether the verifier reported zero failed hashes.
 *  - `chain_integrity`: starts from the verifier's `chain_intact` flag and is
 *    cleared by any depth or parent-link mismatch found here.
 *  - `verified`: both of the above.
 *  - `steps`: one entry per chain record; `errors` is omitted when empty.
 *
 * @param {object} params - Raw tool arguments (validated against ProvenanceVerifyInput).
 * @returns {Promise<object>} Formatted tool response; failures go through handleError.
 */
export async function handleProvenanceVerify(params) {
    try {
        const input = validateInput(ProvenanceVerifyInput, params);
        const { db } = requireDatabase();
        const found = findProvenanceId(db, input.item_id);
        // findProvenanceId may resolve an item whose provenance_id is empty.
        // Treat that as "not found", the same way handleProvenanceGet does,
        // instead of handing a null ID to the verifier.
        if (!found || !found.provenanceId) {
            throw provenanceNotFoundError(input.item_id);
        }
        const provenanceId = found.provenanceId;
        // Use real ProvenanceVerifier for content integrity verification
        const tracker = new ProvenanceTracker(db);
        const verifier = new ProvenanceVerifier(db, tracker);
        // Verify the full chain (re-hashes content at each step)
        const chainResult = await verifier.verifyChain(provenanceId);
        // Build per-step details for the response
        const chain = db.getProvenanceChain(provenanceId);
        const steps = [];
        const errors = [];
        let chainIntegrity = chainResult.chain_intact;
        for (let i = 0; i < chain.length; i++) {
            const prov = chain[i];
            const step = {
                provenance_id: prov.id,
                type: prov.type,
                chain_depth: prov.chain_depth,
                content_verified: true,
                chain_verified: true,
                expected_hash: prov.content_hash,
            };
            // Check if this item failed content verification
            if (input.verify_content) {
                const failedItem = chainResult.failed_items.find((f) => f.id === prov.id);
                if (failedItem) {
                    step.content_verified = false;
                    step.computed_hash = failedItem.computed_hash;
                    errors.push(`Content hash mismatch at ${prov.id}: expected ${failedItem.expected_hash}, got ${failedItem.computed_hash}`);
                }
            }
            // Verify chain structure (depth and parent links)
            // H-5: Only verify depths when chain is intact; incomplete chains produce wrong expected depths
            if (input.verify_chain) {
                if (chainResult.chain_intact) {
                    // The depth check expects chain[0] to be the deepest record
                    // and the last element to have depth 0.
                    const expectedDepth = chain.length - 1 - i;
                    if (prov.chain_depth !== expectedDepth) {
                        step.chain_verified = false;
                        chainIntegrity = false;
                        errors.push(`Chain depth mismatch at ${prov.id}: expected ${expectedDepth}, got ${prov.chain_depth}`);
                    }
                    // The previous element must name this record as its parent.
                    if (i > 0 && chain[i - 1].parent_id !== prov.id) {
                        step.chain_verified = false;
                        chainIntegrity = false;
                        errors.push(`Parent link broken at ${chain[i - 1].id}`);
                    }
                }
                else {
                    step.chain_verified = false;
                }
            }
            steps.push(step);
        }
        // H-5: When chain is not intact, add a note instead of false-positive depth errors
        if (input.verify_chain && !chainResult.chain_intact) {
            errors.push('Depth verification skipped: chain is incomplete');
        }
        // M-9: When verify_content is false, skip content hash result entirely
        const contentIntegrity = input.verify_content ? chainResult.hashes_failed === 0 : true;
        const result = {
            item_id: input.item_id,
            verified: contentIntegrity && chainIntegrity,
            content_integrity: contentIntegrity,
            chain_integrity: chainIntegrity,
            steps,
            errors: errors.length > 0 ? errors : undefined,
        };
        // M-9: Only include hash counts when content verification was requested
        if (input.verify_content) {
            result.hashes_verified = chainResult.hashes_verified;
            result.hashes_failed = chainResult.hashes_failed;
        }
        result.next_steps = [
            { tool: 'ocr_provenance_get', description: 'View the full provenance chain' },
            { tool: 'ocr_document_get', description: 'Get details for the root document' },
            { tool: 'ocr_reprocess', description: 'Reprocess the document if integrity failed' },
        ];
        return formatResponse(successResult(result));
    }
    catch (error) {
        return handleError(error);
    }
}
224
/**
 * Build the W3C PROV-JSON style document for a list of provenance records.
 *
 * Every record becomes an entity. Records with chain_depth > 0 also get an
 * activity plus wasGeneratedBy, and (when they have a parent) wasDerivedFrom
 * and used relations. Records with chain_depth 0 are attributed to the single
 * `ocr:system` agent.
 *
 * @param {Array<object>} records - De-duplicated provenance records.
 * @returns {object} Document with prefix, entity, activity, agent and relation maps.
 */
function buildW3CProvDocument(records) {
    const prefix = {
        prov: 'http://www.w3.org/ns/prov#',
        ocr: 'http://ocr-provenance.local/ns#',
        xsd: 'http://www.w3.org/2001/XMLSchema#',
    };
    const entity = {};
    const activity = {};
    const agent = {
        'ocr:system': {
            'prov:type': { $: 'ocr-provenance-mcp', type: 'xsd:string' },
            'prov:label': 'OCR Provenance MCP System',
        },
    };
    const wasGeneratedBy = {};
    const wasDerivedFrom = {};
    const wasAttributedTo = {};
    const used = {};
    for (const r of records) {
        // Each provenance record is an entity
        entity[`ocr:${r.id}`] = {
            'prov:type': { $: r.type, type: 'xsd:string' },
            'ocr:contentHash': r.content_hash,
            'ocr:chainDepth': r.chain_depth,
            'prov:generatedAtTime': r.created_at,
        };
        if (r.chain_depth > 0) {
            // Only processing steps (chain_depth > 0) have activities
            const activityId = `ocr:activity-${r.id}`;
            activity[activityId] = {
                'prov:type': { $: r.processor, type: 'xsd:string' },
                'ocr:processorVersion': r.processor_version,
                'prov:startedAtTime': r.created_at,
            };
            // wasGeneratedBy: entity was generated by activity
            wasGeneratedBy[`ocr:wgb-${r.id}`] = {
                'prov:entity': `ocr:${r.id}`,
                'prov:activity': activityId,
            };
            if (r.parent_id) {
                // wasDerivedFrom: child entity derived from parent entity
                wasDerivedFrom[`ocr:wdf-${r.id}`] = {
                    'prov:generatedEntity': `ocr:${r.id}`,
                    'prov:usedEntity': `ocr:${r.parent_id}`,
                    'prov:activity': activityId,
                };
                // used: activity used parent entity as input
                used[`ocr:used-${r.id}`] = {
                    'prov:activity': activityId,
                    'prov:entity': `ocr:${r.parent_id}`,
                };
            }
        }
        else {
            // DOCUMENT entities (chain_depth 0) are attributed to the system agent
            wasAttributedTo[`ocr:wat-${r.id}`] = {
                'prov:entity': `ocr:${r.id}`,
                'prov:agent': 'ocr:system',
            };
        }
    }
    return {
        prefix,
        entity,
        activity,
        agent,
        wasGeneratedBy,
        wasDerivedFrom,
        wasAttributedTo,
        used,
    };
}
/**
 * Render provenance records as CSV text: a header row followed by one row per
 * record, joined with '\n'. Every cell goes through csvEscape (M-10).
 *
 * @param {Array<object>} records - De-duplicated provenance records.
 * @returns {string} CSV text without a trailing newline.
 */
function buildProvenanceCsv(records) {
    const headers = [
        'id',
        'type',
        'chain_depth',
        'processor',
        'processor_version',
        'content_hash',
        'parent_id',
        'root_document_id',
        'created_at',
    ];
    const rows = records.map((r) => [
        csvEscape(r.id),
        csvEscape(r.type),
        csvEscape(r.chain_depth),
        csvEscape(r.processor),
        csvEscape(r.processor_version),
        csvEscape(r.content_hash),
        csvEscape(r.parent_id ?? ''),
        csvEscape(r.root_document_id),
        csvEscape(r.created_at),
    ].join(','));
    return [headers.join(','), ...rows].join('\n');
}
/**
 * Handle ocr_provenance_export - Export provenance data in various formats
 *
 * Scope "document" exports the records returned by
 * db.getProvenanceByRootDocument for that document (document_id required).
 * Any other scope walks db.listDocuments({ limit: 10000 }) and collects the
 * records of every listed document. Null entries are dropped and records are
 * de-duplicated by ID before being rendered as 'json', 'w3c-prov' or CSV.
 *
 * @param {object} params - Raw tool arguments (validated against ProvenanceExportInput).
 * @returns {Promise<object>} Formatted tool response; failures go through handleError.
 */
export async function handleProvenanceExport(params) {
    try {
        const input = validateInput(ProvenanceExportInput, params);
        const { db } = requireDatabase();
        // Collect provenance records based on scope
        let rawRecords = [];
        if (input.scope === 'document') {
            if (!input.document_id) {
                throw validationError('document_id is required when scope is "document"');
            }
            const doc = db.getDocument(input.document_id);
            if (!doc) {
                throw documentNotFoundError(input.document_id);
            }
            rawRecords = db.getProvenanceByRootDocument(doc.provenance_id);
        }
        else {
            const docs = db.listDocuments({ limit: 10000 });
            for (const doc of docs) {
                // Append element by element: push(...bigArray) passes every
                // element as a call argument and can throw RangeError once the
                // array exceeds the engine's argument limit.
                for (const record of db.getProvenanceByRootDocument(doc.provenance_id)) {
                    rawRecords.push(record);
                }
            }
        }
        // Filter null records and deduplicate by provenance ID (L-17: cross-document VLM dedup can overlap)
        const nonNullRecords = rawRecords.filter((r) => r !== null);
        const records = [...new Map(nonNullRecords.map((r) => [r.id, r])).values()];
        let data;
        if (input.format === 'json') {
            data = records.map((r) => ({
                id: r.id,
                type: r.type,
                chain_depth: r.chain_depth,
                processor: r.processor,
                processor_version: r.processor_version,
                content_hash: r.content_hash,
                parent_id: r.parent_id,
                root_document_id: r.root_document_id,
                created_at: r.created_at,
            }));
        }
        else if (input.format === 'w3c-prov') {
            data = buildW3CProvDocument(records);
        }
        else {
            data = buildProvenanceCsv(records);
        }
        return formatResponse(successResult({
            scope: input.scope,
            format: input.format,
            document_id: input.document_id,
            record_count: records.length,
            data,
            next_steps: [{ tool: 'ocr_provenance_query', description: 'Query provenance with filters' }],
        }));
    }
    catch (error) {
        return handleError(error);
    }
}
378
+ // ═══════════════════════════════════════════════════════════════════════════════
379
+ // PROVENANCE QUERY TOOL HANDLERS (Phase 5)
380
+ // ═══════════════════════════════════════════════════════════════════════════════
381
/**
 * Handle ocr_provenance_query - Query provenance records with filters
 *
 * Validates the arguments, forwards every filter, paging and ordering option
 * to db.queryProvenance, and returns the matching records together with the
 * total count and an echo of the filters the caller actually supplied.
 *
 * @param {object} params - Raw tool arguments.
 * @returns {Promise<object>} Formatted tool response; failures go through handleError.
 */
export async function handleProvenanceQuery(params) {
    try {
        const querySchema = z.object({
            processor: z.string().optional().describe('Filter by processor name'),
            type: z
                .enum([
                'DOCUMENT',
                'OCR_RESULT',
                'FORM_FILL',
                'CHUNK',
                'IMAGE',
                'EXTRACTION',
                'COMPARISON',
                'CLUSTERING',
                'EMBEDDING',
                'VLM_DESCRIPTION',
            ])
                .optional()
                .describe('Filter by provenance type'),
            chain_depth: z.number().int().min(0).optional().describe('Filter by exact chain depth'),
            created_after: z.string().optional().describe('Filter records created after this ISO 8601 timestamp'),
            created_before: z.string().optional().describe('Filter records created before this ISO 8601 timestamp'),
            min_quality_score: z.number().min(0).optional().describe('Minimum processing quality score'),
            min_duration_ms: z.number().min(0).optional().describe('Minimum processing duration in ms'),
            root_document_id: z.string().optional().describe('Filter by root document provenance ID'),
            limit: z.number().int().min(1).max(100).default(50).describe('Maximum results (default 50)'),
            offset: z.number().int().min(0).default(0).describe('Offset for pagination'),
            order_by: z
                .enum(['created_at', 'processing_duration_ms', 'processing_quality_score'])
                .default('created_at')
                .describe('Field to order by'),
            order_dir: z.enum(['asc', 'desc']).default('desc').describe('Sort direction'),
        });
        const { processor, type, chain_depth, created_after, created_before, min_quality_score, min_duration_ms, root_document_id, limit, offset, order_by, order_dir, } = validateInput(querySchema, params);
        const { db } = requireDatabase();

        const { records, total } = db.queryProvenance({
            processor,
            type,
            chain_depth,
            created_after,
            created_before,
            min_quality_score,
            min_duration_ms,
            root_document_id,
            limit,
            offset,
            order_by,
            order_dir,
        });

        // Echo back only the filters the caller supplied, for response transparency.
        const filterCandidates = {
            processor,
            type,
            chain_depth,
            created_after,
            created_before,
            min_quality_score,
            min_duration_ms,
            root_document_id,
        };
        const filtersApplied = Object.fromEntries(Object.entries(filterCandidates).filter(([, value]) => value !== undefined));

        const toSummary = (r) => {
            const { id, type: recordType, chain_depth: depth, processor: processorName, processor_version, processing_duration_ms, processing_quality_score, content_hash, root_document_id: rootId, parent_id, created_at, } = r;
            return {
                id,
                type: recordType,
                chain_depth: depth,
                processor: processorName,
                processor_version,
                processing_duration_ms,
                processing_quality_score,
                content_hash,
                root_document_id: rootId,
                parent_id,
                created_at,
            };
        };

        return formatResponse(successResult({
            records: records.map((r) => toSummary(r)),
            total,
            limit,
            offset,
            filters_applied: filtersApplied,
            next_steps: [
                { tool: 'ocr_provenance_get', description: 'View full chain for a specific record' },
                { tool: 'ocr_document_get', description: 'Get document details for a provenance record' },
            ],
        }));
    }
    catch (error) {
        return handleError(error);
    }
}
479
/**
 * Handle ocr_provenance_timeline - Complete processing timeline for a document
 *
 * Loads every provenance record under the document's root provenance ID,
 * orders them by created_at (ties broken by chain_depth), numbers the steps
 * from 1 and sums the non-null processing durations.
 *
 * @param {object} params - Raw tool arguments (`document_id`, optional `include_params`).
 * @returns {Promise<object>} Formatted tool response; failures go through handleError.
 */
export async function handleProvenanceTimeline(params) {
    try {
        const timelineSchema = z.object({
            document_id: z.string().min(1).describe('Document ID to get timeline for'),
            include_params: z
                .boolean()
                .default(false)
                .describe('Include processing parameters in each step'),
        });
        const input = validateInput(timelineSchema, params);
        const { db } = requireDatabase();

        // The document row supplies the root provenance ID.
        const doc = db.getDocument(input.document_id);
        if (!doc) {
            throw documentNotFoundError(input.document_id);
        }

        // Both the empty and the populated response share this envelope.
        const respond = (totalMs, timeline) => formatResponse(successResult({
            document_id: input.document_id,
            total_processing_time_ms: totalMs,
            steps_count: timeline.length,
            timeline,
            next_steps: [
                { tool: 'ocr_provenance_query', description: 'Query specific provenance records' },
                { tool: 'ocr_trends', description: 'View processing volume trends (metric=volume)' },
            ],
        }));

        const records = db.getProvenanceByRootDocument(doc.provenance_id);
        if (records.length === 0) {
            return respond(0, []);
        }

        // Chronological order; chain_depth breaks ties. Sort a copy so the
        // array returned by the database layer is left untouched.
        const ordered = records
            .slice()
            .sort((a, b) => a.created_at.localeCompare(b.created_at) || a.chain_depth - b.chain_depth);

        // Records with a null duration contribute nothing to the total.
        const totalProcessingTimeMs = ordered.reduce((sum, r) => (r.processing_duration_ms !== null ? sum + r.processing_duration_ms : sum), 0);

        const timeline = ordered.map((r, position) => ({
            step: position + 1,
            type: r.type,
            processor: r.processor,
            processor_version: r.processor_version,
            duration_ms: r.processing_duration_ms,
            quality_score: r.processing_quality_score,
            chain_depth: r.chain_depth,
            timestamp: r.created_at,
            provenance_id: r.id,
            parent_id: r.parent_id,
            ...(input.include_params ? { processing_params: r.processing_params } : {}),
        }));

        return respond(totalProcessingTimeMs, timeline);
    }
    catch (error) {
        return handleError(error);
    }
}
556
/**
 * Handle ocr_provenance_processor_stats - Aggregate statistics per processor
 *
 * Validates the optional processor and date filters, passes them to
 * db.getProvenanceProcessorStats, and returns the resulting rows with their count.
 *
 * @param {object} params - Raw tool arguments (`processor`, `created_after`, `created_before`).
 * @returns {Promise<object>} Formatted tool response; failures go through handleError.
 */
export async function handleProvenanceProcessorStats(params) {
    try {
        const statsSchema = z.object({
            processor: z.string().optional().describe('Filter by specific processor name'),
            created_after: z.string().optional().describe('Filter records created after this ISO 8601 timestamp'),
            created_before: z.string().optional().describe('Filter records created before this ISO 8601 timestamp'),
        });
        const { processor, created_after, created_before } = validateInput(statsSchema, params);
        const { db } = requireDatabase();

        const stats = db.getProvenanceProcessorStats({ processor, created_after, created_before });

        const payload = {
            stats,
            total_processors: stats.length,
            next_steps: [
                { tool: 'ocr_report_performance', description: 'Get detailed pipeline performance analytics' },
                { tool: 'ocr_provenance_query', description: 'Query provenance by processor' },
            ],
        };
        return formatResponse(successResult(payload));
    }
    catch (error) {
        return handleError(error);
    }
}
585
+ // ═══════════════════════════════════════════════════════════════════════════════
586
+ // TOOL DEFINITIONS FOR MCP REGISTRATION
587
+ // ═══════════════════════════════════════════════════════════════════════════════
588
// Item kinds accepted by ocr_provenance_get's `item_type` hint.
const PROVENANCE_ITEM_TYPES = [
    'document',
    'ocr_result',
    'chunk',
    'embedding',
    'image',
    'comparison',
    'clustering',
    'form_fill',
    'extraction',
    'auto',
];
// Provenance record types accepted by ocr_provenance_query's `type` filter.
const PROVENANCE_RECORD_TYPES = [
    'DOCUMENT',
    'OCR_RESULT',
    'FORM_FILL',
    'CHUNK',
    'IMAGE',
    'EXTRACTION',
    'COMPARISON',
    'CLUSTERING',
    'EMBEDDING',
    'VLM_DESCRIPTION',
];
// Sortable columns for ocr_provenance_query.
const PROVENANCE_ORDER_FIELDS = ['created_at', 'processing_duration_ms', 'processing_quality_score'];
/**
 * Provenance tools collection for MCP server registration.
 *
 * Maps each tool name to its description, its input schema (a plain object of
 * zod fields) and the handler that implements it.
 */
export const provenanceTools = {
    // Trace an item's provenance chain.
    ocr_provenance_get: {
        description: '[ANALYSIS] Use to trace the complete processing history of any item (document, chunk, image, etc.). Returns provenance chain from origin to current state.',
        inputSchema: {
            item_id: z.string().min(1).describe('ID of the item (document, ocr_result, chunk, embedding, image, comparison, clustering, form_fill, extraction, or provenance)'),
            item_type: z.enum(PROVENANCE_ITEM_TYPES).default('auto').describe('Type of item'),
        },
        handler: handleProvenanceGet,
    },
    // Check content hashes and chain structure.
    ocr_provenance_verify: {
        description: '[ANALYSIS] Use to verify data integrity by checking content hashes and chain consistency. Returns verification status with any issues found.',
        inputSchema: {
            item_id: z.string().min(1).describe('ID of the item to verify'),
            verify_content: z.boolean().default(true).describe('Verify content hashes'),
            verify_chain: z.boolean().default(true).describe('Verify chain integrity'),
        },
        handler: handleProvenanceVerify,
    },
    // Export records as JSON, W3C PROV-JSON or CSV.
    ocr_provenance_export: {
        description: '[STATUS] Use to export provenance records to JSON, W3C PROV-JSON, or CSV. Scope to a document or entire database.',
        inputSchema: {
            scope: z.enum(['document', 'database']).describe('Export scope'),
            document_id: z.string().optional().describe('Document ID (required when scope is document)'),
            format: z.enum(['json', 'w3c-prov', 'csv']).default('json').describe('Export format'),
        },
        handler: handleProvenanceExport,
    },
    // Filtered, paginated record query.
    ocr_provenance_query: {
        description: '[ANALYSIS] Use to query provenance records with filters (processor, type, date, depth, quality). Returns paginated records for auditing.',
        inputSchema: {
            processor: z.string().optional().describe('Filter by processor name'),
            type: z.enum(PROVENANCE_RECORD_TYPES).optional().describe('Filter by provenance type'),
            chain_depth: z.number().int().min(0).optional().describe('Filter by exact chain depth'),
            created_after: z.string().optional().describe('Filter records created after this ISO 8601 timestamp'),
            created_before: z.string().optional().describe('Filter records created before this ISO 8601 timestamp'),
            min_quality_score: z.number().min(0).optional().describe('Minimum processing quality score'),
            min_duration_ms: z.number().min(0).optional().describe('Minimum processing duration in ms'),
            root_document_id: z.string().optional().describe('Filter by root document provenance ID'),
            limit: z.number().int().min(1).max(100).default(50).describe('Maximum results (default 50)'),
            offset: z.number().int().min(0).default(0).describe('Offset for pagination'),
            order_by: z.enum(PROVENANCE_ORDER_FIELDS).default('created_at').describe('Field to order by'),
            order_dir: z.enum(['asc', 'desc']).default('desc').describe('Sort direction'),
        },
        handler: handleProvenanceQuery,
    },
    // Chronological processing steps for one document.
    ocr_provenance_timeline: {
        description: '[ANALYSIS] Use to see the complete processing timeline for a document. Returns every transformation step chronologically with durations.',
        inputSchema: {
            document_id: z.string().min(1).describe('Document ID to get timeline for'),
            include_params: z.boolean().default(false).describe('Include processing parameters in each step'),
        },
        handler: handleProvenanceTimeline,
    },
    // Aggregates per processor.
    ocr_provenance_processor_stats: {
        description: '[STATUS] Use to get aggregate performance stats per processor (operation counts, durations, quality). Returns processor-level analytics.',
        inputSchema: {
            processor: z.string().optional().describe('Filter by specific processor name'),
            created_after: z.string().optional().describe('Filter records created after this ISO 8601 timestamp'),
            created_before: z.string().optional().describe('Filter records created before this ISO 8601 timestamp'),
        },
        handler: handleProvenanceProcessorStats,
    },
};
691
+ //# sourceMappingURL=provenance.js.map