ocr-provenance-mcp 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ocr-provenance-mcp might be problematic. Click here for more details.

Files changed (578) hide show
  1. package/.env.example +55 -0
  2. package/LICENSE +78 -0
  3. package/README.md +1154 -0
  4. package/dist/bin-http.d.ts +24 -0
  5. package/dist/bin-http.d.ts.map +1 -0
  6. package/dist/bin-http.js +275 -0
  7. package/dist/bin-http.js.map +1 -0
  8. package/dist/bin-setup.d.ts +11 -0
  9. package/dist/bin-setup.d.ts.map +1 -0
  10. package/dist/bin-setup.js +610 -0
  11. package/dist/bin-setup.js.map +1 -0
  12. package/dist/bin.d.ts +16 -0
  13. package/dist/bin.d.ts.map +1 -0
  14. package/dist/bin.js +16 -0
  15. package/dist/bin.js.map +1 -0
  16. package/dist/index.d.ts +13 -0
  17. package/dist/index.d.ts.map +1 -0
  18. package/dist/index.js +90 -0
  19. package/dist/index.js.map +1 -0
  20. package/dist/models/chunk.d.ts +136 -0
  21. package/dist/models/chunk.d.ts.map +1 -0
  22. package/dist/models/chunk.js +27 -0
  23. package/dist/models/chunk.js.map +1 -0
  24. package/dist/models/cluster.d.ts +79 -0
  25. package/dist/models/cluster.d.ts.map +1 -0
  26. package/dist/models/cluster.js +10 -0
  27. package/dist/models/cluster.js.map +1 -0
  28. package/dist/models/comparison.d.ts +62 -0
  29. package/dist/models/comparison.d.ts.map +1 -0
  30. package/dist/models/comparison.js +8 -0
  31. package/dist/models/comparison.js.map +1 -0
  32. package/dist/models/document.d.ts +104 -0
  33. package/dist/models/document.d.ts.map +1 -0
  34. package/dist/models/document.js +15 -0
  35. package/dist/models/document.js.map +1 -0
  36. package/dist/models/embedding.d.ts +87 -0
  37. package/dist/models/embedding.d.ts.map +1 -0
  38. package/dist/models/embedding.js +23 -0
  39. package/dist/models/embedding.js.map +1 -0
  40. package/dist/models/extraction.d.ts +15 -0
  41. package/dist/models/extraction.d.ts.map +1 -0
  42. package/dist/models/extraction.js +2 -0
  43. package/dist/models/extraction.js.map +1 -0
  44. package/dist/models/form-fill.d.ts +23 -0
  45. package/dist/models/form-fill.d.ts.map +1 -0
  46. package/dist/models/form-fill.js +2 -0
  47. package/dist/models/form-fill.js.map +1 -0
  48. package/dist/models/image.d.ts +177 -0
  49. package/dist/models/image.d.ts.map +1 -0
  50. package/dist/models/image.js +8 -0
  51. package/dist/models/image.js.map +1 -0
  52. package/dist/models/index.d.ts +14 -0
  53. package/dist/models/index.d.ts.map +1 -0
  54. package/dist/models/index.js +22 -0
  55. package/dist/models/index.js.map +1 -0
  56. package/dist/models/provenance.d.ts +174 -0
  57. package/dist/models/provenance.d.ts.map +1 -0
  58. package/dist/models/provenance.js +53 -0
  59. package/dist/models/provenance.js.map +1 -0
  60. package/dist/models/uploaded-file.d.ts +20 -0
  61. package/dist/models/uploaded-file.d.ts.map +1 -0
  62. package/dist/models/uploaded-file.js +2 -0
  63. package/dist/models/uploaded-file.js.map +1 -0
  64. package/dist/server/errors.d.ts +93 -0
  65. package/dist/server/errors.d.ts.map +1 -0
  66. package/dist/server/errors.js +256 -0
  67. package/dist/server/errors.js.map +1 -0
  68. package/dist/server/events.d.ts +36 -0
  69. package/dist/server/events.d.ts.map +1 -0
  70. package/dist/server/events.js +48 -0
  71. package/dist/server/events.js.map +1 -0
  72. package/dist/server/permissions.d.ts +26 -0
  73. package/dist/server/permissions.d.ts.map +1 -0
  74. package/dist/server/permissions.js +194 -0
  75. package/dist/server/permissions.js.map +1 -0
  76. package/dist/server/register-tools.d.ts +25 -0
  77. package/dist/server/register-tools.d.ts.map +1 -0
  78. package/dist/server/register-tools.js +102 -0
  79. package/dist/server/register-tools.js.map +1 -0
  80. package/dist/server/startup.d.ts +16 -0
  81. package/dist/server/startup.d.ts.map +1 -0
  82. package/dist/server/startup.js +37 -0
  83. package/dist/server/startup.js.map +1 -0
  84. package/dist/server/state.d.ts +166 -0
  85. package/dist/server/state.d.ts.map +1 -0
  86. package/dist/server/state.js +424 -0
  87. package/dist/server/state.js.map +1 -0
  88. package/dist/server/transports/http-transport.d.ts +37 -0
  89. package/dist/server/transports/http-transport.d.ts.map +1 -0
  90. package/dist/server/transports/http-transport.js +204 -0
  91. package/dist/server/transports/http-transport.js.map +1 -0
  92. package/dist/server/transports/index.d.ts +9 -0
  93. package/dist/server/transports/index.d.ts.map +1 -0
  94. package/dist/server/transports/index.js +9 -0
  95. package/dist/server/transports/index.js.map +1 -0
  96. package/dist/server/transports/session-manager.d.ts +40 -0
  97. package/dist/server/transports/session-manager.d.ts.map +1 -0
  98. package/dist/server/transports/session-manager.js +74 -0
  99. package/dist/server/transports/session-manager.js.map +1 -0
  100. package/dist/server/types.d.ts +82 -0
  101. package/dist/server/types.d.ts.map +1 -0
  102. package/dist/server/types.js +14 -0
  103. package/dist/server/types.js.map +1 -0
  104. package/dist/services/audit.d.ts +26 -0
  105. package/dist/services/audit.d.ts.map +1 -0
  106. package/dist/services/audit.js +43 -0
  107. package/dist/services/audit.js.map +1 -0
  108. package/dist/services/chunking/chunk-deduplicator.d.ts +33 -0
  109. package/dist/services/chunking/chunk-deduplicator.d.ts.map +1 -0
  110. package/dist/services/chunking/chunk-deduplicator.js +46 -0
  111. package/dist/services/chunking/chunk-deduplicator.js.map +1 -0
  112. package/dist/services/chunking/chunk-merger.d.ts +26 -0
  113. package/dist/services/chunking/chunk-merger.d.ts.map +1 -0
  114. package/dist/services/chunking/chunk-merger.js +94 -0
  115. package/dist/services/chunking/chunk-merger.js.map +1 -0
  116. package/dist/services/chunking/chunker.d.ts +62 -0
  117. package/dist/services/chunking/chunker.d.ts.map +1 -0
  118. package/dist/services/chunking/chunker.js +566 -0
  119. package/dist/services/chunking/chunker.js.map +1 -0
  120. package/dist/services/chunking/heading-normalizer.d.ts +33 -0
  121. package/dist/services/chunking/heading-normalizer.d.ts.map +1 -0
  122. package/dist/services/chunking/heading-normalizer.js +101 -0
  123. package/dist/services/chunking/heading-normalizer.js.map +1 -0
  124. package/dist/services/chunking/json-block-analyzer.d.ts +163 -0
  125. package/dist/services/chunking/json-block-analyzer.d.ts.map +1 -0
  126. package/dist/services/chunking/json-block-analyzer.js +1033 -0
  127. package/dist/services/chunking/json-block-analyzer.js.map +1 -0
  128. package/dist/services/chunking/markdown-parser.d.ts +75 -0
  129. package/dist/services/chunking/markdown-parser.d.ts.map +1 -0
  130. package/dist/services/chunking/markdown-parser.js +428 -0
  131. package/dist/services/chunking/markdown-parser.js.map +1 -0
  132. package/dist/services/chunking/text-normalizer.d.ts +20 -0
  133. package/dist/services/chunking/text-normalizer.d.ts.map +1 -0
  134. package/dist/services/chunking/text-normalizer.js +36 -0
  135. package/dist/services/chunking/text-normalizer.js.map +1 -0
  136. package/dist/services/clm/contract-schemas.d.ts +36 -0
  137. package/dist/services/clm/contract-schemas.d.ts.map +1 -0
  138. package/dist/services/clm/contract-schemas.js +92 -0
  139. package/dist/services/clm/contract-schemas.js.map +1 -0
  140. package/dist/services/clm/summarization.d.ts +46 -0
  141. package/dist/services/clm/summarization.d.ts.map +1 -0
  142. package/dist/services/clm/summarization.js +61 -0
  143. package/dist/services/clm/summarization.js.map +1 -0
  144. package/dist/services/clustering/clustering-service.d.ts +58 -0
  145. package/dist/services/clustering/clustering-service.d.ts.map +1 -0
  146. package/dist/services/clustering/clustering-service.js +467 -0
  147. package/dist/services/clustering/clustering-service.js.map +1 -0
  148. package/dist/services/comparison/diff-service.d.ts +41 -0
  149. package/dist/services/comparison/diff-service.d.ts.map +1 -0
  150. package/dist/services/comparison/diff-service.js +120 -0
  151. package/dist/services/comparison/diff-service.js.map +1 -0
  152. package/dist/services/embedding/embedder.d.ts +55 -0
  153. package/dist/services/embedding/embedder.d.ts.map +1 -0
  154. package/dist/services/embedding/embedder.js +202 -0
  155. package/dist/services/embedding/embedder.js.map +1 -0
  156. package/dist/services/embedding/nomic.d.ts +67 -0
  157. package/dist/services/embedding/nomic.d.ts.map +1 -0
  158. package/dist/services/embedding/nomic.js +280 -0
  159. package/dist/services/embedding/nomic.js.map +1 -0
  160. package/dist/services/gemini/circuit-breaker.d.ts +106 -0
  161. package/dist/services/gemini/circuit-breaker.d.ts.map +1 -0
  162. package/dist/services/gemini/circuit-breaker.js +237 -0
  163. package/dist/services/gemini/circuit-breaker.js.map +1 -0
  164. package/dist/services/gemini/client.d.ts +173 -0
  165. package/dist/services/gemini/client.d.ts.map +1 -0
  166. package/dist/services/gemini/client.js +483 -0
  167. package/dist/services/gemini/client.js.map +1 -0
  168. package/dist/services/gemini/config.d.ts +116 -0
  169. package/dist/services/gemini/config.d.ts.map +1 -0
  170. package/dist/services/gemini/config.js +118 -0
  171. package/dist/services/gemini/config.js.map +1 -0
  172. package/dist/services/gemini/index.d.ts +9 -0
  173. package/dist/services/gemini/index.d.ts.map +1 -0
  174. package/dist/services/gemini/index.js +13 -0
  175. package/dist/services/gemini/index.js.map +1 -0
  176. package/dist/services/gemini/rate-limiter.d.ts +62 -0
  177. package/dist/services/gemini/rate-limiter.d.ts.map +1 -0
  178. package/dist/services/gemini/rate-limiter.js +120 -0
  179. package/dist/services/gemini/rate-limiter.js.map +1 -0
  180. package/dist/services/images/extractor.d.ts +88 -0
  181. package/dist/services/images/extractor.d.ts.map +1 -0
  182. package/dist/services/images/extractor.js +340 -0
  183. package/dist/services/images/extractor.js.map +1 -0
  184. package/dist/services/images/optimizer.d.ts +130 -0
  185. package/dist/services/images/optimizer.d.ts.map +1 -0
  186. package/dist/services/images/optimizer.js +228 -0
  187. package/dist/services/images/optimizer.js.map +1 -0
  188. package/dist/services/ocr/datalab.d.ts +64 -0
  189. package/dist/services/ocr/datalab.d.ts.map +1 -0
  190. package/dist/services/ocr/datalab.js +425 -0
  191. package/dist/services/ocr/datalab.js.map +1 -0
  192. package/dist/services/ocr/errors.d.ts +38 -0
  193. package/dist/services/ocr/errors.d.ts.map +1 -0
  194. package/dist/services/ocr/errors.js +83 -0
  195. package/dist/services/ocr/errors.js.map +1 -0
  196. package/dist/services/ocr/file-manager.d.ts +76 -0
  197. package/dist/services/ocr/file-manager.d.ts.map +1 -0
  198. package/dist/services/ocr/file-manager.js +238 -0
  199. package/dist/services/ocr/file-manager.js.map +1 -0
  200. package/dist/services/ocr/form-fill.d.ts +48 -0
  201. package/dist/services/ocr/form-fill.d.ts.map +1 -0
  202. package/dist/services/ocr/form-fill.js +213 -0
  203. package/dist/services/ocr/form-fill.js.map +1 -0
  204. package/dist/services/ocr/processor.d.ts +95 -0
  205. package/dist/services/ocr/processor.d.ts.map +1 -0
  206. package/dist/services/ocr/processor.js +259 -0
  207. package/dist/services/ocr/processor.js.map +1 -0
  208. package/dist/services/provenance/agent-metadata.d.ts +82 -0
  209. package/dist/services/provenance/agent-metadata.d.ts.map +1 -0
  210. package/dist/services/provenance/agent-metadata.js +106 -0
  211. package/dist/services/provenance/agent-metadata.js.map +1 -0
  212. package/dist/services/provenance/chain-hash.d.ts +57 -0
  213. package/dist/services/provenance/chain-hash.d.ts.map +1 -0
  214. package/dist/services/provenance/chain-hash.js +131 -0
  215. package/dist/services/provenance/chain-hash.js.map +1 -0
  216. package/dist/services/provenance/exporter.d.ts +202 -0
  217. package/dist/services/provenance/exporter.d.ts.map +1 -0
  218. package/dist/services/provenance/exporter.js +457 -0
  219. package/dist/services/provenance/exporter.js.map +1 -0
  220. package/dist/services/provenance/index.d.ts +15 -0
  221. package/dist/services/provenance/index.d.ts.map +1 -0
  222. package/dist/services/provenance/index.js +17 -0
  223. package/dist/services/provenance/index.js.map +1 -0
  224. package/dist/services/provenance/tracker.d.ts +138 -0
  225. package/dist/services/provenance/tracker.d.ts.map +1 -0
  226. package/dist/services/provenance/tracker.js +293 -0
  227. package/dist/services/provenance/tracker.js.map +1 -0
  228. package/dist/services/provenance/verifier.d.ts +153 -0
  229. package/dist/services/provenance/verifier.d.ts.map +1 -0
  230. package/dist/services/provenance/verifier.js +536 -0
  231. package/dist/services/provenance/verifier.js.map +1 -0
  232. package/dist/services/python-pool.d.ts +70 -0
  233. package/dist/services/python-pool.d.ts.map +1 -0
  234. package/dist/services/python-pool.js +265 -0
  235. package/dist/services/python-pool.js.map +1 -0
  236. package/dist/services/search/bm25.d.ts +180 -0
  237. package/dist/services/search/bm25.d.ts.map +1 -0
  238. package/dist/services/search/bm25.js +656 -0
  239. package/dist/services/search/bm25.js.map +1 -0
  240. package/dist/services/search/fusion.d.ts +103 -0
  241. package/dist/services/search/fusion.d.ts.map +1 -0
  242. package/dist/services/search/fusion.js +122 -0
  243. package/dist/services/search/fusion.js.map +1 -0
  244. package/dist/services/search/local-reranker.d.ts +30 -0
  245. package/dist/services/search/local-reranker.d.ts.map +1 -0
  246. package/dist/services/search/local-reranker.js +123 -0
  247. package/dist/services/search/local-reranker.js.map +1 -0
  248. package/dist/services/search/quality.d.ts +11 -0
  249. package/dist/services/search/quality.d.ts.map +1 -0
  250. package/dist/services/search/quality.js +17 -0
  251. package/dist/services/search/quality.js.map +1 -0
  252. package/dist/services/search/query-classifier.d.ts +34 -0
  253. package/dist/services/search/query-classifier.d.ts.map +1 -0
  254. package/dist/services/search/query-classifier.js +114 -0
  255. package/dist/services/search/query-classifier.js.map +1 -0
  256. package/dist/services/search/query-expander.d.ts +73 -0
  257. package/dist/services/search/query-expander.d.ts.map +1 -0
  258. package/dist/services/search/query-expander.js +281 -0
  259. package/dist/services/search/query-expander.js.map +1 -0
  260. package/dist/services/search/reranker.d.ts +44 -0
  261. package/dist/services/search/reranker.d.ts.map +1 -0
  262. package/dist/services/search/reranker.js +101 -0
  263. package/dist/services/search/reranker.js.map +1 -0
  264. package/dist/services/storage/database/annotation-operations.d.ts +113 -0
  265. package/dist/services/storage/database/annotation-operations.d.ts.map +1 -0
  266. package/dist/services/storage/database/annotation-operations.js +177 -0
  267. package/dist/services/storage/database/annotation-operations.js.map +1 -0
  268. package/dist/services/storage/database/approval-operations.d.ts +132 -0
  269. package/dist/services/storage/database/approval-operations.d.ts.map +1 -0
  270. package/dist/services/storage/database/approval-operations.js +206 -0
  271. package/dist/services/storage/database/approval-operations.js.map +1 -0
  272. package/dist/services/storage/database/chunk-operations.d.ts +132 -0
  273. package/dist/services/storage/database/chunk-operations.d.ts.map +1 -0
  274. package/dist/services/storage/database/chunk-operations.js +306 -0
  275. package/dist/services/storage/database/chunk-operations.js.map +1 -0
  276. package/dist/services/storage/database/cluster-operations.d.ts +97 -0
  277. package/dist/services/storage/database/cluster-operations.d.ts.map +1 -0
  278. package/dist/services/storage/database/cluster-operations.js +258 -0
  279. package/dist/services/storage/database/cluster-operations.js.map +1 -0
  280. package/dist/services/storage/database/comparison-operations.d.ts +41 -0
  281. package/dist/services/storage/database/comparison-operations.d.ts.map +1 -0
  282. package/dist/services/storage/database/comparison-operations.js +65 -0
  283. package/dist/services/storage/database/comparison-operations.js.map +1 -0
  284. package/dist/services/storage/database/converters.d.ts +36 -0
  285. package/dist/services/storage/database/converters.d.ts.map +1 -0
  286. package/dist/services/storage/database/converters.js +244 -0
  287. package/dist/services/storage/database/converters.js.map +1 -0
  288. package/dist/services/storage/database/document-operations.d.ts +145 -0
  289. package/dist/services/storage/database/document-operations.d.ts.map +1 -0
  290. package/dist/services/storage/database/document-operations.js +498 -0
  291. package/dist/services/storage/database/document-operations.js.map +1 -0
  292. package/dist/services/storage/database/embedding-operations.d.ts +130 -0
  293. package/dist/services/storage/database/embedding-operations.d.ts.map +1 -0
  294. package/dist/services/storage/database/embedding-operations.js +315 -0
  295. package/dist/services/storage/database/embedding-operations.js.map +1 -0
  296. package/dist/services/storage/database/extraction-operations.d.ts +47 -0
  297. package/dist/services/storage/database/extraction-operations.d.ts.map +1 -0
  298. package/dist/services/storage/database/extraction-operations.js +85 -0
  299. package/dist/services/storage/database/extraction-operations.js.map +1 -0
  300. package/dist/services/storage/database/form-fill-operations.d.ts +58 -0
  301. package/dist/services/storage/database/form-fill-operations.d.ts.map +1 -0
  302. package/dist/services/storage/database/form-fill-operations.js +116 -0
  303. package/dist/services/storage/database/form-fill-operations.js.map +1 -0
  304. package/dist/services/storage/database/helpers.d.ts +29 -0
  305. package/dist/services/storage/database/helpers.d.ts.map +1 -0
  306. package/dist/services/storage/database/helpers.js +55 -0
  307. package/dist/services/storage/database/helpers.js.map +1 -0
  308. package/dist/services/storage/database/image-operations.d.ts +202 -0
  309. package/dist/services/storage/database/image-operations.d.ts.map +1 -0
  310. package/dist/services/storage/database/image-operations.js +484 -0
  311. package/dist/services/storage/database/image-operations.js.map +1 -0
  312. package/dist/services/storage/database/index.d.ts +13 -0
  313. package/dist/services/storage/database/index.d.ts.map +1 -0
  314. package/dist/services/storage/database/index.js +16 -0
  315. package/dist/services/storage/database/index.js.map +1 -0
  316. package/dist/services/storage/database/lock-operations.d.ts +59 -0
  317. package/dist/services/storage/database/lock-operations.d.ts.map +1 -0
  318. package/dist/services/storage/database/lock-operations.js +89 -0
  319. package/dist/services/storage/database/lock-operations.js.map +1 -0
  320. package/dist/services/storage/database/obligation-operations.d.ts +88 -0
  321. package/dist/services/storage/database/obligation-operations.d.ts.map +1 -0
  322. package/dist/services/storage/database/obligation-operations.js +206 -0
  323. package/dist/services/storage/database/obligation-operations.js.map +1 -0
  324. package/dist/services/storage/database/ocr-operations.d.ts +33 -0
  325. package/dist/services/storage/database/ocr-operations.d.ts.map +1 -0
  326. package/dist/services/storage/database/ocr-operations.js +70 -0
  327. package/dist/services/storage/database/ocr-operations.js.map +1 -0
  328. package/dist/services/storage/database/playbook-operations.d.ts +72 -0
  329. package/dist/services/storage/database/playbook-operations.d.ts.map +1 -0
  330. package/dist/services/storage/database/playbook-operations.js +247 -0
  331. package/dist/services/storage/database/playbook-operations.js.map +1 -0
  332. package/dist/services/storage/database/provenance-operations.d.ts +112 -0
  333. package/dist/services/storage/database/provenance-operations.d.ts.map +1 -0
  334. package/dist/services/storage/database/provenance-operations.js +251 -0
  335. package/dist/services/storage/database/provenance-operations.js.map +1 -0
  336. package/dist/services/storage/database/service.d.ts +142 -0
  337. package/dist/services/storage/database/service.d.ts.map +1 -0
  338. package/dist/services/storage/database/service.js +310 -0
  339. package/dist/services/storage/database/service.js.map +1 -0
  340. package/dist/services/storage/database/static-operations.d.ts +30 -0
  341. package/dist/services/storage/database/static-operations.d.ts.map +1 -0
  342. package/dist/services/storage/database/static-operations.js +218 -0
  343. package/dist/services/storage/database/static-operations.js.map +1 -0
  344. package/dist/services/storage/database/stats-operations.d.ts +101 -0
  345. package/dist/services/storage/database/stats-operations.d.ts.map +1 -0
  346. package/dist/services/storage/database/stats-operations.js +394 -0
  347. package/dist/services/storage/database/stats-operations.js.map +1 -0
  348. package/dist/services/storage/database/tag-operations.d.ts +76 -0
  349. package/dist/services/storage/database/tag-operations.d.ts.map +1 -0
  350. package/dist/services/storage/database/tag-operations.js +178 -0
  351. package/dist/services/storage/database/tag-operations.js.map +1 -0
  352. package/dist/services/storage/database/types.d.ts +286 -0
  353. package/dist/services/storage/database/types.d.ts.map +1 -0
  354. package/dist/services/storage/database/types.js +39 -0
  355. package/dist/services/storage/database/types.js.map +1 -0
  356. package/dist/services/storage/database/upload-operations.d.ts +71 -0
  357. package/dist/services/storage/database/upload-operations.d.ts.map +1 -0
  358. package/dist/services/storage/database/upload-operations.js +124 -0
  359. package/dist/services/storage/database/upload-operations.js.map +1 -0
  360. package/dist/services/storage/database/user-operations.d.ts +102 -0
  361. package/dist/services/storage/database/user-operations.d.ts.map +1 -0
  362. package/dist/services/storage/database/user-operations.js +151 -0
  363. package/dist/services/storage/database/user-operations.js.map +1 -0
  364. package/dist/services/storage/database/workflow-operations.d.ts +98 -0
  365. package/dist/services/storage/database/workflow-operations.d.ts.map +1 -0
  366. package/dist/services/storage/database/workflow-operations.js +157 -0
  367. package/dist/services/storage/database/workflow-operations.js.map +1 -0
  368. package/dist/services/storage/database.d.ts +16 -0
  369. package/dist/services/storage/database.d.ts.map +1 -0
  370. package/dist/services/storage/database.js +15 -0
  371. package/dist/services/storage/database.js.map +1 -0
  372. package/dist/services/storage/index.d.ts +10 -0
  373. package/dist/services/storage/index.d.ts.map +1 -0
  374. package/dist/services/storage/index.js +10 -0
  375. package/dist/services/storage/index.js.map +1 -0
  376. package/dist/services/storage/migrations/index.d.ts +16 -0
  377. package/dist/services/storage/migrations/index.d.ts.map +1 -0
  378. package/dist/services/storage/migrations/index.js +20 -0
  379. package/dist/services/storage/migrations/index.js.map +1 -0
  380. package/dist/services/storage/migrations/operations.d.ts +40 -0
  381. package/dist/services/storage/migrations/operations.d.ts.map +1 -0
  382. package/dist/services/storage/migrations/operations.js +2910 -0
  383. package/dist/services/storage/migrations/operations.js.map +1 -0
  384. package/dist/services/storage/migrations/schema-definitions.d.ts +306 -0
  385. package/dist/services/storage/migrations/schema-definitions.d.ts.map +1 -0
  386. package/dist/services/storage/migrations/schema-definitions.js +1006 -0
  387. package/dist/services/storage/migrations/schema-definitions.js.map +1 -0
  388. package/dist/services/storage/migrations/schema-helpers.d.ts +50 -0
  389. package/dist/services/storage/migrations/schema-helpers.d.ts.map +1 -0
  390. package/dist/services/storage/migrations/schema-helpers.js +176 -0
  391. package/dist/services/storage/migrations/schema-helpers.js.map +1 -0
  392. package/dist/services/storage/migrations/types.d.ts +15 -0
  393. package/dist/services/storage/migrations/types.d.ts.map +1 -0
  394. package/dist/services/storage/migrations/types.js +21 -0
  395. package/dist/services/storage/migrations/types.js.map +1 -0
  396. package/dist/services/storage/migrations/verification.d.ts +20 -0
  397. package/dist/services/storage/migrations/verification.d.ts.map +1 -0
  398. package/dist/services/storage/migrations/verification.js +78 -0
  399. package/dist/services/storage/migrations/verification.js.map +1 -0
  400. package/dist/services/storage/migrations.d.ts +16 -0
  401. package/dist/services/storage/migrations.d.ts.map +1 -0
  402. package/dist/services/storage/migrations.js +17 -0
  403. package/dist/services/storage/migrations.js.map +1 -0
  404. package/dist/services/storage/types.d.ts +12 -0
  405. package/dist/services/storage/types.d.ts.map +1 -0
  406. package/dist/services/storage/types.js +5 -0
  407. package/dist/services/storage/types.js.map +1 -0
  408. package/dist/services/storage/vector.d.ts +208 -0
  409. package/dist/services/storage/vector.d.ts.map +1 -0
  410. package/dist/services/storage/vector.js +526 -0
  411. package/dist/services/storage/vector.js.map +1 -0
  412. package/dist/services/vlm/pipeline.d.ts +194 -0
  413. package/dist/services/vlm/pipeline.d.ts.map +1 -0
  414. package/dist/services/vlm/pipeline.js +800 -0
  415. package/dist/services/vlm/pipeline.js.map +1 -0
  416. package/dist/services/vlm/prompts.d.ts +171 -0
  417. package/dist/services/vlm/prompts.d.ts.map +1 -0
  418. package/dist/services/vlm/prompts.js +229 -0
  419. package/dist/services/vlm/prompts.js.map +1 -0
  420. package/dist/services/vlm/service.d.ts +174 -0
  421. package/dist/services/vlm/service.d.ts.map +1 -0
  422. package/dist/services/vlm/service.js +256 -0
  423. package/dist/services/vlm/service.js.map +1 -0
  424. package/dist/services/webhook-delivery.d.ts +4 -0
  425. package/dist/services/webhook-delivery.d.ts.map +1 -0
  426. package/dist/services/webhook-delivery.js +140 -0
  427. package/dist/services/webhook-delivery.js.map +1 -0
  428. package/dist/tools/chunks.d.ts +19 -0
  429. package/dist/tools/chunks.d.ts.map +1 -0
  430. package/dist/tools/chunks.js +392 -0
  431. package/dist/tools/chunks.js.map +1 -0
  432. package/dist/tools/clm.d.ts +16 -0
  433. package/dist/tools/clm.d.ts.map +1 -0
  434. package/dist/tools/clm.js +668 -0
  435. package/dist/tools/clm.js.map +1 -0
  436. package/dist/tools/clustering.d.ts +13 -0
  437. package/dist/tools/clustering.d.ts.map +1 -0
  438. package/dist/tools/clustering.js +498 -0
  439. package/dist/tools/clustering.js.map +1 -0
  440. package/dist/tools/collaboration.d.ts +15 -0
  441. package/dist/tools/collaboration.d.ts.map +1 -0
  442. package/dist/tools/collaboration.js +516 -0
  443. package/dist/tools/collaboration.js.map +1 -0
  444. package/dist/tools/comparison.d.ts +13 -0
  445. package/dist/tools/comparison.d.ts.map +1 -0
  446. package/dist/tools/comparison.js +735 -0
  447. package/dist/tools/comparison.js.map +1 -0
  448. package/dist/tools/compliance.d.ts +15 -0
  449. package/dist/tools/compliance.d.ts.map +1 -0
  450. package/dist/tools/compliance.js +640 -0
  451. package/dist/tools/compliance.js.map +1 -0
  452. package/dist/tools/config.d.ts +19 -0
  453. package/dist/tools/config.d.ts.map +1 -0
  454. package/dist/tools/config.js +213 -0
  455. package/dist/tools/config.js.map +1 -0
  456. package/dist/tools/database.d.ts +62 -0
  457. package/dist/tools/database.d.ts.map +1 -0
  458. package/dist/tools/database.js +288 -0
  459. package/dist/tools/database.js.map +1 -0
  460. package/dist/tools/documents.d.ts +61 -0
  461. package/dist/tools/documents.d.ts.map +1 -0
  462. package/dist/tools/documents.js +1624 -0
  463. package/dist/tools/documents.js.map +1 -0
  464. package/dist/tools/embeddings.d.ts +14 -0
  465. package/dist/tools/embeddings.d.ts.map +1 -0
  466. package/dist/tools/embeddings.js +626 -0
  467. package/dist/tools/embeddings.js.map +1 -0
  468. package/dist/tools/evaluation.d.ts +25 -0
  469. package/dist/tools/evaluation.d.ts.map +1 -0
  470. package/dist/tools/evaluation.js +523 -0
  471. package/dist/tools/evaluation.js.map +1 -0
  472. package/dist/tools/events.d.ts +16 -0
  473. package/dist/tools/events.d.ts.map +1 -0
  474. package/dist/tools/events.js +493 -0
  475. package/dist/tools/events.js.map +1 -0
  476. package/dist/tools/extraction-structured.d.ts +13 -0
  477. package/dist/tools/extraction-structured.d.ts.map +1 -0
  478. package/dist/tools/extraction-structured.js +390 -0
  479. package/dist/tools/extraction-structured.js.map +1 -0
  480. package/dist/tools/extraction.d.ts +24 -0
  481. package/dist/tools/extraction.d.ts.map +1 -0
  482. package/dist/tools/extraction.js +424 -0
  483. package/dist/tools/extraction.js.map +1 -0
  484. package/dist/tools/file-management.d.ts +14 -0
  485. package/dist/tools/file-management.d.ts.map +1 -0
  486. package/dist/tools/file-management.js +523 -0
  487. package/dist/tools/file-management.js.map +1 -0
  488. package/dist/tools/form-fill.d.ts +13 -0
  489. package/dist/tools/form-fill.d.ts.map +1 -0
  490. package/dist/tools/form-fill.js +250 -0
  491. package/dist/tools/form-fill.js.map +1 -0
  492. package/dist/tools/health.d.ts +19 -0
  493. package/dist/tools/health.d.ts.map +1 -0
  494. package/dist/tools/health.js +229 -0
  495. package/dist/tools/health.js.map +1 -0
  496. package/dist/tools/images.d.ts +54 -0
  497. package/dist/tools/images.d.ts.map +1 -0
  498. package/dist/tools/images.js +787 -0
  499. package/dist/tools/images.js.map +1 -0
  500. package/dist/tools/ingestion.d.ts +94 -0
  501. package/dist/tools/ingestion.d.ts.map +1 -0
  502. package/dist/tools/ingestion.js +1659 -0
  503. package/dist/tools/ingestion.js.map +1 -0
  504. package/dist/tools/intelligence.d.ts +18 -0
  505. package/dist/tools/intelligence.d.ts.map +1 -0
  506. package/dist/tools/intelligence.js +1039 -0
  507. package/dist/tools/intelligence.js.map +1 -0
  508. package/dist/tools/provenance.d.ts +51 -0
  509. package/dist/tools/provenance.d.ts.map +1 -0
  510. package/dist/tools/provenance.js +691 -0
  511. package/dist/tools/provenance.js.map +1 -0
  512. package/dist/tools/reports.d.ts +41 -0
  513. package/dist/tools/reports.d.ts.map +1 -0
  514. package/dist/tools/reports.js +1394 -0
  515. package/dist/tools/reports.js.map +1 -0
  516. package/dist/tools/search.d.ts +35 -0
  517. package/dist/tools/search.d.ts.map +1 -0
  518. package/dist/tools/search.js +2528 -0
  519. package/dist/tools/search.js.map +1 -0
  520. package/dist/tools/shared.d.ts +52 -0
  521. package/dist/tools/shared.d.ts.map +1 -0
  522. package/dist/tools/shared.js +54 -0
  523. package/dist/tools/shared.js.map +1 -0
  524. package/dist/tools/tags.d.ts +15 -0
  525. package/dist/tools/tags.d.ts.map +1 -0
  526. package/dist/tools/tags.js +287 -0
  527. package/dist/tools/tags.js.map +1 -0
  528. package/dist/tools/timeline.d.ts +15 -0
  529. package/dist/tools/timeline.d.ts.map +1 -0
  530. package/dist/tools/timeline.js +14 -0
  531. package/dist/tools/timeline.js.map +1 -0
  532. package/dist/tools/users.d.ts +14 -0
  533. package/dist/tools/users.d.ts.map +1 -0
  534. package/dist/tools/users.js +257 -0
  535. package/dist/tools/users.js.map +1 -0
  536. package/dist/tools/vlm.d.ts +40 -0
  537. package/dist/tools/vlm.d.ts.map +1 -0
  538. package/dist/tools/vlm.js +475 -0
  539. package/dist/tools/vlm.js.map +1 -0
  540. package/dist/tools/workflow.d.ts +16 -0
  541. package/dist/tools/workflow.d.ts.map +1 -0
  542. package/dist/tools/workflow.js +495 -0
  543. package/dist/tools/workflow.js.map +1 -0
  544. package/dist/utils/backoff.d.ts +53 -0
  545. package/dist/utils/backoff.d.ts.map +1 -0
  546. package/dist/utils/backoff.js +78 -0
  547. package/dist/utils/backoff.js.map +1 -0
  548. package/dist/utils/config-persistence.d.ts +33 -0
  549. package/dist/utils/config-persistence.d.ts.map +1 -0
  550. package/dist/utils/config-persistence.js +61 -0
  551. package/dist/utils/config-persistence.js.map +1 -0
  552. package/dist/utils/hash.d.ts +65 -0
  553. package/dist/utils/hash.d.ts.map +1 -0
  554. package/dist/utils/hash.js +146 -0
  555. package/dist/utils/hash.js.map +1 -0
  556. package/dist/utils/math.d.ts +21 -0
  557. package/dist/utils/math.d.ts.map +1 -0
  558. package/dist/utils/math.js +39 -0
  559. package/dist/utils/math.js.map +1 -0
  560. package/dist/utils/validation.d.ts +697 -0
  561. package/dist/utils/validation.d.ts.map +1 -0
  562. package/dist/utils/validation.js +529 -0
  563. package/dist/utils/validation.js.map +1 -0
  564. package/package.json +96 -0
  565. package/python/.gitkeep +0 -0
  566. package/python/__init__.py +104 -0
  567. package/python/clustering_worker.py +440 -0
  568. package/python/docx_image_extractor.py +524 -0
  569. package/python/embedding_worker.py +552 -0
  570. package/python/file_manager_worker.py +564 -0
  571. package/python/form_fill_worker.py +399 -0
  572. package/python/gpu_utils.py +582 -0
  573. package/python/image_extractor.py +317 -0
  574. package/python/image_optimizer.py +444 -0
  575. package/python/ocr_worker.py +712 -0
  576. package/python/pyproject.toml +76 -0
  577. package/python/requirements.txt +51 -0
  578. package/python/reranker_worker.py +87 -0
@@ -0,0 +1,87 @@
1
+ /**
2
+ * Embedding interfaces for OCR Provenance MCP System
3
+ *
4
+ * Represents vector embeddings with full provenance.
5
+ * Provenance depth: 3 (from chunks) or 4 (from VLM descriptions)
6
+ *
7
+ * CRITICAL: This interface is denormalized to include original_text
8
+ * and source file info. Search results are self-contained per CP-002.
9
+ */
10
+ /**
11
+ * Embedding model constants: model name and version, vector dimensions, maximum sequence length, and the text prefixes for the document and query task types
12
+ */
13
+ export declare const EMBEDDING_MODEL: {
14
+ readonly name: "nomic-embed-text-v1.5";
15
+ readonly version: "1.5.0";
16
+ readonly dimensions: 768;
17
+ readonly maxSequenceLength: 8192;
18
+ readonly prefixes: {
19
+ readonly document: "search_document: ";
20
+ readonly query: "search_query: ";
21
+ };
22
+ };
23
+ /**
24
+ * Represents a vector embedding with full provenance
25
+ * Provenance depth: 3 (from chunks) or 4 (from VLM descriptions)
26
+ *
27
+ * CRITICAL: This table is denormalized to include original_text
28
+ * and source file info. Search results are self-contained.
29
+ *
30
+ * Either chunk_id (for text embeddings), image_id (for VLM embeddings),
31
+ * or extraction_id (for extraction embeddings) must be set.
32
+ */
33
+ export interface Embedding {
34
+ /** UUID v4 identifier */
35
+ id: string;
36
+ /** Reference to source chunk (null for VLM and extraction embeddings) */
37
+ chunk_id: string | null;
38
+ /** Reference to source image (null for text and extraction embeddings) */
39
+ image_id: string | null;
40
+ /** Reference to source extraction (null for text/VLM embeddings) */
41
+ extraction_id: string | null;
42
+ /** Reference to root document */
43
+ document_id: string;
44
+ /** The actual text that was embedded - ALWAYS INCLUDED (chunk text for text embeddings; presumably the VLM description or extraction content otherwise - verify) */
45
+ original_text: string;
46
+ /** Length of original text in characters */
47
+ original_text_length: number;
48
+ /** Full path to source file */
49
+ source_file_path: string;
50
+ /** Source file name */
51
+ source_file_name: string;
52
+ /** SHA-256 hash of source file */
53
+ source_file_hash: string;
54
+ /** Page number (1-indexed); nullable */
55
+ page_number: number | null;
56
+ /** Page range if spanning multiple pages; nullable */
57
+ page_range: string | null;
58
+ /** Start character offset in OCR text */
59
+ character_start: number;
60
+ /** End character offset in OCR text */
61
+ character_end: number;
62
+ /** Chunk index in document */
63
+ chunk_index: number;
64
+ /** Total chunks in document */
65
+ total_chunks: number;
66
+ /** 768-dimensional float32 vector */
67
+ vector: Float32Array;
68
+ /** Model name (nomic-embed-text-v1.5) */
69
+ model_name: string;
70
+ /** Model version */
71
+ model_version: string;
72
+ /** Task type used: 'search_document' or 'search_query' (same names as the EMBEDDING_MODEL.prefixes strings) */
73
+ task_type: 'search_document' | 'search_query';
74
+ /** Inference mode (always 'local') */
75
+ inference_mode: 'local';
76
+ /** GPU device used */
77
+ gpu_device: string;
78
+ /** Reference to provenance record */
79
+ provenance_id: string;
80
+ /** SHA-256 hash of original_text */
81
+ content_hash: string;
82
+ /** ISO 8601 timestamp */
83
+ created_at: string;
84
+ /** Generation duration in milliseconds; nullable */
85
+ generation_duration_ms: number | null;
86
+ }
87
+ //# sourceMappingURL=embedding.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"embedding.d.ts","sourceRoot":"","sources":["../../src/models/embedding.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH;;GAEG;AACH,eAAO,MAAM,eAAe;;;;;;;;;CASlB,CAAC;AAEX;;;;;;;;;GASG;AACH,MAAM,WAAW,SAAS;IACxB,yBAAyB;IACzB,EAAE,EAAE,MAAM,CAAC;IAEX,0DAA0D;IAC1D,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IAExB,2DAA2D;IAC3D,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IAExB,oEAAoE;IACpE,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAE7B,iCAAiC;IACjC,WAAW,EAAE,MAAM,CAAC;IAGpB,gEAAgE;IAChE,aAAa,EAAE,MAAM,CAAC;IAEtB,4CAA4C;IAC5C,oBAAoB,EAAE,MAAM,CAAC;IAG7B,+BAA+B;IAC/B,gBAAgB,EAAE,MAAM,CAAC;IAEzB,uBAAuB;IACvB,gBAAgB,EAAE,MAAM,CAAC;IAEzB,kCAAkC;IAClC,gBAAgB,EAAE,MAAM,CAAC;IAGzB,8BAA8B;IAC9B,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAE3B,4CAA4C;IAC5C,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAE1B,yCAAyC;IACzC,eAAe,EAAE,MAAM,CAAC;IAExB,uCAAuC;IACvC,aAAa,EAAE,MAAM,CAAC;IAEtB,8BAA8B;IAC9B,WAAW,EAAE,MAAM,CAAC;IAEpB,+BAA+B;IAC/B,YAAY,EAAE,MAAM,CAAC;IAGrB,qCAAqC;IACrC,MAAM,EAAE,YAAY,CAAC;IAGrB,yCAAyC;IACzC,UAAU,EAAE,MAAM,CAAC;IAEnB,oBAAoB;IACpB,aAAa,EAAE,MAAM,CAAC;IAEtB,uCAAuC;IACvC,SAAS,EAAE,iBAAiB,GAAG,cAAc,CAAC;IAE9C,sCAAsC;IACtC,cAAc,EAAE,OAAO,CAAC;IAExB,sBAAsB;IACtB,UAAU,EAAE,MAAM,CAAC;IAGnB,qCAAqC;IACrC,aAAa,EAAE,MAAM,CAAC;IAEtB,oCAAoC;IACpC,YAAY,EAAE,MAAM,CAAC;IAErB,yBAAyB;IACzB,UAAU,EAAE,MAAM,CAAC;IAEnB,0CAA0C;IAC1C,sBAAsB,EAAE,MAAM,GAAG,IAAI,CAAC;CACvC"}
@@ -0,0 +1,23 @@
1
+ /**
2
+ * Embedding interfaces for OCR Provenance MCP System
3
+ *
4
+ * Represents vector embeddings with full provenance.
5
+ * Provenance depth: 3 (from chunks) or 4 (from VLM descriptions)
6
+ *
7
+ * CRITICAL: This interface is denormalized to include original_text
8
+ * and source file info. Search results are self-contained per CP-002.
9
+ */
10
+ /**
11
+ * Embedding model constants: model name and version, vector dimensions, maximum sequence length, and the text prefixes for the document and query task types
12
+ */
13
+ export const EMBEDDING_MODEL = {
14
+ name: 'nomic-embed-text-v1.5',
15
+ version: '1.5.0',
16
+ dimensions: 768,
17
+ maxSequenceLength: 8192,
18
+ prefixes: {
19
+ document: 'search_document: ',
20
+ query: 'search_query: '
21
+ }
22
+ };
23
+ //# sourceMappingURL=embedding.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"embedding.js","sourceRoot":"","sources":["../../src/models/embedding.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH;;GAEG;AACH,MAAM,CAAC,MAAM,eAAe,GAAG;IAC7B,IAAI,EAAE,uBAAuB;IAC7B,OAAO,EAAE,OAAO;IAChB,UAAU,EAAE,GAAG;IACf,iBAAiB,EAAE,IAAI;IACvB,QAAQ,EAAE;QACR,QAAQ,EAAE,mBAAmB;QAC7B,KAAK,EAAE,gBAAgB;KACxB;CACO,CAAC"}
@@ -0,0 +1,15 @@
1
+ /**
2
+ * Extraction interface for structured data extracted via page_schema
3
+ * Provenance depth: 2 (parallel to CHUNK)
4
+ */
5
+ export interface Extraction {
6
+ id: string;
7
+ document_id: string;
8
+ ocr_result_id: string;
9
+ schema_json: string;
10
+ extraction_json: string;
11
+ content_hash: string;
12
+ provenance_id: string;
13
+ created_at: string;
14
+ }
15
+ //# sourceMappingURL=extraction.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"extraction.d.ts","sourceRoot":"","sources":["../../src/models/extraction.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,MAAM,WAAW,UAAU;IACzB,EAAE,EAAE,MAAM,CAAC;IACX,WAAW,EAAE,MAAM,CAAC;IACpB,aAAa,EAAE,MAAM,CAAC;IACtB,WAAW,EAAE,MAAM,CAAC;IACpB,eAAe,EAAE,MAAM,CAAC;IACxB,YAAY,EAAE,MAAM,CAAC;IACrB,aAAa,EAAE,MAAM,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;CACpB"}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=extraction.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"extraction.js","sourceRoot":"","sources":["../../src/models/extraction.ts"],"names":[],"mappings":""}
@@ -0,0 +1,23 @@
1
+ /**
2
+ * FormFill interface for form fill results from Datalab /fill API
3
+ * Provenance depth: 1 (NOTE(review): ProvenanceType.FORM_FILL is documented as depth 0, parallel to DOCUMENT - confirm which is correct)
4
+ */
5
+ export interface FormFill {
6
+ id: string;
7
+ source_file_path: string;
8
+ source_file_hash: string;
9
+ field_data_json: string;
10
+ context: string | null;
11
+ confidence_threshold: number;
12
+ output_file_path: string | null;
13
+ output_base64: string | null;
14
+ fields_filled: string;
15
+ fields_not_found: string;
16
+ page_count: number | null;
17
+ cost_cents: number | null;
18
+ status: 'pending' | 'processing' | 'complete' | 'failed';
19
+ error_message: string | null;
20
+ provenance_id: string;
21
+ created_at: string;
22
+ }
23
+ //# sourceMappingURL=form-fill.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"form-fill.d.ts","sourceRoot":"","sources":["../../src/models/form-fill.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,MAAM,WAAW,QAAQ;IACvB,EAAE,EAAE,MAAM,CAAC;IACX,gBAAgB,EAAE,MAAM,CAAC;IACzB,gBAAgB,EAAE,MAAM,CAAC;IACzB,eAAe,EAAE,MAAM,CAAC;IACxB,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB,oBAAoB,EAAE,MAAM,CAAC;IAC7B,gBAAgB,EAAE,MAAM,GAAG,IAAI,CAAC;IAChC,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,aAAa,EAAE,MAAM,CAAC;IACtB,gBAAgB,EAAE,MAAM,CAAC;IACzB,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,MAAM,EAAE,SAAS,GAAG,YAAY,GAAG,UAAU,GAAG,QAAQ,CAAC;IACzD,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,aAAa,EAAE,MAAM,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;CACpB"}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=form-fill.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"form-fill.js","sourceRoot":"","sources":["../../src/models/form-fill.ts"],"names":[],"mappings":""}
@@ -0,0 +1,177 @@
1
+ /**
2
+ * Image interfaces for OCR Provenance MCP System
3
+ *
4
+ * Represents images extracted from documents for VLM analysis.
5
+ * Provenance depth: 2 (after OCR)
6
+ */
7
+ /**
8
+ * Bounding box coordinates for image location in document
9
+ */
10
+ interface BoundingBox {
11
+ /** X coordinate (pixels from left) */
12
+ x: number;
13
+ /** Y coordinate (pixels from top) */
14
+ y: number;
15
+ /** Width in pixels */
16
+ width: number;
17
+ /** Height in pixels */
18
+ height: number;
19
+ }
20
+ /**
21
+ * Image dimensions
22
+ */
23
+ interface ImageDimensions {
24
+ /** Width in pixels */
25
+ width: number;
26
+ /** Height in pixels */
27
+ height: number;
28
+ }
29
+ /**
30
+ * VLM processing status
31
+ */
32
+ export type VLMStatus = 'pending' | 'processing' | 'complete' | 'failed';
33
+ /**
34
+ * Structured data extracted from image by VLM
35
+ */
36
+ export interface VLMStructuredData {
37
+ /** Type of image identified */
38
+ imageType: string;
39
+ /** Primary subject/content */
40
+ primarySubject?: string;
41
+ /** Extracted text strings */
42
+ extractedText?: string[];
43
+ /** Dates found in image */
44
+ dates?: string[];
45
+ /** Names found in image */
46
+ names?: string[];
47
+ /** Numbers/amounts found */
48
+ numbers?: string[];
49
+ /** Additional metadata */
50
+ [key: string]: unknown;
51
+ }
52
+ /**
53
+ * Represents an extracted image from a document
54
+ * Provenance depth: 2 (after OCR extraction)
55
+ */
56
+ export interface ImageReference {
57
+ /** UUID v4 identifier */
58
+ id: string;
59
+ /** Reference to source document */
60
+ document_id: string;
61
+ /** Reference to OCR result that produced this */
62
+ ocr_result_id: string;
63
+ /** Page number where image appears (1-indexed) */
64
+ page_number: number;
65
+ /** Bounding box coordinates on page */
66
+ bounding_box: BoundingBox;
67
+ /** Index of image on this page (0-indexed) */
68
+ image_index: number;
69
+ /** Image format (png, jpg, etc.) */
70
+ format: string;
71
+ /** Image dimensions in pixels */
72
+ dimensions: ImageDimensions;
73
+ /** Path to extracted image file */
74
+ extracted_path: string | null;
75
+ /** File size in bytes */
76
+ file_size: number | null;
77
+ /** VLM processing status */
78
+ vlm_status: VLMStatus;
79
+ /** Natural language description from VLM */
80
+ vlm_description: string | null;
81
+ /** Structured data extracted by VLM */
82
+ vlm_structured_data: VLMStructuredData | null;
83
+ /** Reference to embedding of VLM description */
84
+ vlm_embedding_id: string | null;
85
+ /** Model used for VLM processing */
86
+ vlm_model: string | null;
87
+ /** VLM confidence score (0-1) */
88
+ vlm_confidence: number | null;
89
+ /** ISO 8601 timestamp when VLM completed */
90
+ vlm_processed_at: string | null;
91
+ /** Tokens used for VLM processing */
92
+ vlm_tokens_used: number | null;
93
+ /** Surrounding text context from document */
94
+ context_text: string | null;
95
+ /** Reference to provenance record */
96
+ provenance_id: string | null;
97
+ /** Datalab block type: 'Figure', 'Picture', 'PageHeader', etc. Null if unknown */
98
+ block_type: string | null;
99
+ /** True if image is inside a PageHeader or PageFooter block */
100
+ is_header_footer: boolean;
101
+ /** SHA-256 hash of image file bytes for deduplication: 'sha256:...' */
102
+ content_hash: string | null;
103
+ /** ISO 8601 timestamp when image was extracted */
104
+ created_at: string;
105
+ /** Error message if VLM failed */
106
+ error_message: string | null;
107
+ }
108
+ /**
109
+ * Input for creating a new image reference
110
+ */
111
+ export type CreateImageReference = Omit<ImageReference, 'id' | 'created_at' | 'vlm_status' | 'vlm_description' | 'vlm_structured_data' | 'vlm_embedding_id' | 'vlm_model' | 'vlm_confidence' | 'vlm_processed_at' | 'vlm_tokens_used' | 'error_message'>;
112
+ /**
113
+ * VLM result to update an image with
114
+ */
115
+ export interface VLMResult {
116
+ /** Natural language description */
117
+ description: string;
118
+ /** Structured data extracted */
119
+ structuredData: VLMStructuredData;
120
+ /** Embedding ID for description */
121
+ embeddingId: string;
122
+ /** Model used */
123
+ model: string;
124
+ /** Confidence score (0-1) */
125
+ confidence: number;
126
+ /** Tokens used */
127
+ tokensUsed: number;
128
+ }
129
+ /**
130
+ * Image extraction result from PDF
131
+ */
132
+ export interface ExtractedImage {
133
+ /** Page number (1-indexed) */
134
+ page: number;
135
+ /** Index on page (0-indexed) */
136
+ index: number;
137
+ /** Image format */
138
+ format: string;
139
+ /** Width in pixels */
140
+ width: number;
141
+ /** Height in pixels */
142
+ height: number;
143
+ /** Bounding box on page */
144
+ bbox: BoundingBox;
145
+ /** Path to extracted file */
146
+ path: string;
147
+ /** File size in bytes */
148
+ size: number;
149
+ }
150
+ /**
151
+ * Image extraction options
152
+ */
153
+ export interface ImageExtractionOptions {
154
+ /** Minimum image dimension (pixels) to include */
155
+ minSize?: number;
156
+ /** Maximum images to extract per document */
157
+ maxImages?: number;
158
+ /** Output directory for extracted images */
159
+ outputDir: string;
160
+ }
161
+ /**
162
+ * Image statistics
163
+ */
164
+ export interface ImageStats {
165
+ /** Total images in database */
166
+ total: number;
167
+ /** Images with VLM complete */
168
+ processed: number;
169
+ /** Images pending VLM */
170
+ pending: number;
171
+ /** Images currently being processed */
172
+ processing: number;
173
+ /** Images with VLM failed */
174
+ failed: number;
175
+ }
176
+ export {};
177
+ //# sourceMappingURL=image.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"image.d.ts","sourceRoot":"","sources":["../../src/models/image.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH;;GAEG;AACH,UAAU,WAAW;IACnB,sCAAsC;IACtC,CAAC,EAAE,MAAM,CAAC;IACV,qCAAqC;IACrC,CAAC,EAAE,MAAM,CAAC;IACV,sBAAsB;IACtB,KAAK,EAAE,MAAM,CAAC;IACd,uBAAuB;IACvB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,UAAU,eAAe;IACvB,sBAAsB;IACtB,KAAK,EAAE,MAAM,CAAC;IACd,uBAAuB;IACvB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED;;GAEG;AACH,MAAM,MAAM,SAAS,GAAG,SAAS,GAAG,YAAY,GAAG,UAAU,GAAG,QAAQ,CAAC;AAEzE;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,+BAA+B;IAC/B,SAAS,EAAE,MAAM,CAAC;IAClB,8BAA8B;IAC9B,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,6BAA6B;IAC7B,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;IACzB,2BAA2B;IAC3B,KAAK,CAAC,EAAE,MAAM,EAAE,CAAC;IACjB,2BAA2B;IAC3B,KAAK,CAAC,EAAE,MAAM,EAAE,CAAC;IACjB,4BAA4B;IAC5B,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB,0BAA0B;IAC1B,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;CACxB;AAED;;;GAGG;AACH,MAAM,WAAW,cAAc;IAC7B,yBAAyB;IACzB,EAAE,EAAE,MAAM,CAAC;IAEX,mCAAmC;IACnC,WAAW,EAAE,MAAM,CAAC;IAEpB,iDAAiD;IACjD,aAAa,EAAE,MAAM,CAAC;IAEtB,kDAAkD;IAClD,WAAW,EAAE,MAAM,CAAC;IAEpB,uCAAuC;IACvC,YAAY,EAAE,WAAW,CAAC;IAE1B,8CAA8C;IAC9C,WAAW,EAAE,MAAM,CAAC;IAEpB,oCAAoC;IACpC,MAAM,EAAE,MAAM,CAAC;IAEf,iCAAiC;IACjC,UAAU,EAAE,eAAe,CAAC;IAE5B,mCAAmC;IACnC,cAAc,EAAE,MAAM,GAAG,IAAI,CAAC;IAE9B,yBAAyB;IACzB,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IAEzB,4BAA4B;IAC5B,UAAU,EAAE,SAAS,CAAC;IAEtB,4CAA4C;IAC5C,eAAe,EAAE,MAAM,GAAG,IAAI,CAAC;IAE/B,uCAAuC;IACvC,mBAAmB,EAAE,iBAAiB,GAAG,IAAI,CAAC;IAE9C,gDAAgD;IAChD,gBAAgB,EAAE,MAAM,GAAG,IAAI,CAAC;IAEhC,oCAAoC;IACpC,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IAEzB,iCAAiC;IACjC,cAAc,EAAE,MAAM,GAAG,IAAI,CAAC;IAE9B,4CAA4C;IAC5C,gBAAgB,EAAE,MAAM,GAAG,IAAI,CAAC;IAEhC,qCAAqC;IACrC,eAAe,EAAE,MAAM,GAAG,IAAI,CAAC;IAE/B,6CAA6C;IAC7C,YAAY,EAAE,MAAM,GAAG,IAAI,CAAC;IAE5B,qCAAqC;IACrC,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAE7B,kFAAkF;IAClF,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAE1B,+DAA+D;IAC/D,gBAAgB,EAAE,OAAO,CAAC;IAE1B,uEAAuE;IACvE,YAAY,EAAE,MAAM,GAAG,IAAI,CAAC;IAE5B,kDAAkD;IAClD,UAAU,EAAE,MAAM,CAAC;IAEnB,kCAAkC;IAClC,aAAa
,EAAE,MAAM,GAAG,IAAI,CAAC;CAC9B;AAED;;GAEG;AACH,MAAM,MAAM,oBAAoB,GAAG,IAAI,CACrC,cAAc,EACZ,IAAI,GACJ,YAAY,GACZ,YAAY,GACZ,iBAAiB,GACjB,qBAAqB,GACrB,kBAAkB,GAClB,WAAW,GACX,gBAAgB,GAChB,kBAAkB,GAClB,iBAAiB,GACjB,eAAe,CAClB,CAAC;AAEF;;GAEG;AACH,MAAM,WAAW,SAAS;IACxB,mCAAmC;IACnC,WAAW,EAAE,MAAM,CAAC;IACpB,gCAAgC;IAChC,cAAc,EAAE,iBAAiB,CAAC;IAClC,mCAAmC;IACnC,WAAW,EAAE,MAAM,CAAC;IACpB,iBAAiB;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,6BAA6B;IAC7B,UAAU,EAAE,MAAM,CAAC;IACnB,kBAAkB;IAClB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,8BAA8B;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,gCAAgC;IAChC,KAAK,EAAE,MAAM,CAAC;IACd,mBAAmB;IACnB,MAAM,EAAE,MAAM,CAAC;IACf,sBAAsB;IACtB,KAAK,EAAE,MAAM,CAAC;IACd,uBAAuB;IACvB,MAAM,EAAE,MAAM,CAAC;IACf,2BAA2B;IAC3B,IAAI,EAAE,WAAW,CAAC;IAClB,6BAA6B;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,yBAAyB;IACzB,IAAI,EAAE,MAAM,CAAC;CACd;AAED;;GAEG;AACH,MAAM,WAAW,sBAAsB;IACrC,kDAAkD;IAClD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,6CAA6C;IAC7C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,4CAA4C;IAC5C,SAAS,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,+BAA+B;IAC/B,KAAK,EAAE,MAAM,CAAC;IACd,+BAA+B;IAC/B,SAAS,EAAE,MAAM,CAAC;IAClB,yBAAyB;IACzB,OAAO,EAAE,MAAM,CAAC;IAChB,uCAAuC;IACvC,UAAU,EAAE,MAAM,CAAC;IACnB,6BAA6B;IAC7B,MAAM,EAAE,MAAM,CAAC;CAChB"}
@@ -0,0 +1,8 @@
1
+ /**
2
+ * Image interfaces for OCR Provenance MCP System
3
+ *
4
+ * Represents images extracted from documents for VLM analysis.
5
+ * Provenance depth: 2 (after OCR)
6
+ */
7
+ export {};
8
+ //# sourceMappingURL=image.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"image.js","sourceRoot":"","sources":["../../src/models/image.ts"],"names":[],"mappings":"AAAA;;;;;GAKG"}
@@ -0,0 +1,14 @@
1
+ /**
2
+ * OCR Provenance MCP System - Data Models
3
+ *
4
+ * Barrel export for all model interfaces.
5
+ */
6
+ export * from './document.js';
7
+ export * from './chunk.js';
8
+ export * from './embedding.js';
9
+ export * from './provenance.js';
10
+ export * from './image.js';
11
+ export * from './extraction.js';
12
+ export * from './form-fill.js';
13
+ export * from './comparison.js';
14
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/models/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAGH,cAAc,eAAe,CAAC;AAG9B,cAAc,YAAY,CAAC;AAG3B,cAAc,gBAAgB,CAAC;AAG/B,cAAc,iBAAiB,CAAC;AAGhC,cAAc,YAAY,CAAC;AAG3B,cAAc,iBAAiB,CAAC;AAGhC,cAAc,gBAAgB,CAAC;AAG/B,cAAc,iBAAiB,CAAC"}
@@ -0,0 +1,22 @@
1
+ /**
2
+ * OCR Provenance MCP System - Data Models
3
+ *
4
+ * Barrel export for all model interfaces.
5
+ */
6
+ // Document models
7
+ export * from './document.js';
8
+ // Chunk models
9
+ export * from './chunk.js';
10
+ // Embedding models
11
+ export * from './embedding.js';
12
+ // Provenance models
13
+ export * from './provenance.js';
14
+ // Image models
15
+ export * from './image.js';
16
+ // Extraction models
17
+ export * from './extraction.js';
18
+ // Form fill models
19
+ export * from './form-fill.js';
20
+ // Comparison models
21
+ export * from './comparison.js';
22
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/models/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,kBAAkB;AAClB,cAAc,eAAe,CAAC;AAE9B,eAAe;AACf,cAAc,YAAY,CAAC;AAE3B,mBAAmB;AACnB,cAAc,gBAAgB,CAAC;AAE/B,oBAAoB;AACpB,cAAc,iBAAiB,CAAC;AAEhC,eAAe;AACf,cAAc,YAAY,CAAC;AAE3B,oBAAoB;AACpB,cAAc,iBAAiB,CAAC;AAEhC,mBAAmB;AACnB,cAAc,gBAAgB,CAAC;AAE/B,oBAAoB;AACpB,cAAc,iBAAiB,CAAC"}
@@ -0,0 +1,174 @@
1
+ /**
2
+ * Provenance interfaces for OCR Provenance MCP System
3
+ *
4
+ * Comprehensive provenance tracking for complete data lineage.
5
+ * Every data transformation creates a provenance record.
6
+ */
7
+ /**
8
+ * Types of provenance records in the chain
9
+ * Each type has a nominal chain depth (EMBEDDING is the exception: 3 from CHUNK, 4 from VLM_DESCRIPTION)
10
+ */
11
+ export declare enum ProvenanceType {
12
+ /** Original source file (depth 0) */
13
+ DOCUMENT = "DOCUMENT",
14
+ /** Text extracted via Datalab OCR (depth 1) */
15
+ OCR_RESULT = "OCR_RESULT",
16
+ /** Text segment with overlap (depth 2) */
17
+ CHUNK = "CHUNK",
18
+ /** Extracted image from document (depth 2, parallel to CHUNK) */
19
+ IMAGE = "IMAGE",
20
+ /** VLM analysis output describing an image (depth 3) */
21
+ VLM_DESCRIPTION = "VLM_DESCRIPTION",
22
+ /** Structured extraction from page_schema (depth 2, parallel to CHUNK) */
23
+ EXTRACTION = "EXTRACTION",
24
+ /** Form fill result (depth 0, parallel to DOCUMENT; NOTE(review): the FormFill model doc says provenance depth 1 - confirm) */
25
+ FORM_FILL = "FORM_FILL",
26
+ /** Comparison between two documents (depth 2, parallel to CHUNK) */
27
+ COMPARISON = "COMPARISON",
28
+ /** Document clustering result (depth 2, parallel to CHUNK) */
29
+ CLUSTERING = "CLUSTERING",
30
+ /** 768-dim vector from nomic model (depth 3 from CHUNK, depth 4 from VLM_DESCRIPTION) */
31
+ EMBEDDING = "EMBEDDING"
32
+ }
33
+ /**
34
+ * Chain depth for each provenance type
35
+ * Note: EMBEDDING depth is 3 when derived from CHUNK, but 4 when derived from VLM_DESCRIPTION.
36
+ * The actual depth is computed dynamically from the parent chain.
37
+ */
38
+ export declare const PROVENANCE_CHAIN_DEPTH: Record<ProvenanceType, number>;
39
+ /**
40
+ * Source type for provenance tracking
41
+ */
42
+ export type SourceType = 'FILE' | 'OCR' | 'CHUNKING' | 'IMAGE_EXTRACTION' | 'VLM' | 'VLM_DEDUP' | 'EMBEDDING' | 'EXTRACTION' | 'FORM_FILL' | 'COMPARISON' | 'CLUSTERING';
43
+ /**
44
+ * Location information within source document
45
+ */
46
+ export interface ProvenanceLocation {
47
+ /** 1-indexed page number */
48
+ page_number?: number;
49
+ /** Page range if spanning pages (e.g., "4-5") */
50
+ page_range?: string;
51
+ /** Start character offset */
52
+ character_start?: number;
53
+ /** End character offset */
54
+ character_end?: number;
55
+ /** Chunk index (0-indexed) */
56
+ chunk_index?: number;
57
+ /** Bounding box for OCR position data */
58
+ bounding_box?: {
59
+ x: number;
60
+ y: number;
61
+ width: number;
62
+ height: number;
63
+ page: number;
64
+ };
65
+ }
66
+ /**
67
+ * Comprehensive provenance record
68
+ * Every data transformation creates one of these
69
+ */
70
+ export interface ProvenanceRecord {
71
+ /** UUID v4 - globally unique */
72
+ id: string;
73
+ /** Type of record (determines the nominal chain depth; EMBEDDING depth is computed from the parent chain) */
74
+ type: ProvenanceType;
75
+ /** ISO 8601 - when this record was created */
76
+ created_at: string;
77
+ /** ISO 8601 - when processing completed */
78
+ processed_at: string;
79
+ /** Original file creation time (for DOCUMENT type) */
80
+ source_file_created_at: string | null;
81
+ /** Original file modified time (for DOCUMENT type) */
82
+ source_file_modified_at: string | null;
83
+ /** Type of source operation */
84
+ source_type: SourceType;
85
+ /** Full absolute path (for FILE source) */
86
+ source_path: string | null;
87
+ /** Parent provenance ID (null for DOCUMENT type) */
88
+ source_id: string | null;
89
+ /** Original document this derives from */
90
+ root_document_id: string;
91
+ /** Precise location within source */
92
+ location: ProvenanceLocation | null;
93
+ /** SHA-256 of THIS item's content (format: 'sha256:...') */
94
+ content_hash: string;
95
+ /** SHA-256 of the INPUT that produced this */
96
+ input_hash: string | null;
97
+ /** SHA-256 of original source file (for tracing) */
98
+ file_hash: string | null;
99
+ /** Processor name (e.g., 'datalab-ocr', 'chunker', 'nomic-embed-text-v1.5') */
100
+ processor: string;
101
+ /** Exact processor version */
102
+ processor_version: string;
103
+ /** ALL parameters used for processing */
104
+ processing_params: Record<string, unknown>;
105
+ /** Processing duration in milliseconds */
106
+ processing_duration_ms: number | null;
107
+ /** Quality metric if available */
108
+ processing_quality_score: number | null;
109
+ /** Immediate parent provenance ID (NOTE(review): source_id is also documented as the parent provenance ID - confirm how the two differ) */
110
+ parent_id: string | null;
111
+ /** ALL ancestor provenance IDs as JSON array string */
112
+ parent_ids: string;
113
+ /** How many transformations from source */
114
+ chain_depth: number;
115
+ /** Human-readable path as JSON array string */
116
+ chain_path: string | null;
117
+ }
118
+ /**
119
+ * Parameters for creating a new provenance record
120
+ */
121
+ export interface CreateProvenanceParams {
122
+ type: ProvenanceType;
123
+ source_type: SourceType;
124
+ source_id?: string | null;
125
+ root_document_id: string;
126
+ content_hash: string;
127
+ input_hash?: string | null;
128
+ file_hash?: string | null;
129
+ source_path?: string | null;
130
+ processor: string;
131
+ processor_version: string;
132
+ processing_params: Record<string, unknown>;
133
+ processing_duration_ms?: number | null;
134
+ processing_quality_score?: number | null;
135
+ location?: ProvenanceLocation | null;
136
+ source_file_created_at?: string | null;
137
+ source_file_modified_at?: string | null;
138
+ }
139
+ /**
140
+ * Verification result for provenance integrity
141
+ */
142
+ export interface VerificationResult {
143
+ /** Overall validity */
144
+ valid: boolean;
145
+ /** Whether chain is intact (no missing records) */
146
+ chain_intact: boolean;
147
+ /** Number of hashes successfully verified */
148
+ hashes_verified: number;
149
+ /** Number of hash verification failures */
150
+ hashes_failed: number;
151
+ /** Details of failed verifications */
152
+ failed_items: Array<{
153
+ id: string;
154
+ expected_hash: string;
155
+ computed_hash: string;
156
+ type: ProvenanceType;
157
+ }>;
158
+ /** ISO 8601 timestamp of verification */
159
+ verified_at: string;
160
+ }
161
+ /**
162
+ * W3C PROV-JSON export format
163
+ */
164
+ export interface W3CProvDocument {
165
+ prefix: Record<string, string>;
166
+ entity: Record<string, Record<string, unknown>>;
167
+ activity: Record<string, Record<string, unknown>>;
168
+ agent: Record<string, Record<string, unknown>>;
169
+ wasGeneratedBy: Record<string, Record<string, unknown>>;
170
+ wasDerivedFrom: Record<string, Record<string, unknown>>;
171
+ wasAttributedTo: Record<string, Record<string, unknown>>;
172
+ used: Record<string, Record<string, unknown>>;
173
+ }
174
+ //# sourceMappingURL=provenance.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"provenance.d.ts","sourceRoot":"","sources":["../../src/models/provenance.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH;;;GAGG;AACH,oBAAY,cAAc;IACxB,qCAAqC;IACrC,QAAQ,aAAa;IAErB,+CAA+C;IAC/C,UAAU,eAAe;IAEzB,0CAA0C;IAC1C,KAAK,UAAU;IAEf,iEAAiE;IACjE,KAAK,UAAU;IAEf,wDAAwD;IACxD,eAAe,oBAAoB;IAEnC,0EAA0E;IAC1E,UAAU,eAAe;IAEzB,uDAAuD;IACvD,SAAS,cAAc;IAEvB,oEAAoE;IACpE,UAAU,eAAe;IAEzB,8DAA8D;IAC9D,UAAU,eAAe;IAEzB,yFAAyF;IACzF,SAAS,cAAc;CACxB;AAED;;;;GAIG;AAGH,eAAO,MAAM,sBAAsB,EAAE,MAAM,CAAC,cAAc,EAAE,MAAM,CAWjE,CAAC;AAEF;;GAEG;AACH,MAAM,MAAM,UAAU,GAAG,MAAM,GAAG,KAAK,GAAG,UAAU,GAAG,kBAAkB,GAAG,KAAK,GAAG,WAAW,GAAG,WAAW,GAAG,YAAY,GAAG,WAAW,GAAG,YAAY,GAAG,YAAY,CAAC;AAEzK;;GAEG;AACH,MAAM,WAAW,kBAAkB;IACjC,4BAA4B;IAC5B,WAAW,CAAC,EAAE,MAAM,CAAC;IAErB,iDAAiD;IACjD,UAAU,CAAC,EAAE,MAAM,CAAC;IAEpB,6BAA6B;IAC7B,eAAe,CAAC,EAAE,MAAM,CAAC;IAEzB,2BAA2B;IAC3B,aAAa,CAAC,EAAE,MAAM,CAAC;IAEvB,8BAA8B;IAC9B,WAAW,CAAC,EAAE,MAAM,CAAC;IAErB,yCAAyC;IACzC,YAAY,CAAC,EAAE;QACb,CAAC,EAAE,MAAM,CAAC;QACV,CAAC,EAAE,MAAM,CAAC;QACV,KAAK,EAAE,MAAM,CAAC;QACd,MAAM,EAAE,MAAM,CAAC;QACf,IAAI,EAAE,MAAM,CAAC;KACd,CAAC;CACH;AAED;;;GAGG;AACH,MAAM,WAAW,gBAAgB;IAE/B,gCAAgC;IAChC,EAAE,EAAE,MAAM,CAAC;IAEX,8CAA8C;IAC9C,IAAI,EAAE,cAAc,CAAC;IAGrB,8CAA8C;IAC9C,UAAU,EAAE,MAAM,CAAC;IAEnB,2CAA2C;IAC3C,YAAY,EAAE,MAAM,CAAC;IAErB,sDAAsD;IACtD,sBAAsB,EAAE,MAAM,GAAG,IAAI,CAAC;IAEtC,sDAAsD;IACtD,uBAAuB,EAAE,MAAM,GAAG,IAAI,CAAC;IAGvC,+BAA+B;IAC/B,WAAW,EAAE,UAAU,CAAC;IAExB,2CAA2C;IAC3C,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAE3B,oDAAoD;IACpD,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IAEzB,0CAA0C;IAC1C,gBAAgB,EAAE,MAAM,CAAC;IAGzB,qCAAqC;IACrC,QAAQ,EAAE,kBAAkB,GAAG,IAAI,CAAC;IAGpC,4DAA4D;IAC5D,YAAY,EAAE,MAAM,CAAC;IAErB,8CAA8C;IAC9C,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAE1B,oDAAoD;IACpD,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IAGzB,+EAA+E;IAC/E,SAAS,EAAE,MAAM,CAAC;IAElB,8BAA8B;IAC9B,iBAAiB,EAAE,MAAM,CAAC;IAE1B,yCAAyC;IACzC,iBAAiB,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAE3C,0CAA0C;IAC1C,sBAAsB,EAAE,MAAM,GAAG,IAAI,CAAC;IAEtC,kCAAkC;IAClC,wBAAwB,EAAE,M
AAM,GAAG,IAAI,CAAC;IAGxC,qCAAqC;IACrC,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IAEzB,uDAAuD;IACvD,UAAU,EAAE,MAAM,CAAC;IAEnB,2CAA2C;IAC3C,WAAW,EAAE,MAAM,CAAC;IAEpB,+CAA+C;IAC/C,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;CAC3B;AAED;;GAEG;AACH,MAAM,WAAW,sBAAsB;IACrC,IAAI,EAAE,cAAc,CAAC;IACrB,WAAW,EAAE,UAAU,CAAC;IACxB,SAAS,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,gBAAgB,EAAE,MAAM,CAAC;IACzB,YAAY,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,SAAS,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,WAAW,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC5B,SAAS,EAAE,MAAM,CAAC;IAClB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,iBAAiB,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC3C,sBAAsB,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACvC,wBAAwB,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACzC,QAAQ,CAAC,EAAE,kBAAkB,GAAG,IAAI,CAAC;IACrC,sBAAsB,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACvC,uBAAuB,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CACzC;AAED;;GAEG;AACH,MAAM,WAAW,kBAAkB;IACjC,uBAAuB;IACvB,KAAK,EAAE,OAAO,CAAC;IAEf,mDAAmD;IACnD,YAAY,EAAE,OAAO,CAAC;IAEtB,6CAA6C;IAC7C,eAAe,EAAE,MAAM,CAAC;IAExB,2CAA2C;IAC3C,aAAa,EAAE,MAAM,CAAC;IAEtB,sCAAsC;IACtC,YAAY,EAAE,KAAK,CAAC;QAClB,EAAE,EAAE,MAAM,CAAC;QACX,aAAa,EAAE,MAAM,CAAC;QACtB,aAAa,EAAE,MAAM,CAAC;QACtB,IAAI,EAAE,cAAc,CAAC;KACtB,CAAC,CAAC;IAEH,yCAAyC;IACzC,WAAW,EAAE,MAAM,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC/B,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC;IAChD,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC;IAClD,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC;IAC/C,cAAc,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC;IACxD,cAAc,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC;IACxD,eAAe,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC;IACzD,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC;CAC/C"}
@@ -0,0 +1,53 @@
1
+ /**
2
+ * Provenance interfaces for OCR Provenance MCP System
3
+ *
4
+ * Comprehensive provenance tracking for complete data lineage.
5
+ * Every data transformation creates a provenance record.
6
+ */
7
+ /**
8
+ * Types of provenance records in the chain; each member's runtime value is the same string as its name
9
+ * Each type has a fixed chain depth, except EMBEDDING, whose depth depends on its parent (3 from CHUNK, 4 from VLM_DESCRIPTION)
10
+ */
11
+ export var ProvenanceType;
12
+ (function (ProvenanceType) {
13
+ /** Original source file (depth 0) */
14
+ ProvenanceType["DOCUMENT"] = "DOCUMENT";
15
+ /** Text extracted via Datalab OCR (depth 1) */
16
+ ProvenanceType["OCR_RESULT"] = "OCR_RESULT";
17
+ /** Text segment with overlap (depth 2) */
18
+ ProvenanceType["CHUNK"] = "CHUNK";
19
+ /** Extracted image from document (depth 2, parallel to CHUNK) */
20
+ ProvenanceType["IMAGE"] = "IMAGE";
21
+ /** VLM analysis output describing an image (depth 3) */
22
+ ProvenanceType["VLM_DESCRIPTION"] = "VLM_DESCRIPTION";
23
+ /** Structured extraction from page_schema (depth 2, parallel to CHUNK) */
24
+ ProvenanceType["EXTRACTION"] = "EXTRACTION";
25
+ /** Form fill result (depth 0, parallel to DOCUMENT) */
26
+ ProvenanceType["FORM_FILL"] = "FORM_FILL";
27
+ /** Comparison between two documents (depth 2, parallel to CHUNK) */
28
+ ProvenanceType["COMPARISON"] = "COMPARISON";
29
+ /** Document clustering result (depth 2, parallel to CHUNK) */
30
+ ProvenanceType["CLUSTERING"] = "CLUSTERING";
31
+ /** 768-dim vector from nomic model (depth 3 from CHUNK, depth 4 from VLM_DESCRIPTION) */
32
+ ProvenanceType["EMBEDDING"] = "EMBEDDING";
33
+ })(ProvenanceType || (ProvenanceType = {})); // populate the exported binding: reuse it if already set, otherwise start from {}
34
+ /**
35
+ * Chain depth for each provenance type, keyed by ProvenanceType value
36
+ * Note: the EMBEDDING entry (3) is the depth when derived from CHUNK; the depth is 4 when derived from VLM_DESCRIPTION.
37
+ * The actual depth is computed dynamically from the parent chain.
38
+ */
39
+ // NOTE: EMBEDDING depth is 3 for chunk-derived embeddings only.
40
+ // VLM description embeddings use depth 4 and bypass createProvenance() - see pipeline.ts
41
+ export const PROVENANCE_CHAIN_DEPTH = {
42
+ [ProvenanceType.DOCUMENT]: 0, // Original source file
43
+ [ProvenanceType.OCR_RESULT]: 1, // Text extracted via OCR
44
+ [ProvenanceType.CHUNK]: 2, // Text segment
45
+ [ProvenanceType.IMAGE]: 2, // Same depth as CHUNK (parallel branch)
46
+ [ProvenanceType.VLM_DESCRIPTION]: 3, // After IMAGE
47
+ [ProvenanceType.EXTRACTION]: 2, // Same depth as CHUNK (parallel branch)
48
+ [ProvenanceType.FORM_FILL]: 0, // Same depth as DOCUMENT (form fill is a root-level operation)
49
+ [ProvenanceType.COMPARISON]: 2, // Same depth as CHUNK (parallel branch)
50
+ [ProvenanceType.CLUSTERING]: 2, // Same depth as CHUNK (parallel branch)
51
+ [ProvenanceType.EMBEDDING]: 3 // Chunk-derived only; VLM embeddings use depth 4 (built in pipeline.ts)
52
+ };
53
+ //# sourceMappingURL=provenance.js.map