ocr-provenance-mcp 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ocr-provenance-mcp might be problematic. Click here for more details.

Files changed (578) hide show
  1. package/.env.example +55 -0
  2. package/LICENSE +78 -0
  3. package/README.md +1154 -0
  4. package/dist/bin-http.d.ts +24 -0
  5. package/dist/bin-http.d.ts.map +1 -0
  6. package/dist/bin-http.js +275 -0
  7. package/dist/bin-http.js.map +1 -0
  8. package/dist/bin-setup.d.ts +11 -0
  9. package/dist/bin-setup.d.ts.map +1 -0
  10. package/dist/bin-setup.js +610 -0
  11. package/dist/bin-setup.js.map +1 -0
  12. package/dist/bin.d.ts +16 -0
  13. package/dist/bin.d.ts.map +1 -0
  14. package/dist/bin.js +16 -0
  15. package/dist/bin.js.map +1 -0
  16. package/dist/index.d.ts +13 -0
  17. package/dist/index.d.ts.map +1 -0
  18. package/dist/index.js +90 -0
  19. package/dist/index.js.map +1 -0
  20. package/dist/models/chunk.d.ts +136 -0
  21. package/dist/models/chunk.d.ts.map +1 -0
  22. package/dist/models/chunk.js +27 -0
  23. package/dist/models/chunk.js.map +1 -0
  24. package/dist/models/cluster.d.ts +79 -0
  25. package/dist/models/cluster.d.ts.map +1 -0
  26. package/dist/models/cluster.js +10 -0
  27. package/dist/models/cluster.js.map +1 -0
  28. package/dist/models/comparison.d.ts +62 -0
  29. package/dist/models/comparison.d.ts.map +1 -0
  30. package/dist/models/comparison.js +8 -0
  31. package/dist/models/comparison.js.map +1 -0
  32. package/dist/models/document.d.ts +104 -0
  33. package/dist/models/document.d.ts.map +1 -0
  34. package/dist/models/document.js +15 -0
  35. package/dist/models/document.js.map +1 -0
  36. package/dist/models/embedding.d.ts +87 -0
  37. package/dist/models/embedding.d.ts.map +1 -0
  38. package/dist/models/embedding.js +23 -0
  39. package/dist/models/embedding.js.map +1 -0
  40. package/dist/models/extraction.d.ts +15 -0
  41. package/dist/models/extraction.d.ts.map +1 -0
  42. package/dist/models/extraction.js +2 -0
  43. package/dist/models/extraction.js.map +1 -0
  44. package/dist/models/form-fill.d.ts +23 -0
  45. package/dist/models/form-fill.d.ts.map +1 -0
  46. package/dist/models/form-fill.js +2 -0
  47. package/dist/models/form-fill.js.map +1 -0
  48. package/dist/models/image.d.ts +177 -0
  49. package/dist/models/image.d.ts.map +1 -0
  50. package/dist/models/image.js +8 -0
  51. package/dist/models/image.js.map +1 -0
  52. package/dist/models/index.d.ts +14 -0
  53. package/dist/models/index.d.ts.map +1 -0
  54. package/dist/models/index.js +22 -0
  55. package/dist/models/index.js.map +1 -0
  56. package/dist/models/provenance.d.ts +174 -0
  57. package/dist/models/provenance.d.ts.map +1 -0
  58. package/dist/models/provenance.js +53 -0
  59. package/dist/models/provenance.js.map +1 -0
  60. package/dist/models/uploaded-file.d.ts +20 -0
  61. package/dist/models/uploaded-file.d.ts.map +1 -0
  62. package/dist/models/uploaded-file.js +2 -0
  63. package/dist/models/uploaded-file.js.map +1 -0
  64. package/dist/server/errors.d.ts +93 -0
  65. package/dist/server/errors.d.ts.map +1 -0
  66. package/dist/server/errors.js +256 -0
  67. package/dist/server/errors.js.map +1 -0
  68. package/dist/server/events.d.ts +36 -0
  69. package/dist/server/events.d.ts.map +1 -0
  70. package/dist/server/events.js +48 -0
  71. package/dist/server/events.js.map +1 -0
  72. package/dist/server/permissions.d.ts +26 -0
  73. package/dist/server/permissions.d.ts.map +1 -0
  74. package/dist/server/permissions.js +194 -0
  75. package/dist/server/permissions.js.map +1 -0
  76. package/dist/server/register-tools.d.ts +25 -0
  77. package/dist/server/register-tools.d.ts.map +1 -0
  78. package/dist/server/register-tools.js +102 -0
  79. package/dist/server/register-tools.js.map +1 -0
  80. package/dist/server/startup.d.ts +16 -0
  81. package/dist/server/startup.d.ts.map +1 -0
  82. package/dist/server/startup.js +37 -0
  83. package/dist/server/startup.js.map +1 -0
  84. package/dist/server/state.d.ts +166 -0
  85. package/dist/server/state.d.ts.map +1 -0
  86. package/dist/server/state.js +424 -0
  87. package/dist/server/state.js.map +1 -0
  88. package/dist/server/transports/http-transport.d.ts +37 -0
  89. package/dist/server/transports/http-transport.d.ts.map +1 -0
  90. package/dist/server/transports/http-transport.js +204 -0
  91. package/dist/server/transports/http-transport.js.map +1 -0
  92. package/dist/server/transports/index.d.ts +9 -0
  93. package/dist/server/transports/index.d.ts.map +1 -0
  94. package/dist/server/transports/index.js +9 -0
  95. package/dist/server/transports/index.js.map +1 -0
  96. package/dist/server/transports/session-manager.d.ts +40 -0
  97. package/dist/server/transports/session-manager.d.ts.map +1 -0
  98. package/dist/server/transports/session-manager.js +74 -0
  99. package/dist/server/transports/session-manager.js.map +1 -0
  100. package/dist/server/types.d.ts +82 -0
  101. package/dist/server/types.d.ts.map +1 -0
  102. package/dist/server/types.js +14 -0
  103. package/dist/server/types.js.map +1 -0
  104. package/dist/services/audit.d.ts +26 -0
  105. package/dist/services/audit.d.ts.map +1 -0
  106. package/dist/services/audit.js +43 -0
  107. package/dist/services/audit.js.map +1 -0
  108. package/dist/services/chunking/chunk-deduplicator.d.ts +33 -0
  109. package/dist/services/chunking/chunk-deduplicator.d.ts.map +1 -0
  110. package/dist/services/chunking/chunk-deduplicator.js +46 -0
  111. package/dist/services/chunking/chunk-deduplicator.js.map +1 -0
  112. package/dist/services/chunking/chunk-merger.d.ts +26 -0
  113. package/dist/services/chunking/chunk-merger.d.ts.map +1 -0
  114. package/dist/services/chunking/chunk-merger.js +94 -0
  115. package/dist/services/chunking/chunk-merger.js.map +1 -0
  116. package/dist/services/chunking/chunker.d.ts +62 -0
  117. package/dist/services/chunking/chunker.d.ts.map +1 -0
  118. package/dist/services/chunking/chunker.js +566 -0
  119. package/dist/services/chunking/chunker.js.map +1 -0
  120. package/dist/services/chunking/heading-normalizer.d.ts +33 -0
  121. package/dist/services/chunking/heading-normalizer.d.ts.map +1 -0
  122. package/dist/services/chunking/heading-normalizer.js +101 -0
  123. package/dist/services/chunking/heading-normalizer.js.map +1 -0
  124. package/dist/services/chunking/json-block-analyzer.d.ts +163 -0
  125. package/dist/services/chunking/json-block-analyzer.d.ts.map +1 -0
  126. package/dist/services/chunking/json-block-analyzer.js +1033 -0
  127. package/dist/services/chunking/json-block-analyzer.js.map +1 -0
  128. package/dist/services/chunking/markdown-parser.d.ts +75 -0
  129. package/dist/services/chunking/markdown-parser.d.ts.map +1 -0
  130. package/dist/services/chunking/markdown-parser.js +428 -0
  131. package/dist/services/chunking/markdown-parser.js.map +1 -0
  132. package/dist/services/chunking/text-normalizer.d.ts +20 -0
  133. package/dist/services/chunking/text-normalizer.d.ts.map +1 -0
  134. package/dist/services/chunking/text-normalizer.js +36 -0
  135. package/dist/services/chunking/text-normalizer.js.map +1 -0
  136. package/dist/services/clm/contract-schemas.d.ts +36 -0
  137. package/dist/services/clm/contract-schemas.d.ts.map +1 -0
  138. package/dist/services/clm/contract-schemas.js +92 -0
  139. package/dist/services/clm/contract-schemas.js.map +1 -0
  140. package/dist/services/clm/summarization.d.ts +46 -0
  141. package/dist/services/clm/summarization.d.ts.map +1 -0
  142. package/dist/services/clm/summarization.js +61 -0
  143. package/dist/services/clm/summarization.js.map +1 -0
  144. package/dist/services/clustering/clustering-service.d.ts +58 -0
  145. package/dist/services/clustering/clustering-service.d.ts.map +1 -0
  146. package/dist/services/clustering/clustering-service.js +467 -0
  147. package/dist/services/clustering/clustering-service.js.map +1 -0
  148. package/dist/services/comparison/diff-service.d.ts +41 -0
  149. package/dist/services/comparison/diff-service.d.ts.map +1 -0
  150. package/dist/services/comparison/diff-service.js +120 -0
  151. package/dist/services/comparison/diff-service.js.map +1 -0
  152. package/dist/services/embedding/embedder.d.ts +55 -0
  153. package/dist/services/embedding/embedder.d.ts.map +1 -0
  154. package/dist/services/embedding/embedder.js +202 -0
  155. package/dist/services/embedding/embedder.js.map +1 -0
  156. package/dist/services/embedding/nomic.d.ts +67 -0
  157. package/dist/services/embedding/nomic.d.ts.map +1 -0
  158. package/dist/services/embedding/nomic.js +280 -0
  159. package/dist/services/embedding/nomic.js.map +1 -0
  160. package/dist/services/gemini/circuit-breaker.d.ts +106 -0
  161. package/dist/services/gemini/circuit-breaker.d.ts.map +1 -0
  162. package/dist/services/gemini/circuit-breaker.js +237 -0
  163. package/dist/services/gemini/circuit-breaker.js.map +1 -0
  164. package/dist/services/gemini/client.d.ts +173 -0
  165. package/dist/services/gemini/client.d.ts.map +1 -0
  166. package/dist/services/gemini/client.js +483 -0
  167. package/dist/services/gemini/client.js.map +1 -0
  168. package/dist/services/gemini/config.d.ts +116 -0
  169. package/dist/services/gemini/config.d.ts.map +1 -0
  170. package/dist/services/gemini/config.js +118 -0
  171. package/dist/services/gemini/config.js.map +1 -0
  172. package/dist/services/gemini/index.d.ts +9 -0
  173. package/dist/services/gemini/index.d.ts.map +1 -0
  174. package/dist/services/gemini/index.js +13 -0
  175. package/dist/services/gemini/index.js.map +1 -0
  176. package/dist/services/gemini/rate-limiter.d.ts +62 -0
  177. package/dist/services/gemini/rate-limiter.d.ts.map +1 -0
  178. package/dist/services/gemini/rate-limiter.js +120 -0
  179. package/dist/services/gemini/rate-limiter.js.map +1 -0
  180. package/dist/services/images/extractor.d.ts +88 -0
  181. package/dist/services/images/extractor.d.ts.map +1 -0
  182. package/dist/services/images/extractor.js +340 -0
  183. package/dist/services/images/extractor.js.map +1 -0
  184. package/dist/services/images/optimizer.d.ts +130 -0
  185. package/dist/services/images/optimizer.d.ts.map +1 -0
  186. package/dist/services/images/optimizer.js +228 -0
  187. package/dist/services/images/optimizer.js.map +1 -0
  188. package/dist/services/ocr/datalab.d.ts +64 -0
  189. package/dist/services/ocr/datalab.d.ts.map +1 -0
  190. package/dist/services/ocr/datalab.js +425 -0
  191. package/dist/services/ocr/datalab.js.map +1 -0
  192. package/dist/services/ocr/errors.d.ts +38 -0
  193. package/dist/services/ocr/errors.d.ts.map +1 -0
  194. package/dist/services/ocr/errors.js +83 -0
  195. package/dist/services/ocr/errors.js.map +1 -0
  196. package/dist/services/ocr/file-manager.d.ts +76 -0
  197. package/dist/services/ocr/file-manager.d.ts.map +1 -0
  198. package/dist/services/ocr/file-manager.js +238 -0
  199. package/dist/services/ocr/file-manager.js.map +1 -0
  200. package/dist/services/ocr/form-fill.d.ts +48 -0
  201. package/dist/services/ocr/form-fill.d.ts.map +1 -0
  202. package/dist/services/ocr/form-fill.js +213 -0
  203. package/dist/services/ocr/form-fill.js.map +1 -0
  204. package/dist/services/ocr/processor.d.ts +95 -0
  205. package/dist/services/ocr/processor.d.ts.map +1 -0
  206. package/dist/services/ocr/processor.js +259 -0
  207. package/dist/services/ocr/processor.js.map +1 -0
  208. package/dist/services/provenance/agent-metadata.d.ts +82 -0
  209. package/dist/services/provenance/agent-metadata.d.ts.map +1 -0
  210. package/dist/services/provenance/agent-metadata.js +106 -0
  211. package/dist/services/provenance/agent-metadata.js.map +1 -0
  212. package/dist/services/provenance/chain-hash.d.ts +57 -0
  213. package/dist/services/provenance/chain-hash.d.ts.map +1 -0
  214. package/dist/services/provenance/chain-hash.js +131 -0
  215. package/dist/services/provenance/chain-hash.js.map +1 -0
  216. package/dist/services/provenance/exporter.d.ts +202 -0
  217. package/dist/services/provenance/exporter.d.ts.map +1 -0
  218. package/dist/services/provenance/exporter.js +457 -0
  219. package/dist/services/provenance/exporter.js.map +1 -0
  220. package/dist/services/provenance/index.d.ts +15 -0
  221. package/dist/services/provenance/index.d.ts.map +1 -0
  222. package/dist/services/provenance/index.js +17 -0
  223. package/dist/services/provenance/index.js.map +1 -0
  224. package/dist/services/provenance/tracker.d.ts +138 -0
  225. package/dist/services/provenance/tracker.d.ts.map +1 -0
  226. package/dist/services/provenance/tracker.js +293 -0
  227. package/dist/services/provenance/tracker.js.map +1 -0
  228. package/dist/services/provenance/verifier.d.ts +153 -0
  229. package/dist/services/provenance/verifier.d.ts.map +1 -0
  230. package/dist/services/provenance/verifier.js +536 -0
  231. package/dist/services/provenance/verifier.js.map +1 -0
  232. package/dist/services/python-pool.d.ts +70 -0
  233. package/dist/services/python-pool.d.ts.map +1 -0
  234. package/dist/services/python-pool.js +265 -0
  235. package/dist/services/python-pool.js.map +1 -0
  236. package/dist/services/search/bm25.d.ts +180 -0
  237. package/dist/services/search/bm25.d.ts.map +1 -0
  238. package/dist/services/search/bm25.js +656 -0
  239. package/dist/services/search/bm25.js.map +1 -0
  240. package/dist/services/search/fusion.d.ts +103 -0
  241. package/dist/services/search/fusion.d.ts.map +1 -0
  242. package/dist/services/search/fusion.js +122 -0
  243. package/dist/services/search/fusion.js.map +1 -0
  244. package/dist/services/search/local-reranker.d.ts +30 -0
  245. package/dist/services/search/local-reranker.d.ts.map +1 -0
  246. package/dist/services/search/local-reranker.js +123 -0
  247. package/dist/services/search/local-reranker.js.map +1 -0
  248. package/dist/services/search/quality.d.ts +11 -0
  249. package/dist/services/search/quality.d.ts.map +1 -0
  250. package/dist/services/search/quality.js +17 -0
  251. package/dist/services/search/quality.js.map +1 -0
  252. package/dist/services/search/query-classifier.d.ts +34 -0
  253. package/dist/services/search/query-classifier.d.ts.map +1 -0
  254. package/dist/services/search/query-classifier.js +114 -0
  255. package/dist/services/search/query-classifier.js.map +1 -0
  256. package/dist/services/search/query-expander.d.ts +73 -0
  257. package/dist/services/search/query-expander.d.ts.map +1 -0
  258. package/dist/services/search/query-expander.js +281 -0
  259. package/dist/services/search/query-expander.js.map +1 -0
  260. package/dist/services/search/reranker.d.ts +44 -0
  261. package/dist/services/search/reranker.d.ts.map +1 -0
  262. package/dist/services/search/reranker.js +101 -0
  263. package/dist/services/search/reranker.js.map +1 -0
  264. package/dist/services/storage/database/annotation-operations.d.ts +113 -0
  265. package/dist/services/storage/database/annotation-operations.d.ts.map +1 -0
  266. package/dist/services/storage/database/annotation-operations.js +177 -0
  267. package/dist/services/storage/database/annotation-operations.js.map +1 -0
  268. package/dist/services/storage/database/approval-operations.d.ts +132 -0
  269. package/dist/services/storage/database/approval-operations.d.ts.map +1 -0
  270. package/dist/services/storage/database/approval-operations.js +206 -0
  271. package/dist/services/storage/database/approval-operations.js.map +1 -0
  272. package/dist/services/storage/database/chunk-operations.d.ts +132 -0
  273. package/dist/services/storage/database/chunk-operations.d.ts.map +1 -0
  274. package/dist/services/storage/database/chunk-operations.js +306 -0
  275. package/dist/services/storage/database/chunk-operations.js.map +1 -0
  276. package/dist/services/storage/database/cluster-operations.d.ts +97 -0
  277. package/dist/services/storage/database/cluster-operations.d.ts.map +1 -0
  278. package/dist/services/storage/database/cluster-operations.js +258 -0
  279. package/dist/services/storage/database/cluster-operations.js.map +1 -0
  280. package/dist/services/storage/database/comparison-operations.d.ts +41 -0
  281. package/dist/services/storage/database/comparison-operations.d.ts.map +1 -0
  282. package/dist/services/storage/database/comparison-operations.js +65 -0
  283. package/dist/services/storage/database/comparison-operations.js.map +1 -0
  284. package/dist/services/storage/database/converters.d.ts +36 -0
  285. package/dist/services/storage/database/converters.d.ts.map +1 -0
  286. package/dist/services/storage/database/converters.js +244 -0
  287. package/dist/services/storage/database/converters.js.map +1 -0
  288. package/dist/services/storage/database/document-operations.d.ts +145 -0
  289. package/dist/services/storage/database/document-operations.d.ts.map +1 -0
  290. package/dist/services/storage/database/document-operations.js +498 -0
  291. package/dist/services/storage/database/document-operations.js.map +1 -0
  292. package/dist/services/storage/database/embedding-operations.d.ts +130 -0
  293. package/dist/services/storage/database/embedding-operations.d.ts.map +1 -0
  294. package/dist/services/storage/database/embedding-operations.js +315 -0
  295. package/dist/services/storage/database/embedding-operations.js.map +1 -0
  296. package/dist/services/storage/database/extraction-operations.d.ts +47 -0
  297. package/dist/services/storage/database/extraction-operations.d.ts.map +1 -0
  298. package/dist/services/storage/database/extraction-operations.js +85 -0
  299. package/dist/services/storage/database/extraction-operations.js.map +1 -0
  300. package/dist/services/storage/database/form-fill-operations.d.ts +58 -0
  301. package/dist/services/storage/database/form-fill-operations.d.ts.map +1 -0
  302. package/dist/services/storage/database/form-fill-operations.js +116 -0
  303. package/dist/services/storage/database/form-fill-operations.js.map +1 -0
  304. package/dist/services/storage/database/helpers.d.ts +29 -0
  305. package/dist/services/storage/database/helpers.d.ts.map +1 -0
  306. package/dist/services/storage/database/helpers.js +55 -0
  307. package/dist/services/storage/database/helpers.js.map +1 -0
  308. package/dist/services/storage/database/image-operations.d.ts +202 -0
  309. package/dist/services/storage/database/image-operations.d.ts.map +1 -0
  310. package/dist/services/storage/database/image-operations.js +484 -0
  311. package/dist/services/storage/database/image-operations.js.map +1 -0
  312. package/dist/services/storage/database/index.d.ts +13 -0
  313. package/dist/services/storage/database/index.d.ts.map +1 -0
  314. package/dist/services/storage/database/index.js +16 -0
  315. package/dist/services/storage/database/index.js.map +1 -0
  316. package/dist/services/storage/database/lock-operations.d.ts +59 -0
  317. package/dist/services/storage/database/lock-operations.d.ts.map +1 -0
  318. package/dist/services/storage/database/lock-operations.js +89 -0
  319. package/dist/services/storage/database/lock-operations.js.map +1 -0
  320. package/dist/services/storage/database/obligation-operations.d.ts +88 -0
  321. package/dist/services/storage/database/obligation-operations.d.ts.map +1 -0
  322. package/dist/services/storage/database/obligation-operations.js +206 -0
  323. package/dist/services/storage/database/obligation-operations.js.map +1 -0
  324. package/dist/services/storage/database/ocr-operations.d.ts +33 -0
  325. package/dist/services/storage/database/ocr-operations.d.ts.map +1 -0
  326. package/dist/services/storage/database/ocr-operations.js +70 -0
  327. package/dist/services/storage/database/ocr-operations.js.map +1 -0
  328. package/dist/services/storage/database/playbook-operations.d.ts +72 -0
  329. package/dist/services/storage/database/playbook-operations.d.ts.map +1 -0
  330. package/dist/services/storage/database/playbook-operations.js +247 -0
  331. package/dist/services/storage/database/playbook-operations.js.map +1 -0
  332. package/dist/services/storage/database/provenance-operations.d.ts +112 -0
  333. package/dist/services/storage/database/provenance-operations.d.ts.map +1 -0
  334. package/dist/services/storage/database/provenance-operations.js +251 -0
  335. package/dist/services/storage/database/provenance-operations.js.map +1 -0
  336. package/dist/services/storage/database/service.d.ts +142 -0
  337. package/dist/services/storage/database/service.d.ts.map +1 -0
  338. package/dist/services/storage/database/service.js +310 -0
  339. package/dist/services/storage/database/service.js.map +1 -0
  340. package/dist/services/storage/database/static-operations.d.ts +30 -0
  341. package/dist/services/storage/database/static-operations.d.ts.map +1 -0
  342. package/dist/services/storage/database/static-operations.js +218 -0
  343. package/dist/services/storage/database/static-operations.js.map +1 -0
  344. package/dist/services/storage/database/stats-operations.d.ts +101 -0
  345. package/dist/services/storage/database/stats-operations.d.ts.map +1 -0
  346. package/dist/services/storage/database/stats-operations.js +394 -0
  347. package/dist/services/storage/database/stats-operations.js.map +1 -0
  348. package/dist/services/storage/database/tag-operations.d.ts +76 -0
  349. package/dist/services/storage/database/tag-operations.d.ts.map +1 -0
  350. package/dist/services/storage/database/tag-operations.js +178 -0
  351. package/dist/services/storage/database/tag-operations.js.map +1 -0
  352. package/dist/services/storage/database/types.d.ts +286 -0
  353. package/dist/services/storage/database/types.d.ts.map +1 -0
  354. package/dist/services/storage/database/types.js +39 -0
  355. package/dist/services/storage/database/types.js.map +1 -0
  356. package/dist/services/storage/database/upload-operations.d.ts +71 -0
  357. package/dist/services/storage/database/upload-operations.d.ts.map +1 -0
  358. package/dist/services/storage/database/upload-operations.js +124 -0
  359. package/dist/services/storage/database/upload-operations.js.map +1 -0
  360. package/dist/services/storage/database/user-operations.d.ts +102 -0
  361. package/dist/services/storage/database/user-operations.d.ts.map +1 -0
  362. package/dist/services/storage/database/user-operations.js +151 -0
  363. package/dist/services/storage/database/user-operations.js.map +1 -0
  364. package/dist/services/storage/database/workflow-operations.d.ts +98 -0
  365. package/dist/services/storage/database/workflow-operations.d.ts.map +1 -0
  366. package/dist/services/storage/database/workflow-operations.js +157 -0
  367. package/dist/services/storage/database/workflow-operations.js.map +1 -0
  368. package/dist/services/storage/database.d.ts +16 -0
  369. package/dist/services/storage/database.d.ts.map +1 -0
  370. package/dist/services/storage/database.js +15 -0
  371. package/dist/services/storage/database.js.map +1 -0
  372. package/dist/services/storage/index.d.ts +10 -0
  373. package/dist/services/storage/index.d.ts.map +1 -0
  374. package/dist/services/storage/index.js +10 -0
  375. package/dist/services/storage/index.js.map +1 -0
  376. package/dist/services/storage/migrations/index.d.ts +16 -0
  377. package/dist/services/storage/migrations/index.d.ts.map +1 -0
  378. package/dist/services/storage/migrations/index.js +20 -0
  379. package/dist/services/storage/migrations/index.js.map +1 -0
  380. package/dist/services/storage/migrations/operations.d.ts +40 -0
  381. package/dist/services/storage/migrations/operations.d.ts.map +1 -0
  382. package/dist/services/storage/migrations/operations.js +2910 -0
  383. package/dist/services/storage/migrations/operations.js.map +1 -0
  384. package/dist/services/storage/migrations/schema-definitions.d.ts +306 -0
  385. package/dist/services/storage/migrations/schema-definitions.d.ts.map +1 -0
  386. package/dist/services/storage/migrations/schema-definitions.js +1006 -0
  387. package/dist/services/storage/migrations/schema-definitions.js.map +1 -0
  388. package/dist/services/storage/migrations/schema-helpers.d.ts +50 -0
  389. package/dist/services/storage/migrations/schema-helpers.d.ts.map +1 -0
  390. package/dist/services/storage/migrations/schema-helpers.js +176 -0
  391. package/dist/services/storage/migrations/schema-helpers.js.map +1 -0
  392. package/dist/services/storage/migrations/types.d.ts +15 -0
  393. package/dist/services/storage/migrations/types.d.ts.map +1 -0
  394. package/dist/services/storage/migrations/types.js +21 -0
  395. package/dist/services/storage/migrations/types.js.map +1 -0
  396. package/dist/services/storage/migrations/verification.d.ts +20 -0
  397. package/dist/services/storage/migrations/verification.d.ts.map +1 -0
  398. package/dist/services/storage/migrations/verification.js +78 -0
  399. package/dist/services/storage/migrations/verification.js.map +1 -0
  400. package/dist/services/storage/migrations.d.ts +16 -0
  401. package/dist/services/storage/migrations.d.ts.map +1 -0
  402. package/dist/services/storage/migrations.js +17 -0
  403. package/dist/services/storage/migrations.js.map +1 -0
  404. package/dist/services/storage/types.d.ts +12 -0
  405. package/dist/services/storage/types.d.ts.map +1 -0
  406. package/dist/services/storage/types.js +5 -0
  407. package/dist/services/storage/types.js.map +1 -0
  408. package/dist/services/storage/vector.d.ts +208 -0
  409. package/dist/services/storage/vector.d.ts.map +1 -0
  410. package/dist/services/storage/vector.js +526 -0
  411. package/dist/services/storage/vector.js.map +1 -0
  412. package/dist/services/vlm/pipeline.d.ts +194 -0
  413. package/dist/services/vlm/pipeline.d.ts.map +1 -0
  414. package/dist/services/vlm/pipeline.js +800 -0
  415. package/dist/services/vlm/pipeline.js.map +1 -0
  416. package/dist/services/vlm/prompts.d.ts +171 -0
  417. package/dist/services/vlm/prompts.d.ts.map +1 -0
  418. package/dist/services/vlm/prompts.js +229 -0
  419. package/dist/services/vlm/prompts.js.map +1 -0
  420. package/dist/services/vlm/service.d.ts +174 -0
  421. package/dist/services/vlm/service.d.ts.map +1 -0
  422. package/dist/services/vlm/service.js +256 -0
  423. package/dist/services/vlm/service.js.map +1 -0
  424. package/dist/services/webhook-delivery.d.ts +4 -0
  425. package/dist/services/webhook-delivery.d.ts.map +1 -0
  426. package/dist/services/webhook-delivery.js +140 -0
  427. package/dist/services/webhook-delivery.js.map +1 -0
  428. package/dist/tools/chunks.d.ts +19 -0
  429. package/dist/tools/chunks.d.ts.map +1 -0
  430. package/dist/tools/chunks.js +392 -0
  431. package/dist/tools/chunks.js.map +1 -0
  432. package/dist/tools/clm.d.ts +16 -0
  433. package/dist/tools/clm.d.ts.map +1 -0
  434. package/dist/tools/clm.js +668 -0
  435. package/dist/tools/clm.js.map +1 -0
  436. package/dist/tools/clustering.d.ts +13 -0
  437. package/dist/tools/clustering.d.ts.map +1 -0
  438. package/dist/tools/clustering.js +498 -0
  439. package/dist/tools/clustering.js.map +1 -0
  440. package/dist/tools/collaboration.d.ts +15 -0
  441. package/dist/tools/collaboration.d.ts.map +1 -0
  442. package/dist/tools/collaboration.js +516 -0
  443. package/dist/tools/collaboration.js.map +1 -0
  444. package/dist/tools/comparison.d.ts +13 -0
  445. package/dist/tools/comparison.d.ts.map +1 -0
  446. package/dist/tools/comparison.js +735 -0
  447. package/dist/tools/comparison.js.map +1 -0
  448. package/dist/tools/compliance.d.ts +15 -0
  449. package/dist/tools/compliance.d.ts.map +1 -0
  450. package/dist/tools/compliance.js +640 -0
  451. package/dist/tools/compliance.js.map +1 -0
  452. package/dist/tools/config.d.ts +19 -0
  453. package/dist/tools/config.d.ts.map +1 -0
  454. package/dist/tools/config.js +213 -0
  455. package/dist/tools/config.js.map +1 -0
  456. package/dist/tools/database.d.ts +62 -0
  457. package/dist/tools/database.d.ts.map +1 -0
  458. package/dist/tools/database.js +288 -0
  459. package/dist/tools/database.js.map +1 -0
  460. package/dist/tools/documents.d.ts +61 -0
  461. package/dist/tools/documents.d.ts.map +1 -0
  462. package/dist/tools/documents.js +1624 -0
  463. package/dist/tools/documents.js.map +1 -0
  464. package/dist/tools/embeddings.d.ts +14 -0
  465. package/dist/tools/embeddings.d.ts.map +1 -0
  466. package/dist/tools/embeddings.js +626 -0
  467. package/dist/tools/embeddings.js.map +1 -0
  468. package/dist/tools/evaluation.d.ts +25 -0
  469. package/dist/tools/evaluation.d.ts.map +1 -0
  470. package/dist/tools/evaluation.js +523 -0
  471. package/dist/tools/evaluation.js.map +1 -0
  472. package/dist/tools/events.d.ts +16 -0
  473. package/dist/tools/events.d.ts.map +1 -0
  474. package/dist/tools/events.js +493 -0
  475. package/dist/tools/events.js.map +1 -0
  476. package/dist/tools/extraction-structured.d.ts +13 -0
  477. package/dist/tools/extraction-structured.d.ts.map +1 -0
  478. package/dist/tools/extraction-structured.js +390 -0
  479. package/dist/tools/extraction-structured.js.map +1 -0
  480. package/dist/tools/extraction.d.ts +24 -0
  481. package/dist/tools/extraction.d.ts.map +1 -0
  482. package/dist/tools/extraction.js +424 -0
  483. package/dist/tools/extraction.js.map +1 -0
  484. package/dist/tools/file-management.d.ts +14 -0
  485. package/dist/tools/file-management.d.ts.map +1 -0
  486. package/dist/tools/file-management.js +523 -0
  487. package/dist/tools/file-management.js.map +1 -0
  488. package/dist/tools/form-fill.d.ts +13 -0
  489. package/dist/tools/form-fill.d.ts.map +1 -0
  490. package/dist/tools/form-fill.js +250 -0
  491. package/dist/tools/form-fill.js.map +1 -0
  492. package/dist/tools/health.d.ts +19 -0
  493. package/dist/tools/health.d.ts.map +1 -0
  494. package/dist/tools/health.js +229 -0
  495. package/dist/tools/health.js.map +1 -0
  496. package/dist/tools/images.d.ts +54 -0
  497. package/dist/tools/images.d.ts.map +1 -0
  498. package/dist/tools/images.js +787 -0
  499. package/dist/tools/images.js.map +1 -0
  500. package/dist/tools/ingestion.d.ts +94 -0
  501. package/dist/tools/ingestion.d.ts.map +1 -0
  502. package/dist/tools/ingestion.js +1659 -0
  503. package/dist/tools/ingestion.js.map +1 -0
  504. package/dist/tools/intelligence.d.ts +18 -0
  505. package/dist/tools/intelligence.d.ts.map +1 -0
  506. package/dist/tools/intelligence.js +1039 -0
  507. package/dist/tools/intelligence.js.map +1 -0
  508. package/dist/tools/provenance.d.ts +51 -0
  509. package/dist/tools/provenance.d.ts.map +1 -0
  510. package/dist/tools/provenance.js +691 -0
  511. package/dist/tools/provenance.js.map +1 -0
  512. package/dist/tools/reports.d.ts +41 -0
  513. package/dist/tools/reports.d.ts.map +1 -0
  514. package/dist/tools/reports.js +1394 -0
  515. package/dist/tools/reports.js.map +1 -0
  516. package/dist/tools/search.d.ts +35 -0
  517. package/dist/tools/search.d.ts.map +1 -0
  518. package/dist/tools/search.js +2528 -0
  519. package/dist/tools/search.js.map +1 -0
  520. package/dist/tools/shared.d.ts +52 -0
  521. package/dist/tools/shared.d.ts.map +1 -0
  522. package/dist/tools/shared.js +54 -0
  523. package/dist/tools/shared.js.map +1 -0
  524. package/dist/tools/tags.d.ts +15 -0
  525. package/dist/tools/tags.d.ts.map +1 -0
  526. package/dist/tools/tags.js +287 -0
  527. package/dist/tools/tags.js.map +1 -0
  528. package/dist/tools/timeline.d.ts +15 -0
  529. package/dist/tools/timeline.d.ts.map +1 -0
  530. package/dist/tools/timeline.js +14 -0
  531. package/dist/tools/timeline.js.map +1 -0
  532. package/dist/tools/users.d.ts +14 -0
  533. package/dist/tools/users.d.ts.map +1 -0
  534. package/dist/tools/users.js +257 -0
  535. package/dist/tools/users.js.map +1 -0
  536. package/dist/tools/vlm.d.ts +40 -0
  537. package/dist/tools/vlm.d.ts.map +1 -0
  538. package/dist/tools/vlm.js +475 -0
  539. package/dist/tools/vlm.js.map +1 -0
  540. package/dist/tools/workflow.d.ts +16 -0
  541. package/dist/tools/workflow.d.ts.map +1 -0
  542. package/dist/tools/workflow.js +495 -0
  543. package/dist/tools/workflow.js.map +1 -0
  544. package/dist/utils/backoff.d.ts +53 -0
  545. package/dist/utils/backoff.d.ts.map +1 -0
  546. package/dist/utils/backoff.js +78 -0
  547. package/dist/utils/backoff.js.map +1 -0
  548. package/dist/utils/config-persistence.d.ts +33 -0
  549. package/dist/utils/config-persistence.d.ts.map +1 -0
  550. package/dist/utils/config-persistence.js +61 -0
  551. package/dist/utils/config-persistence.js.map +1 -0
  552. package/dist/utils/hash.d.ts +65 -0
  553. package/dist/utils/hash.d.ts.map +1 -0
  554. package/dist/utils/hash.js +146 -0
  555. package/dist/utils/hash.js.map +1 -0
  556. package/dist/utils/math.d.ts +21 -0
  557. package/dist/utils/math.d.ts.map +1 -0
  558. package/dist/utils/math.js +39 -0
  559. package/dist/utils/math.js.map +1 -0
  560. package/dist/utils/validation.d.ts +697 -0
  561. package/dist/utils/validation.d.ts.map +1 -0
  562. package/dist/utils/validation.js +529 -0
  563. package/dist/utils/validation.js.map +1 -0
  564. package/package.json +96 -0
  565. package/python/.gitkeep +0 -0
  566. package/python/__init__.py +104 -0
  567. package/python/clustering_worker.py +440 -0
  568. package/python/docx_image_extractor.py +524 -0
  569. package/python/embedding_worker.py +552 -0
  570. package/python/file_manager_worker.py +564 -0
  571. package/python/form_fill_worker.py +399 -0
  572. package/python/gpu_utils.py +582 -0
  573. package/python/image_extractor.py +317 -0
  574. package/python/image_optimizer.py +444 -0
  575. package/python/ocr_worker.py +712 -0
  576. package/python/pyproject.toml +76 -0
  577. package/python/requirements.txt +51 -0
  578. package/python/reranker_worker.py +87 -0
@@ -0,0 +1 @@
1
+ {"version":3,"file":"chain-hash.js","sourceRoot":"","sources":["../../../src/services/provenance/chain-hash.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAoBpC,kFAAkF;AAClF,mBAAmB;AACnB,kFAAkF;AAElF;;;;;;;;;GASG;AACH,MAAM,UAAU,gBAAgB,CAAC,WAAmB,EAAE,eAA8B;IAClF,MAAM,KAAK,GAAG,eAAe;QAC3B,CAAC,CAAC,GAAG,WAAW,IAAI,eAAe,EAAE;QACrC,CAAC,CAAC,WAAW,CAAC;IAChB,OAAO,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;AAC1D,CAAC;AAED,kFAAkF;AAClF,qBAAqB;AACrB,kFAAkF;AAElF;;;;;;;;;;;GAWG;AACH,MAAM,UAAU,iBAAiB,CAC/B,IAAuB,EACvB,cAAsB;IAEtB,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC;;;;;GAK5B,CAAC,CAAC,GAAG,CAAC,cAAc,CAKnB,CAAC;IAEH,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,aAAa,EAAE,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC;IACzE,CAAC;IAED,sCAAsC;IACtC,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;IACvD,IAAI,QAAQ,GAAG,CAAC,CAAC;IAEjB,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,IAAI,CAAC,MAAM,CAAC,UAAU,EAAE,CAAC;YACvB,oEAAoE;YACpE,QAAQ,EAAE,CAAC;YACX,SAAS;QACX,CAAC;QAED,MAAM,YAAY,GAAG,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,GAAG,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;QAC/E,MAAM,eAAe,GAAG,YAAY,EAAE,UAAU,IAAI,IAAI,CAAC;QACzD,MAAM,YAAY,GAAG,gBAAgB,CAAC,MAAM,CAAC,YAAY,EAAE,eAAe,CAAC,CAAC;QAE5E,IAAI,MAAM,CAAC,UAAU,KAAK,YAAY,EAAE,CAAC;YACvC,OAAO;gBACL,KAAK,EAAE,KAAK;gBACZ,aAAa,EAAE,OAAO,CAAC,MAAM;gBAC7B,QAAQ;gBACR,SAAS,EAAE,MAAM,CAAC,EAAE;gBACpB,KAAK,EAAE,0BAA0B,MAAM,CAAC,EAAE,cAAc,YAAY,SAAS,MAAM,CAAC,UAAU,EAAE;aACjG,CAAC;QACJ,CAAC;QACD,QAAQ,EAAE,CAAC;IACb,CAAC;IAED,OAAO;QACL,KAAK,EAAE,IAAI;QACX,aAAa,EAAE,OAAO,CAAC,MAAM;QAC7B,QAAQ;QACR,SAAS,EAAE,IAAI;KAChB,CAAC;AACJ,CAAC;AAED,kFAAkF;AAClF,WAAW;AACX,kFAAkF;AAElF;;;;;;;;GAQG;AACH,MAAM,UAAU,mBAAmB,CAAC,IAAuB;IACzD,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC;;;;;GAK5B,CAAC,CAAC,GAAG,EAKJ,CAAC;IAEH,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,MAAM,O
AAO,GAAG,IAAI,GAAG,EAAkB,CAAC;IAE1C,qDAAqD;IACrD,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,CAC3B,oEAAoE,CACrE,CAAC,GAAG,EAA+C,CAAC;IACrD,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;QACzB,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,UAAU,CAAC,CAAC;IAClC,CAAC;IAED,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,mDAAmD,CAAC,CAAC;IAErF,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,IAAI,CAAC;YACH,MAAM,eAAe,GAAG,MAAM,CAAC,SAAS;gBACtC,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,SAAS,CAAC,IAAI,IAAI,CAAC;gBACzC,CAAC,CAAC,IAAI,CAAC;YACT,MAAM,SAAS,GAAG,gBAAgB,CAAC,MAAM,CAAC,YAAY,EAAE,eAAe,CAAC,CAAC;YACzE,UAAU,CAAC,GAAG,CAAC,SAAS,EAAE,MAAM,CAAC,EAAE,CAAC,CAAC;YACrC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,EAAE,SAAS,CAAC,CAAC;YAClC,OAAO,EAAE,CAAC;QACZ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CACX,kCAAkC,MAAM,CAAC,EAAE,KAAK,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CACzG,CAAC;YACF,MAAM,EAAE,CAAC;QACX,CAAC;IACH,CAAC;IAED,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC;AAC7B,CAAC"}
@@ -0,0 +1,202 @@
1
+ /**
2
+ * ProvenanceExporter - Export provenance records in JSON, W3C PROV-JSON, and CSV
3
+ *
4
+ * Constitution Compliance:
5
+ * - CP-001: Complete provenance chain for every data item
6
+ * - CP-003: SHA-256 content hashing
7
+ * - CP-005: Full reproducibility via processing params
8
+ *
9
+ * FAIL FAST: All errors throw immediately with detailed error info
10
+ * NO MOCKS: Tests use real DatabaseService
11
+ */
12
+ import { DatabaseService } from '../storage/database/index.js';
13
+ import { ProvenanceTracker } from './tracker.js';
14
+ import { ProvenanceRecord } from '../../models/provenance.js';
15
+ export type ExportScope = 'document' | 'database' | 'all';
16
+ export type ExportFormat = 'json' | 'w3c-prov' | 'csv';
17
+ /**
18
+ * W3C PROV-JSON Document structure
19
+ * Reference: https://www.w3.org/submissions/prov-json/
20
+ */
21
+ interface PROVDocument {
22
+ prefix: Record<string, string>;
23
+ entity: Record<string, Record<string, unknown>>;
24
+ activity: Record<string, Record<string, unknown>>;
25
+ agent: Record<string, Record<string, unknown>>;
26
+ wasDerivedFrom: Record<string, Record<string, unknown>>;
27
+ wasGeneratedBy: Record<string, Record<string, unknown>>;
28
+ wasAttributedTo: Record<string, Record<string, unknown>>;
29
+ }
30
+ /** Result of JSON export */
31
+ interface JSONExportResult {
32
+ format: 'json';
33
+ scope: ExportScope;
34
+ document_id?: string;
35
+ exported_at: string;
36
+ record_count: number;
37
+ records: ProvenanceRecord[];
38
+ }
39
+ /** Result of W3C PROV-JSON export */
40
+ interface W3CPROVExportResult {
41
+ format: 'w3c-prov';
42
+ scope: ExportScope;
43
+ document_id?: string;
44
+ exported_at: string;
45
+ entity_count: number;
46
+ activity_count: number;
47
+ agent_count: number;
48
+ prov_document: PROVDocument;
49
+ }
50
+ /** Result of CSV export */
51
+ interface CSVExportResult {
52
+ format: 'csv';
53
+ scope: ExportScope;
54
+ document_id?: string;
55
+ exported_at: string;
56
+ record_count: number;
57
+ csv_content: string;
58
+ }
59
+ /** Result of file export */
60
+ interface FileExportResult {
61
+ success: boolean;
62
+ format: ExportFormat;
63
+ output_path: string;
64
+ bytes_written: number;
65
+ record_count: number;
66
+ }
67
+ /** Error codes for exporter operations - extends ProvenanceErrorCode */
68
+ export declare const ExporterErrorCode: {
69
+ readonly INVALID_SCOPE: "INVALID_EXPORT_SCOPE";
70
+ readonly INVALID_FORMAT: "INVALID_EXPORT_FORMAT";
71
+ readonly FILE_WRITE_ERROR: "FILE_WRITE_ERROR";
72
+ readonly DOCUMENT_REQUIRED: "DOCUMENT_ID_REQUIRED_FOR_SCOPE";
73
+ readonly NOT_FOUND: "PROVENANCE_NOT_FOUND";
74
+ readonly CHAIN_BROKEN: "PROVENANCE_CHAIN_BROKEN";
75
+ readonly ROOT_NOT_FOUND: "ROOT_DOCUMENT_NOT_FOUND";
76
+ readonly INVALID_TYPE: "INVALID_PROVENANCE_TYPE";
77
+ readonly INVALID_PARAMS: "INVALID_PROVENANCE_PARAMS";
78
+ };
79
+ type ExporterErrorCodeType = (typeof ExporterErrorCode)[keyof typeof ExporterErrorCode];
80
+ /**
81
+ * ExporterError - Typed error for export operations
82
+ * FAIL FAST: Always throw with detailed error information
83
+ */
84
+ export declare class ExporterError extends Error {
85
+ readonly code: ExporterErrorCodeType;
86
+ readonly details?: Record<string, unknown> | undefined;
87
+ constructor(message: string, code: ExporterErrorCodeType, details?: Record<string, unknown> | undefined);
88
+ }
89
+ /**
90
+ * ProvenanceExporter - Export provenance records in multiple formats
91
+ *
92
+ * Supports:
93
+ * - JSON: Internal JSON format with full ProvenanceRecord data
94
+ * - W3C PROV-JSON: Standard W3C PROV-JSON format for interoperability
95
+ * - CSV: Tabular format for spreadsheet/analysis tools
96
+ *
97
+ * Scopes:
98
+ * - document: Export all records for a specific root document
99
+ * - database/all: Export all records in the database
100
+ */
101
+ export declare class ProvenanceExporter {
102
+ private readonly tracker;
103
+ private readonly rawDb;
104
+ private static readonly PROV_PREFIX;
105
+ private static readonly OCR_PREFIX;
106
+ private static readonly OCRP_PREFIX;
107
+ constructor(db: DatabaseService, tracker: ProvenanceTracker);
108
+ /**
109
+ * Export provenance records as internal JSON format
110
+ *
111
+ * @param scope - 'document' (requires documentId), 'database', or 'all'
112
+ * @param documentId - Required when scope='document'
113
+ * @returns JSONExportResult with full ProvenanceRecord array
114
+ * @throws ExporterError if scope invalid or documentId missing when required
115
+ */
116
+ exportJSON(scope: ExportScope, documentId?: string): Promise<JSONExportResult>;
117
+ /**
118
+ * Export provenance records as W3C PROV-JSON format
119
+ *
120
+ * Reference: https://www.w3.org/submissions/prov-json/
121
+ *
122
+ * Mapping:
123
+ * - ProvenanceRecord -> prov:Entity
124
+ * - Processing operation -> prov:Activity (non-DOCUMENT types)
125
+ * - Processor -> prov:Agent
126
+ * - source_id relationship -> wasDerivedFrom
127
+ * - Processing creates record -> wasGeneratedBy
128
+ * - Processor attribution -> wasAttributedTo
129
+ *
130
+ * @param scope - 'document' (requires documentId), 'database', or 'all'
131
+ * @param documentId - Required when scope='document'
132
+ * @returns W3CPROVExportResult with PROV-JSON document
133
+ * @throws ExporterError if scope invalid or documentId missing when required
134
+ */
135
+ exportW3CPROV(scope: ExportScope, documentId?: string): Promise<W3CPROVExportResult>;
136
+ /**
137
+ * Export provenance records as CSV format
138
+ *
139
+ * @param scope - 'document' (requires documentId), 'database', or 'all'
140
+ * @param documentId - Required when scope='document'
141
+ * @returns CSVExportResult with CSV content string
142
+ * @throws ExporterError if scope invalid or documentId missing when required
143
+ */
144
+ exportCSV(scope: ExportScope, documentId?: string): Promise<CSVExportResult>;
145
+ /**
146
+ * Export provenance records to a file
147
+ *
148
+ * Creates parent directories if they don't exist.
149
+ *
150
+ * @param outputPath - Absolute path to output file
151
+ * @param format - 'json', 'w3c-prov', or 'csv'
152
+ * @param scope - 'document' (requires documentId), 'database', or 'all'
153
+ * @param documentId - Required when scope='document'
154
+ * @returns FileExportResult with bytes written and path
155
+ * @throws ExporterError if format invalid, scope invalid, or write fails
156
+ */
157
+ exportToFile(outputPath: string, format: ExportFormat, scope: ExportScope, documentId?: string): Promise<FileExportResult>;
158
+ /**
159
+ * Validate scope and documentId
160
+ * @throws ExporterError if invalid
161
+ */
162
+ private validateScope;
163
+ /**
164
+ * Get provenance records for the given scope
165
+ * Note: validateScope() must be called before this to ensure documentId is present when needed
166
+ */
167
+ private getRecordsForScope;
168
+ /**
169
+ * Get all provenance records from database
170
+ * Ordered by chain_depth ASC, created_at ASC
171
+ */
172
+ private getAllProvenance;
173
+ /**
174
+ * Transform provenance records to W3C PROV-JSON format
175
+ *
176
+ * Mapping rules:
177
+ * - Each ProvenanceRecord becomes an entity: ocr:<type>-<id>
178
+ * - Non-DOCUMENT types create an activity: ocr:activity-<id>
179
+ * - Unique agents per processor name
180
+ * - wasDerivedFrom links entities with source_id
181
+ * - wasGeneratedBy links entities to their activities
182
+ * - wasAttributedTo links all entities to their processor agents
183
+ */
184
+ private transformToPROVJSON;
185
+ /**
186
+ * Sanitize processor name for use as agent ID
187
+ * Only allows alphanumeric and hyphens
188
+ */
189
+ private sanitizeAgentId;
190
+ /**
191
+ * Transform provenance records to CSV format
192
+ */
193
+ private transformToCSV;
194
+ /**
195
+ * Escape a value for CSV
196
+ * - Wrap in quotes if contains comma, quote, or newline
197
+ * - Escape quotes by doubling them
198
+ */
199
+ private escapeCSV;
200
+ }
201
+ export {};
202
+ //# sourceMappingURL=exporter.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"exporter.d.ts","sourceRoot":"","sources":["../../../src/services/provenance/exporter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAMH,OAAO,EAAE,eAAe,EAAE,MAAM,8BAA8B,CAAC;AAC/D,OAAO,EAAE,iBAAiB,EAAuB,MAAM,cAAc,CAAC;AACtE,OAAO,EAAE,gBAAgB,EAAkB,MAAM,4BAA4B,CAAC;AAQ9E,MAAM,MAAM,WAAW,GAAG,UAAU,GAAG,UAAU,GAAG,KAAK,CAAC;AAC1D,MAAM,MAAM,YAAY,GAAG,MAAM,GAAG,UAAU,GAAG,KAAK,CAAC;AAEvD;;;GAGG;AACH,UAAU,YAAY;IACpB,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC/B,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC;IAChD,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC;IAClD,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC;IAC/C,cAAc,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC;IACxD,cAAc,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC;IACxD,eAAe,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC;CAC1D;AAED,4BAA4B;AAC5B,UAAU,gBAAgB;IACxB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,WAAW,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,gBAAgB,EAAE,CAAC;CAC7B;AAED,qCAAqC;AACrC,UAAU,mBAAmB;IAC3B,MAAM,EAAE,UAAU,CAAC;IACnB,KAAK,EAAE,WAAW,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,EAAE,MAAM,CAAC;IACvB,WAAW,EAAE,MAAM,CAAC;IACpB,aAAa,EAAE,YAAY,CAAC;CAC7B;AAED,2BAA2B;AAC3B,UAAU,eAAe;IACvB,MAAM,EAAE,KAAK,CAAC;IACd,KAAK,EAAE,WAAW,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,WAAW,EAAE,MAAM,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,WAAW,EAAE,MAAM,CAAC;CACrB;AAED,4BAA4B;AAC5B,UAAU,gBAAgB;IACxB,OAAO,EAAE,OAAO,CAAC;IACjB,MAAM,EAAE,YAAY,CAAC;IACrB,WAAW,EAAE,MAAM,CAAC;IACpB,aAAa,EAAE,MAAM,CAAC;IACtB,YAAY,EAAE,MAAM,CAAC;CACtB;AAMD,wEAAwE;AACxE,eAAO,MAAM,iBAAiB;;;;;;;;;;CAMpB,CAAC;AAEX,KAAK,qBAAqB,GAAG,CAAC,OAAO,iBAAiB,CAAC,CAAC,MAAM,OAAO,iBAAiB,CAAC,CAAC;AAExF;;;GAGG;AACH,qBAAa,aAAc,SAAQ,KAAK;aAGpB,IAAI,EAAE,qBAAqB;aAC3B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC;gBAF
jD,OAAO,EAAE,MAAM,EACC,IAAI,EAAE,qBAAqB,EAC3B,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,YAAA;CAMpD;AAMD;;;;;;;;;;;GAWG;AACH,qBAAa,kBAAkB;IAS3B,OAAO,CAAC,QAAQ,CAAC,OAAO;IAR1B,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAoB;IAE1C,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,WAAW,CAAgC;IACnE,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,UAAU,CAAqC;IACvE,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,WAAW,CAAuC;gBAGxE,EAAE,EAAE,eAAe,EACF,OAAO,EAAE,iBAAiB;IAS7C;;;;;;;OAOG;IACG,UAAU,CAAC,KAAK,EAAE,WAAW,EAAE,UAAU,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,CAAC;IAepF;;;;;;;;;;;;;;;;;OAiBG;IACG,aAAa,CAAC,KAAK,EAAE,WAAW,EAAE,UAAU,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,mBAAmB,CAAC;IAkB1F;;;;;;;OAOG;IACG,SAAS,CAAC,KAAK,EAAE,WAAW,EAAE,UAAU,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC;IAgBlF;;;;;;;;;;;OAWG;IACG,YAAY,CAChB,UAAU,EAAE,MAAM,EAClB,MAAM,EAAE,YAAY,EACpB,KAAK,EAAE,WAAW,EAClB,UAAU,CAAC,EAAE,MAAM,GAClB,OAAO,CAAC,gBAAgB,CAAC;IAmE5B;;;OAGG;IACH,OAAO,CAAC,aAAa;IAoBrB;;;OAGG;IACH,OAAO,CAAC,kBAAkB;IAQ1B;;;OAGG;IACH,OAAO,CAAC,gBAAgB;IAYxB;;;;;;;;;;OAUG;IACH,OAAO,CAAC,mBAAmB;IAmI3B;;;OAGG;IACH,OAAO,CAAC,eAAe;IAOvB;;OAEG;IACH,OAAO,CAAC,cAAc;IA6DtB;;;;OAIG;IACH,OAAO,CAAC,SAAS;CAWlB"}
@@ -0,0 +1,457 @@
1
+ /**
2
+ * ProvenanceExporter - Export provenance records in JSON, W3C PROV-JSON, and CSV
3
+ *
4
+ * Constitution Compliance:
5
+ * - CP-001: Complete provenance chain for every data item
6
+ * - CP-003: SHA-256 content hashing
7
+ * - CP-005: Full reproducibility via processing params
8
+ *
9
+ * FAIL FAST: All errors throw immediately with detailed error info
10
+ * NO MOCKS: Tests use real DatabaseService
11
+ */
12
+ import { writeFile } from 'fs/promises';
13
+ import { existsSync, mkdirSync } from 'fs';
14
+ import { dirname } from 'path';
15
+ import { ProvenanceErrorCode } from './tracker.js';
16
+ import { ProvenanceType } from '../../models/provenance.js';
17
+ import { rowToProvenance } from '../storage/database/converters.js';
18
+ // ═══════════════════════════════════════════════════════════════════════════════
19
+ // ERROR HANDLING - FAIL FAST
20
+ // ═══════════════════════════════════════════════════════════════════════════════
21
/**
 * Error codes for exporter operations.
 *
 * Contains every code from ProvenanceErrorCode plus the exporter-specific
 * codes listed below. The table is frozen so the shared constant cannot be
 * modified at runtime, matching the `readonly` members in the generated
 * type declaration.
 */
export const ExporterErrorCode = Object.freeze({
  ...ProvenanceErrorCode,
  INVALID_SCOPE: 'INVALID_EXPORT_SCOPE',
  INVALID_FORMAT: 'INVALID_EXPORT_FORMAT',
  FILE_WRITE_ERROR: 'FILE_WRITE_ERROR',
  DOCUMENT_REQUIRED: 'DOCUMENT_ID_REQUIRED_FOR_SCOPE',
});
29
/**
 * ExporterError - Typed error for export operations
 * FAIL FAST: Always throw with detailed error information
 *
 * @param {string} message - Human-readable description of the failure
 * @param {string} code - One of the ExporterErrorCode values
 * @param {Record<string, unknown>} [details] - Optional structured context
 */
export class ExporterError extends Error {
  /** Machine-readable error code supplied by the thrower. */
  code;
  /** Optional structured context supplied by the thrower. */
  details;

  constructor(message, code, details) {
    super(message);
    this.code = code;
    this.details = details;
    this.name = 'ExporterError';
    // Use new.target so that instances of subclasses keep their own
    // prototype; pinning ExporterError.prototype here would strip subclass
    // methods and make `instanceof Subclass` false.
    Object.setPrototypeOf(this, new.target.prototype);
  }
}
44
+ // ═══════════════════════════════════════════════════════════════════════════════
45
+ // MAIN CLASS
46
+ // ═══════════════════════════════════════════════════════════════════════════════
47
/**
 * ProvenanceExporter - Export provenance records in multiple formats
 *
 * Supports:
 * - JSON: Internal JSON format with full ProvenanceRecord data
 * - W3C PROV-JSON: Standard W3C PROV-JSON format for interoperability
 * - CSV: Tabular format for spreadsheet/analysis tools
 *
 * Scopes:
 * - document: Export all records for a specific root document
 * - database/all: Export all records in the database
 */
export class ProvenanceExporter {
  /** Tracker used to look up the records of a single root document. */
  tracker;
  /** Connection object returned by `db.getConnection()`; used for full-table reads. */
  rawDb;
  static PROV_PREFIX = 'http://www.w3.org/ns/prov#';
  static OCR_PREFIX = 'http://ocr-provenance.local/ns#';
  static OCRP_PREFIX = 'http://ocr-provenance.local/prov#';

  /**
   * @param db - Database service; only `getConnection()` is called on it
   * @param tracker - Provenance tracker used for document-scoped exports
   */
  constructor(db, tracker) {
    this.tracker = tracker;
    this.rawDb = db.getConnection();
  }

  // ═══════════════════════════════════════════════════════════════════════════
  // PUBLIC API
  // ═══════════════════════════════════════════════════════════════════════════

  /**
   * Export provenance records as internal JSON format
   *
   * @param scope - 'document' (requires documentId), 'database', or 'all'
   * @param documentId - Required when scope='document'
   * @returns JSONExportResult with full ProvenanceRecord array
   * @throws ExporterError if scope invalid or documentId missing when required
   */
  async exportJSON(scope, documentId) {
    this.validateScope(scope, documentId);
    const records = this.getRecordsForScope(scope, documentId);
    return {
      format: 'json',
      scope,
      document_id: documentId,
      exported_at: new Date().toISOString(),
      record_count: records.length,
      records,
    };
  }

  /**
   * Export provenance records as W3C PROV-JSON format
   *
   * Reference: https://www.w3.org/submissions/prov-json/
   *
   * Mapping:
   * - ProvenanceRecord -> prov:Entity
   * - Processing operation -> prov:Activity (non-DOCUMENT types)
   * - Processor -> prov:Agent
   * - source_id relationship -> wasDerivedFrom
   * - Processing creates record -> wasGeneratedBy
   * - Processor attribution -> wasAttributedTo
   *
   * @param scope - 'document' (requires documentId), 'database', or 'all'
   * @param documentId - Required when scope='document'
   * @returns W3CPROVExportResult with PROV-JSON document
   * @throws ExporterError if scope invalid or documentId missing when required
   */
  async exportW3CPROV(scope, documentId) {
    this.validateScope(scope, documentId);
    const records = this.getRecordsForScope(scope, documentId);
    const provDoc = this.transformToPROVJSON(records);
    return {
      format: 'w3c-prov',
      scope,
      document_id: documentId,
      exported_at: new Date().toISOString(),
      entity_count: Object.keys(provDoc.entity).length,
      activity_count: Object.keys(provDoc.activity).length,
      agent_count: Object.keys(provDoc.agent).length,
      prov_document: provDoc,
    };
  }

  /**
   * Export provenance records as CSV format
   *
   * @param scope - 'document' (requires documentId), 'database', or 'all'
   * @param documentId - Required when scope='document'
   * @returns CSVExportResult with CSV content string
   * @throws ExporterError if scope invalid or documentId missing when required
   */
  async exportCSV(scope, documentId) {
    this.validateScope(scope, documentId);
    const records = this.getRecordsForScope(scope, documentId);
    const csvContent = this.transformToCSV(records);
    return {
      format: 'csv',
      scope,
      document_id: documentId,
      exported_at: new Date().toISOString(),
      record_count: records.length,
      csv_content: csvContent,
    };
  }

  /**
   * Export provenance records to a file
   *
   * Creates parent directories if they don't exist. A failure while creating
   * the directory is reported the same way as a failure while writing the
   * file: as an ExporterError with code FILE_WRITE_ERROR.
   *
   * @param outputPath - Absolute path to output file
   * @param format - 'json', 'w3c-prov', or 'csv'
   * @param scope - 'document' (requires documentId), 'database', or 'all'
   * @param documentId - Required when scope='document'
   * @returns FileExportResult with bytes written and path
   * @throws ExporterError if format invalid, scope invalid, or write fails
   */
  async exportToFile(outputPath, format, scope, documentId) {
    // Get content based on format (export methods validate scope internally)
    let content;
    let recordCount;
    switch (format) {
      case 'json': {
        const result = await this.exportJSON(scope, documentId);
        content = JSON.stringify(result, null, 2);
        recordCount = result.record_count;
        break;
      }
      case 'w3c-prov': {
        const result = await this.exportW3CPROV(scope, documentId);
        content = JSON.stringify(result, null, 2);
        // One entity is emitted per record, so the entity count is reported
        // as the record count for this format.
        recordCount = result.entity_count;
        break;
      }
      case 'csv': {
        const result = await this.exportCSV(scope, documentId);
        content = result.csv_content;
        recordCount = result.record_count;
        break;
      }
      default:
        throw new ExporterError(`Invalid export format: ${format}. Valid formats: json, w3c-prov, csv`, ExporterErrorCode.INVALID_FORMAT, { providedFormat: format, validFormats: ['json', 'w3c-prov', 'csv'] });
    }

    try {
      // `recursive: true` makes this a no-op when the directory already
      // exists, so no separate existence check is needed.
      mkdirSync(dirname(outputPath), { recursive: true });
      await writeFile(outputPath, content, 'utf-8');
    } catch (error) {
      const wrapped = new ExporterError(`Failed to write export file: ${outputPath}`, ExporterErrorCode.FILE_WRITE_ERROR, {
        outputPath,
        error: error instanceof Error ? error.message : 'Unknown error',
      });
      // Keep the underlying failure (and its stack) reachable for debugging.
      wrapped.cause = error;
      throw wrapped;
    }

    return {
      success: true,
      format,
      output_path: outputPath,
      bytes_written: Buffer.byteLength(content, 'utf-8'),
      record_count: recordCount,
    };
  }

  // ═══════════════════════════════════════════════════════════════════════════
  // PRIVATE HELPERS
  // ═══════════════════════════════════════════════════════════════════════════

  /**
   * Validate scope and documentId
   *
   * @param scope - Scope to check against 'document', 'database', 'all'
   * @param documentId - Must be truthy when scope is 'document'
   * @throws ExporterError if invalid
   */
  validateScope(scope, documentId) {
    const validScopes = ['document', 'database', 'all'];
    if (!validScopes.includes(scope)) {
      throw new ExporterError(`Invalid export scope: ${scope}. Valid scopes: ${validScopes.join(', ')}`, ExporterErrorCode.INVALID_SCOPE, { providedScope: scope, validScopes });
    }
    if (scope === 'document' && !documentId) {
      throw new ExporterError('documentId is required when scope is "document"', ExporterErrorCode.DOCUMENT_REQUIRED, { scope });
    }
  }

  /**
   * Get provenance records for the given scope
   * Note: validateScope() must be called before this to ensure documentId is present when needed
   *
   * @param scope - Already-validated export scope
   * @param documentId - Root document id, used only when scope='document'
   * @returns Array of provenance records
   */
  getRecordsForScope(scope, documentId) {
    if (scope === 'document') {
      return this.tracker.getProvenanceByRootDocument(documentId);
    }
    // 'database' and 'all' are equivalent - return all records
    return this.getAllProvenance();
  }

  /**
   * Get all provenance records from database
   * Ordered by chain_depth ASC, created_at ASC
   *
   * @returns Array with one converted record per row of the provenance table
   */
  getAllProvenance() {
    // H-5: Use iterate() to avoid double-allocation (.all() + .map())
    const records = [];
    const stmt = this.rawDb.prepare('SELECT * FROM provenance ORDER BY chain_depth ASC, created_at ASC');
    for (const row of stmt.iterate()) {
      records.push(rowToProvenance(row));
    }
    return records;
  }

  /**
   * Transform provenance records to W3C PROV-JSON format
   *
   * Mapping rules:
   * - Each ProvenanceRecord becomes an entity: ocr:<type>-<id>
   * - Non-DOCUMENT types create an activity: ocr:activity-<id>
   * - Unique agents per processor name (two processor names that sanitize to
   *   the same id get distinct ids via a numeric suffix)
   * - wasDerivedFrom links entities with source_id (only when the source
   *   record is part of the exported set)
   * - wasGeneratedBy links entities to their activities
   * - wasAttributedTo links all entities to their processor agents
   *
   * @param records - Provenance records to transform
   * @returns PROV-JSON document object
   */
  transformToPROVJSON(records) {
    const doc = {
      prefix: {
        prov: ProvenanceExporter.PROV_PREFIX,
        ocr: ProvenanceExporter.OCR_PREFIX,
        ocrp: ProvenanceExporter.OCRP_PREFIX,
      },
      entity: {},
      activity: {},
      agent: {},
      wasDerivedFrom: {},
      wasGeneratedBy: {},
      wasAttributedTo: {},
    };

    const entityIdOf = (rec) => `ocr:${rec.type.toLowerCase()}-${rec.id}`;

    // M-13: Map-based lookup to avoid O(N^2) find() in wasDerivedFrom
    const recordById = new Map();
    for (const record of records) {
      recordById.set(record.id, record);
    }

    // processor name -> agent ID, plus the set of IDs already handed out so
    // that distinct processor names never share (and overwrite) one agent.
    const agents = new Map();
    const usedAgentIds = new Set();

    for (const record of records) {
      // 1. Create entity
      const entityId = entityIdOf(record);
      const entity = {
        'prov:type': `ocr:${record.type}`,
        'prov:generatedAtTime': record.created_at,
        'ocr:content_hash': record.content_hash,
        'ocr:chain_depth': record.chain_depth,
        'ocr:root_document_id': record.root_document_id,
        'ocr:processor': record.processor,
        'ocr:processor_version': record.processor_version,
      };
      doc.entity[entityId] = entity;

      // Add location if present
      if (record.location) {
        entity['ocr:location'] = record.location;
      }
      // Add source_path for DOCUMENT type
      if (record.type === ProvenanceType.DOCUMENT && record.source_path) {
        entity['ocr:source_path'] = record.source_path;
      }
      // Add specific attributes for IMAGE type
      if (record.type === ProvenanceType.IMAGE) {
        entity['prov:type'] = 'ocrp:Image';
        if (record.location?.bounding_box) {
          entity['ocrp:bounding_box'] = record.location.bounding_box;
        }
        // Explicit null check so a page number of 0 is not silently dropped.
        if (record.location?.page_number != null) {
          entity['ocrp:page_number'] = record.location.page_number;
        }
      }
      // Add specific attributes for VLM_DESCRIPTION type
      if (record.type === ProvenanceType.VLM_DESCRIPTION) {
        entity['prov:type'] = 'ocrp:VLMDescription';
      }
      // Add file_hash if present
      if (record.file_hash) {
        entity['ocr:file_hash'] = record.file_hash;
      }

      // 2. Create activity for non-DOCUMENT types
      if (record.type !== ProvenanceType.DOCUMENT) {
        const activityId = `ocr:activity-${record.id}`;
        const activity = {
          'prov:type': `ocr:${record.source_type}Activity`,
          'prov:startTime': record.created_at,
          'prov:endTime': record.processed_at,
          'ocr:processor': record.processor,
          'ocr:processor_version': record.processor_version,
          'ocr:processing_params': record.processing_params,
        };
        // Skip both null and undefined durations.
        if (record.processing_duration_ms != null) {
          activity['ocr:processing_duration_ms'] = record.processing_duration_ms;
        }
        doc.activity[activityId] = activity;

        // 3. wasGeneratedBy - link entity to activity
        doc.wasGeneratedBy[`ocr:wgb-${record.id}`] = {
          'prov:entity': entityId,
          'prov:activity': activityId,
          'prov:time': record.processed_at,
        };
      }

      // 4. Create/link agent
      let agentId = agents.get(record.processor);
      if (agentId === undefined) {
        const baseId = `ocr:agent-${this.sanitizeAgentId(record.processor)}`;
        agentId = baseId;
        for (let suffix = 2; usedAgentIds.has(agentId); suffix += 1) {
          agentId = `${baseId}-${suffix}`;
        }
        usedAgentIds.add(agentId);
        agents.set(record.processor, agentId);
        doc.agent[agentId] = {
          'prov:type': 'prov:SoftwareAgent',
          'ocr:name': record.processor,
          'ocr:version': record.processor_version,
        };
      }

      // 5. wasAttributedTo - link entity to agent
      doc.wasAttributedTo[`ocr:wat-${record.id}`] = {
        'prov:entity': entityId,
        'prov:agent': agentId,
      };

      // 6. wasDerivedFrom - link to source entity if present
      if (record.source_id) {
        // M-13: Use Map lookup instead of O(N) find()
        const sourceRecord = recordById.get(record.source_id);
        if (sourceRecord) {
          doc.wasDerivedFrom[`ocr:wdf-${record.id}`] = {
            'prov:generatedEntity': entityId,
            'prov:usedEntity': entityIdOf(sourceRecord),
          };
        }
      }
    }
    return doc;
  }

  /**
   * Sanitize processor name for use as agent ID
   * Only allows alphanumeric and hyphens
   *
   * Lower-cases the name, replaces each run of other characters with a single
   * hyphen and strips all leading/trailing hyphens. Falls back to 'unknown'
   * when nothing usable remains, so the agent ID never ends in a bare prefix.
   *
   * @param processor - Processor name (null/undefined is treated as empty)
   * @returns Sanitized identifier fragment
   */
  sanitizeAgentId(processor) {
    const cleaned = String(processor ?? '')
      .toLowerCase()
      .replace(/[^a-z0-9-]+/g, '-')
      .replace(/^-+|-+$/g, '');
    return cleaned === '' ? 'unknown' : cleaned;
  }

  /**
   * Transform provenance records to CSV format
   *
   * The first line is the header row; each record contributes one line.
   * Lines are joined with '\n'. Missing (null/undefined) cells are written
   * as empty fields.
   *
   * @param records - Provenance records to serialise
   * @returns CSV text (header only when `records` is empty)
   */
  transformToCSV(records) {
    const headers = [
      'id',
      'type',
      'created_at',
      'processed_at',
      'source_type',
      'source_path',
      'source_id',
      'root_document_id',
      'content_hash',
      'input_hash',
      'file_hash',
      'processor',
      'processor_version',
      'processing_params',
      'processing_duration_ms',
      'processing_quality_score',
      'parent_id',
      'parent_ids',
      'chain_depth',
      'chain_path',
      'location',
    ];
    const headerRow = headers.join(',');
    if (records.length === 0) {
      return headerRow;
    }
    const dataRows = records.map((record) => {
      // Order must match `headers` above.
      const cells = [
        record.id,
        record.type,
        record.created_at,
        record.processed_at,
        record.source_type,
        record.source_path,
        record.source_id,
        record.root_document_id,
        record.content_hash,
        record.input_hash,
        record.file_hash,
        record.processor,
        record.processor_version,
        JSON.stringify(record.processing_params),
        record.processing_duration_ms,
        record.processing_quality_score,
        record.parent_id,
        record.parent_ids,
        record.chain_depth,
        record.chain_path,
        record.location ? JSON.stringify(record.location) : '',
      ];
      return cells.map((cell) => this.escapeCSV(cell)).join(',');
    });
    return [headerRow, ...dataRows].join('\n');
  }

  /**
   * Escape a value for CSV
   * - null/undefined become an empty field; other non-strings are stringified
   * - Wrap in quotes if contains comma, quote, or newline
   * - Escape quotes by doubling them
   *
   * @param value - Cell value
   * @returns CSV-safe field text
   */
  escapeCSV(value) {
    const text = typeof value === 'string' ? value : String(value ?? '');
    if (/[",\r\n]/.test(text)) {
      return `"${text.replace(/"/g, '""')}"`;
    }
    return text;
  }
}
457
+ //# sourceMappingURL=exporter.js.map