ocr-provenance-mcp 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ocr-provenance-mcp might be problematic. Click here for more details.

Files changed (578) hide show
  1. package/.env.example +55 -0
  2. package/LICENSE +78 -0
  3. package/README.md +1154 -0
  4. package/dist/bin-http.d.ts +24 -0
  5. package/dist/bin-http.d.ts.map +1 -0
  6. package/dist/bin-http.js +275 -0
  7. package/dist/bin-http.js.map +1 -0
  8. package/dist/bin-setup.d.ts +11 -0
  9. package/dist/bin-setup.d.ts.map +1 -0
  10. package/dist/bin-setup.js +610 -0
  11. package/dist/bin-setup.js.map +1 -0
  12. package/dist/bin.d.ts +16 -0
  13. package/dist/bin.d.ts.map +1 -0
  14. package/dist/bin.js +16 -0
  15. package/dist/bin.js.map +1 -0
  16. package/dist/index.d.ts +13 -0
  17. package/dist/index.d.ts.map +1 -0
  18. package/dist/index.js +90 -0
  19. package/dist/index.js.map +1 -0
  20. package/dist/models/chunk.d.ts +136 -0
  21. package/dist/models/chunk.d.ts.map +1 -0
  22. package/dist/models/chunk.js +27 -0
  23. package/dist/models/chunk.js.map +1 -0
  24. package/dist/models/cluster.d.ts +79 -0
  25. package/dist/models/cluster.d.ts.map +1 -0
  26. package/dist/models/cluster.js +10 -0
  27. package/dist/models/cluster.js.map +1 -0
  28. package/dist/models/comparison.d.ts +62 -0
  29. package/dist/models/comparison.d.ts.map +1 -0
  30. package/dist/models/comparison.js +8 -0
  31. package/dist/models/comparison.js.map +1 -0
  32. package/dist/models/document.d.ts +104 -0
  33. package/dist/models/document.d.ts.map +1 -0
  34. package/dist/models/document.js +15 -0
  35. package/dist/models/document.js.map +1 -0
  36. package/dist/models/embedding.d.ts +87 -0
  37. package/dist/models/embedding.d.ts.map +1 -0
  38. package/dist/models/embedding.js +23 -0
  39. package/dist/models/embedding.js.map +1 -0
  40. package/dist/models/extraction.d.ts +15 -0
  41. package/dist/models/extraction.d.ts.map +1 -0
  42. package/dist/models/extraction.js +2 -0
  43. package/dist/models/extraction.js.map +1 -0
  44. package/dist/models/form-fill.d.ts +23 -0
  45. package/dist/models/form-fill.d.ts.map +1 -0
  46. package/dist/models/form-fill.js +2 -0
  47. package/dist/models/form-fill.js.map +1 -0
  48. package/dist/models/image.d.ts +177 -0
  49. package/dist/models/image.d.ts.map +1 -0
  50. package/dist/models/image.js +8 -0
  51. package/dist/models/image.js.map +1 -0
  52. package/dist/models/index.d.ts +14 -0
  53. package/dist/models/index.d.ts.map +1 -0
  54. package/dist/models/index.js +22 -0
  55. package/dist/models/index.js.map +1 -0
  56. package/dist/models/provenance.d.ts +174 -0
  57. package/dist/models/provenance.d.ts.map +1 -0
  58. package/dist/models/provenance.js +53 -0
  59. package/dist/models/provenance.js.map +1 -0
  60. package/dist/models/uploaded-file.d.ts +20 -0
  61. package/dist/models/uploaded-file.d.ts.map +1 -0
  62. package/dist/models/uploaded-file.js +2 -0
  63. package/dist/models/uploaded-file.js.map +1 -0
  64. package/dist/server/errors.d.ts +93 -0
  65. package/dist/server/errors.d.ts.map +1 -0
  66. package/dist/server/errors.js +256 -0
  67. package/dist/server/errors.js.map +1 -0
  68. package/dist/server/events.d.ts +36 -0
  69. package/dist/server/events.d.ts.map +1 -0
  70. package/dist/server/events.js +48 -0
  71. package/dist/server/events.js.map +1 -0
  72. package/dist/server/permissions.d.ts +26 -0
  73. package/dist/server/permissions.d.ts.map +1 -0
  74. package/dist/server/permissions.js +194 -0
  75. package/dist/server/permissions.js.map +1 -0
  76. package/dist/server/register-tools.d.ts +25 -0
  77. package/dist/server/register-tools.d.ts.map +1 -0
  78. package/dist/server/register-tools.js +102 -0
  79. package/dist/server/register-tools.js.map +1 -0
  80. package/dist/server/startup.d.ts +16 -0
  81. package/dist/server/startup.d.ts.map +1 -0
  82. package/dist/server/startup.js +37 -0
  83. package/dist/server/startup.js.map +1 -0
  84. package/dist/server/state.d.ts +166 -0
  85. package/dist/server/state.d.ts.map +1 -0
  86. package/dist/server/state.js +424 -0
  87. package/dist/server/state.js.map +1 -0
  88. package/dist/server/transports/http-transport.d.ts +37 -0
  89. package/dist/server/transports/http-transport.d.ts.map +1 -0
  90. package/dist/server/transports/http-transport.js +204 -0
  91. package/dist/server/transports/http-transport.js.map +1 -0
  92. package/dist/server/transports/index.d.ts +9 -0
  93. package/dist/server/transports/index.d.ts.map +1 -0
  94. package/dist/server/transports/index.js +9 -0
  95. package/dist/server/transports/index.js.map +1 -0
  96. package/dist/server/transports/session-manager.d.ts +40 -0
  97. package/dist/server/transports/session-manager.d.ts.map +1 -0
  98. package/dist/server/transports/session-manager.js +74 -0
  99. package/dist/server/transports/session-manager.js.map +1 -0
  100. package/dist/server/types.d.ts +82 -0
  101. package/dist/server/types.d.ts.map +1 -0
  102. package/dist/server/types.js +14 -0
  103. package/dist/server/types.js.map +1 -0
  104. package/dist/services/audit.d.ts +26 -0
  105. package/dist/services/audit.d.ts.map +1 -0
  106. package/dist/services/audit.js +43 -0
  107. package/dist/services/audit.js.map +1 -0
  108. package/dist/services/chunking/chunk-deduplicator.d.ts +33 -0
  109. package/dist/services/chunking/chunk-deduplicator.d.ts.map +1 -0
  110. package/dist/services/chunking/chunk-deduplicator.js +46 -0
  111. package/dist/services/chunking/chunk-deduplicator.js.map +1 -0
  112. package/dist/services/chunking/chunk-merger.d.ts +26 -0
  113. package/dist/services/chunking/chunk-merger.d.ts.map +1 -0
  114. package/dist/services/chunking/chunk-merger.js +94 -0
  115. package/dist/services/chunking/chunk-merger.js.map +1 -0
  116. package/dist/services/chunking/chunker.d.ts +62 -0
  117. package/dist/services/chunking/chunker.d.ts.map +1 -0
  118. package/dist/services/chunking/chunker.js +566 -0
  119. package/dist/services/chunking/chunker.js.map +1 -0
  120. package/dist/services/chunking/heading-normalizer.d.ts +33 -0
  121. package/dist/services/chunking/heading-normalizer.d.ts.map +1 -0
  122. package/dist/services/chunking/heading-normalizer.js +101 -0
  123. package/dist/services/chunking/heading-normalizer.js.map +1 -0
  124. package/dist/services/chunking/json-block-analyzer.d.ts +163 -0
  125. package/dist/services/chunking/json-block-analyzer.d.ts.map +1 -0
  126. package/dist/services/chunking/json-block-analyzer.js +1033 -0
  127. package/dist/services/chunking/json-block-analyzer.js.map +1 -0
  128. package/dist/services/chunking/markdown-parser.d.ts +75 -0
  129. package/dist/services/chunking/markdown-parser.d.ts.map +1 -0
  130. package/dist/services/chunking/markdown-parser.js +428 -0
  131. package/dist/services/chunking/markdown-parser.js.map +1 -0
  132. package/dist/services/chunking/text-normalizer.d.ts +20 -0
  133. package/dist/services/chunking/text-normalizer.d.ts.map +1 -0
  134. package/dist/services/chunking/text-normalizer.js +36 -0
  135. package/dist/services/chunking/text-normalizer.js.map +1 -0
  136. package/dist/services/clm/contract-schemas.d.ts +36 -0
  137. package/dist/services/clm/contract-schemas.d.ts.map +1 -0
  138. package/dist/services/clm/contract-schemas.js +92 -0
  139. package/dist/services/clm/contract-schemas.js.map +1 -0
  140. package/dist/services/clm/summarization.d.ts +46 -0
  141. package/dist/services/clm/summarization.d.ts.map +1 -0
  142. package/dist/services/clm/summarization.js +61 -0
  143. package/dist/services/clm/summarization.js.map +1 -0
  144. package/dist/services/clustering/clustering-service.d.ts +58 -0
  145. package/dist/services/clustering/clustering-service.d.ts.map +1 -0
  146. package/dist/services/clustering/clustering-service.js +467 -0
  147. package/dist/services/clustering/clustering-service.js.map +1 -0
  148. package/dist/services/comparison/diff-service.d.ts +41 -0
  149. package/dist/services/comparison/diff-service.d.ts.map +1 -0
  150. package/dist/services/comparison/diff-service.js +120 -0
  151. package/dist/services/comparison/diff-service.js.map +1 -0
  152. package/dist/services/embedding/embedder.d.ts +55 -0
  153. package/dist/services/embedding/embedder.d.ts.map +1 -0
  154. package/dist/services/embedding/embedder.js +202 -0
  155. package/dist/services/embedding/embedder.js.map +1 -0
  156. package/dist/services/embedding/nomic.d.ts +67 -0
  157. package/dist/services/embedding/nomic.d.ts.map +1 -0
  158. package/dist/services/embedding/nomic.js +280 -0
  159. package/dist/services/embedding/nomic.js.map +1 -0
  160. package/dist/services/gemini/circuit-breaker.d.ts +106 -0
  161. package/dist/services/gemini/circuit-breaker.d.ts.map +1 -0
  162. package/dist/services/gemini/circuit-breaker.js +237 -0
  163. package/dist/services/gemini/circuit-breaker.js.map +1 -0
  164. package/dist/services/gemini/client.d.ts +173 -0
  165. package/dist/services/gemini/client.d.ts.map +1 -0
  166. package/dist/services/gemini/client.js +483 -0
  167. package/dist/services/gemini/client.js.map +1 -0
  168. package/dist/services/gemini/config.d.ts +116 -0
  169. package/dist/services/gemini/config.d.ts.map +1 -0
  170. package/dist/services/gemini/config.js +118 -0
  171. package/dist/services/gemini/config.js.map +1 -0
  172. package/dist/services/gemini/index.d.ts +9 -0
  173. package/dist/services/gemini/index.d.ts.map +1 -0
  174. package/dist/services/gemini/index.js +13 -0
  175. package/dist/services/gemini/index.js.map +1 -0
  176. package/dist/services/gemini/rate-limiter.d.ts +62 -0
  177. package/dist/services/gemini/rate-limiter.d.ts.map +1 -0
  178. package/dist/services/gemini/rate-limiter.js +120 -0
  179. package/dist/services/gemini/rate-limiter.js.map +1 -0
  180. package/dist/services/images/extractor.d.ts +88 -0
  181. package/dist/services/images/extractor.d.ts.map +1 -0
  182. package/dist/services/images/extractor.js +340 -0
  183. package/dist/services/images/extractor.js.map +1 -0
  184. package/dist/services/images/optimizer.d.ts +130 -0
  185. package/dist/services/images/optimizer.d.ts.map +1 -0
  186. package/dist/services/images/optimizer.js +228 -0
  187. package/dist/services/images/optimizer.js.map +1 -0
  188. package/dist/services/ocr/datalab.d.ts +64 -0
  189. package/dist/services/ocr/datalab.d.ts.map +1 -0
  190. package/dist/services/ocr/datalab.js +425 -0
  191. package/dist/services/ocr/datalab.js.map +1 -0
  192. package/dist/services/ocr/errors.d.ts +38 -0
  193. package/dist/services/ocr/errors.d.ts.map +1 -0
  194. package/dist/services/ocr/errors.js +83 -0
  195. package/dist/services/ocr/errors.js.map +1 -0
  196. package/dist/services/ocr/file-manager.d.ts +76 -0
  197. package/dist/services/ocr/file-manager.d.ts.map +1 -0
  198. package/dist/services/ocr/file-manager.js +238 -0
  199. package/dist/services/ocr/file-manager.js.map +1 -0
  200. package/dist/services/ocr/form-fill.d.ts +48 -0
  201. package/dist/services/ocr/form-fill.d.ts.map +1 -0
  202. package/dist/services/ocr/form-fill.js +213 -0
  203. package/dist/services/ocr/form-fill.js.map +1 -0
  204. package/dist/services/ocr/processor.d.ts +95 -0
  205. package/dist/services/ocr/processor.d.ts.map +1 -0
  206. package/dist/services/ocr/processor.js +259 -0
  207. package/dist/services/ocr/processor.js.map +1 -0
  208. package/dist/services/provenance/agent-metadata.d.ts +82 -0
  209. package/dist/services/provenance/agent-metadata.d.ts.map +1 -0
  210. package/dist/services/provenance/agent-metadata.js +106 -0
  211. package/dist/services/provenance/agent-metadata.js.map +1 -0
  212. package/dist/services/provenance/chain-hash.d.ts +57 -0
  213. package/dist/services/provenance/chain-hash.d.ts.map +1 -0
  214. package/dist/services/provenance/chain-hash.js +131 -0
  215. package/dist/services/provenance/chain-hash.js.map +1 -0
  216. package/dist/services/provenance/exporter.d.ts +202 -0
  217. package/dist/services/provenance/exporter.d.ts.map +1 -0
  218. package/dist/services/provenance/exporter.js +457 -0
  219. package/dist/services/provenance/exporter.js.map +1 -0
  220. package/dist/services/provenance/index.d.ts +15 -0
  221. package/dist/services/provenance/index.d.ts.map +1 -0
  222. package/dist/services/provenance/index.js +17 -0
  223. package/dist/services/provenance/index.js.map +1 -0
  224. package/dist/services/provenance/tracker.d.ts +138 -0
  225. package/dist/services/provenance/tracker.d.ts.map +1 -0
  226. package/dist/services/provenance/tracker.js +293 -0
  227. package/dist/services/provenance/tracker.js.map +1 -0
  228. package/dist/services/provenance/verifier.d.ts +153 -0
  229. package/dist/services/provenance/verifier.d.ts.map +1 -0
  230. package/dist/services/provenance/verifier.js +536 -0
  231. package/dist/services/provenance/verifier.js.map +1 -0
  232. package/dist/services/python-pool.d.ts +70 -0
  233. package/dist/services/python-pool.d.ts.map +1 -0
  234. package/dist/services/python-pool.js +265 -0
  235. package/dist/services/python-pool.js.map +1 -0
  236. package/dist/services/search/bm25.d.ts +180 -0
  237. package/dist/services/search/bm25.d.ts.map +1 -0
  238. package/dist/services/search/bm25.js +656 -0
  239. package/dist/services/search/bm25.js.map +1 -0
  240. package/dist/services/search/fusion.d.ts +103 -0
  241. package/dist/services/search/fusion.d.ts.map +1 -0
  242. package/dist/services/search/fusion.js +122 -0
  243. package/dist/services/search/fusion.js.map +1 -0
  244. package/dist/services/search/local-reranker.d.ts +30 -0
  245. package/dist/services/search/local-reranker.d.ts.map +1 -0
  246. package/dist/services/search/local-reranker.js +123 -0
  247. package/dist/services/search/local-reranker.js.map +1 -0
  248. package/dist/services/search/quality.d.ts +11 -0
  249. package/dist/services/search/quality.d.ts.map +1 -0
  250. package/dist/services/search/quality.js +17 -0
  251. package/dist/services/search/quality.js.map +1 -0
  252. package/dist/services/search/query-classifier.d.ts +34 -0
  253. package/dist/services/search/query-classifier.d.ts.map +1 -0
  254. package/dist/services/search/query-classifier.js +114 -0
  255. package/dist/services/search/query-classifier.js.map +1 -0
  256. package/dist/services/search/query-expander.d.ts +73 -0
  257. package/dist/services/search/query-expander.d.ts.map +1 -0
  258. package/dist/services/search/query-expander.js +281 -0
  259. package/dist/services/search/query-expander.js.map +1 -0
  260. package/dist/services/search/reranker.d.ts +44 -0
  261. package/dist/services/search/reranker.d.ts.map +1 -0
  262. package/dist/services/search/reranker.js +101 -0
  263. package/dist/services/search/reranker.js.map +1 -0
  264. package/dist/services/storage/database/annotation-operations.d.ts +113 -0
  265. package/dist/services/storage/database/annotation-operations.d.ts.map +1 -0
  266. package/dist/services/storage/database/annotation-operations.js +177 -0
  267. package/dist/services/storage/database/annotation-operations.js.map +1 -0
  268. package/dist/services/storage/database/approval-operations.d.ts +132 -0
  269. package/dist/services/storage/database/approval-operations.d.ts.map +1 -0
  270. package/dist/services/storage/database/approval-operations.js +206 -0
  271. package/dist/services/storage/database/approval-operations.js.map +1 -0
  272. package/dist/services/storage/database/chunk-operations.d.ts +132 -0
  273. package/dist/services/storage/database/chunk-operations.d.ts.map +1 -0
  274. package/dist/services/storage/database/chunk-operations.js +306 -0
  275. package/dist/services/storage/database/chunk-operations.js.map +1 -0
  276. package/dist/services/storage/database/cluster-operations.d.ts +97 -0
  277. package/dist/services/storage/database/cluster-operations.d.ts.map +1 -0
  278. package/dist/services/storage/database/cluster-operations.js +258 -0
  279. package/dist/services/storage/database/cluster-operations.js.map +1 -0
  280. package/dist/services/storage/database/comparison-operations.d.ts +41 -0
  281. package/dist/services/storage/database/comparison-operations.d.ts.map +1 -0
  282. package/dist/services/storage/database/comparison-operations.js +65 -0
  283. package/dist/services/storage/database/comparison-operations.js.map +1 -0
  284. package/dist/services/storage/database/converters.d.ts +36 -0
  285. package/dist/services/storage/database/converters.d.ts.map +1 -0
  286. package/dist/services/storage/database/converters.js +244 -0
  287. package/dist/services/storage/database/converters.js.map +1 -0
  288. package/dist/services/storage/database/document-operations.d.ts +145 -0
  289. package/dist/services/storage/database/document-operations.d.ts.map +1 -0
  290. package/dist/services/storage/database/document-operations.js +498 -0
  291. package/dist/services/storage/database/document-operations.js.map +1 -0
  292. package/dist/services/storage/database/embedding-operations.d.ts +130 -0
  293. package/dist/services/storage/database/embedding-operations.d.ts.map +1 -0
  294. package/dist/services/storage/database/embedding-operations.js +315 -0
  295. package/dist/services/storage/database/embedding-operations.js.map +1 -0
  296. package/dist/services/storage/database/extraction-operations.d.ts +47 -0
  297. package/dist/services/storage/database/extraction-operations.d.ts.map +1 -0
  298. package/dist/services/storage/database/extraction-operations.js +85 -0
  299. package/dist/services/storage/database/extraction-operations.js.map +1 -0
  300. package/dist/services/storage/database/form-fill-operations.d.ts +58 -0
  301. package/dist/services/storage/database/form-fill-operations.d.ts.map +1 -0
  302. package/dist/services/storage/database/form-fill-operations.js +116 -0
  303. package/dist/services/storage/database/form-fill-operations.js.map +1 -0
  304. package/dist/services/storage/database/helpers.d.ts +29 -0
  305. package/dist/services/storage/database/helpers.d.ts.map +1 -0
  306. package/dist/services/storage/database/helpers.js +55 -0
  307. package/dist/services/storage/database/helpers.js.map +1 -0
  308. package/dist/services/storage/database/image-operations.d.ts +202 -0
  309. package/dist/services/storage/database/image-operations.d.ts.map +1 -0
  310. package/dist/services/storage/database/image-operations.js +484 -0
  311. package/dist/services/storage/database/image-operations.js.map +1 -0
  312. package/dist/services/storage/database/index.d.ts +13 -0
  313. package/dist/services/storage/database/index.d.ts.map +1 -0
  314. package/dist/services/storage/database/index.js +16 -0
  315. package/dist/services/storage/database/index.js.map +1 -0
  316. package/dist/services/storage/database/lock-operations.d.ts +59 -0
  317. package/dist/services/storage/database/lock-operations.d.ts.map +1 -0
  318. package/dist/services/storage/database/lock-operations.js +89 -0
  319. package/dist/services/storage/database/lock-operations.js.map +1 -0
  320. package/dist/services/storage/database/obligation-operations.d.ts +88 -0
  321. package/dist/services/storage/database/obligation-operations.d.ts.map +1 -0
  322. package/dist/services/storage/database/obligation-operations.js +206 -0
  323. package/dist/services/storage/database/obligation-operations.js.map +1 -0
  324. package/dist/services/storage/database/ocr-operations.d.ts +33 -0
  325. package/dist/services/storage/database/ocr-operations.d.ts.map +1 -0
  326. package/dist/services/storage/database/ocr-operations.js +70 -0
  327. package/dist/services/storage/database/ocr-operations.js.map +1 -0
  328. package/dist/services/storage/database/playbook-operations.d.ts +72 -0
  329. package/dist/services/storage/database/playbook-operations.d.ts.map +1 -0
  330. package/dist/services/storage/database/playbook-operations.js +247 -0
  331. package/dist/services/storage/database/playbook-operations.js.map +1 -0
  332. package/dist/services/storage/database/provenance-operations.d.ts +112 -0
  333. package/dist/services/storage/database/provenance-operations.d.ts.map +1 -0
  334. package/dist/services/storage/database/provenance-operations.js +251 -0
  335. package/dist/services/storage/database/provenance-operations.js.map +1 -0
  336. package/dist/services/storage/database/service.d.ts +142 -0
  337. package/dist/services/storage/database/service.d.ts.map +1 -0
  338. package/dist/services/storage/database/service.js +310 -0
  339. package/dist/services/storage/database/service.js.map +1 -0
  340. package/dist/services/storage/database/static-operations.d.ts +30 -0
  341. package/dist/services/storage/database/static-operations.d.ts.map +1 -0
  342. package/dist/services/storage/database/static-operations.js +218 -0
  343. package/dist/services/storage/database/static-operations.js.map +1 -0
  344. package/dist/services/storage/database/stats-operations.d.ts +101 -0
  345. package/dist/services/storage/database/stats-operations.d.ts.map +1 -0
  346. package/dist/services/storage/database/stats-operations.js +394 -0
  347. package/dist/services/storage/database/stats-operations.js.map +1 -0
  348. package/dist/services/storage/database/tag-operations.d.ts +76 -0
  349. package/dist/services/storage/database/tag-operations.d.ts.map +1 -0
  350. package/dist/services/storage/database/tag-operations.js +178 -0
  351. package/dist/services/storage/database/tag-operations.js.map +1 -0
  352. package/dist/services/storage/database/types.d.ts +286 -0
  353. package/dist/services/storage/database/types.d.ts.map +1 -0
  354. package/dist/services/storage/database/types.js +39 -0
  355. package/dist/services/storage/database/types.js.map +1 -0
  356. package/dist/services/storage/database/upload-operations.d.ts +71 -0
  357. package/dist/services/storage/database/upload-operations.d.ts.map +1 -0
  358. package/dist/services/storage/database/upload-operations.js +124 -0
  359. package/dist/services/storage/database/upload-operations.js.map +1 -0
  360. package/dist/services/storage/database/user-operations.d.ts +102 -0
  361. package/dist/services/storage/database/user-operations.d.ts.map +1 -0
  362. package/dist/services/storage/database/user-operations.js +151 -0
  363. package/dist/services/storage/database/user-operations.js.map +1 -0
  364. package/dist/services/storage/database/workflow-operations.d.ts +98 -0
  365. package/dist/services/storage/database/workflow-operations.d.ts.map +1 -0
  366. package/dist/services/storage/database/workflow-operations.js +157 -0
  367. package/dist/services/storage/database/workflow-operations.js.map +1 -0
  368. package/dist/services/storage/database.d.ts +16 -0
  369. package/dist/services/storage/database.d.ts.map +1 -0
  370. package/dist/services/storage/database.js +15 -0
  371. package/dist/services/storage/database.js.map +1 -0
  372. package/dist/services/storage/index.d.ts +10 -0
  373. package/dist/services/storage/index.d.ts.map +1 -0
  374. package/dist/services/storage/index.js +10 -0
  375. package/dist/services/storage/index.js.map +1 -0
  376. package/dist/services/storage/migrations/index.d.ts +16 -0
  377. package/dist/services/storage/migrations/index.d.ts.map +1 -0
  378. package/dist/services/storage/migrations/index.js +20 -0
  379. package/dist/services/storage/migrations/index.js.map +1 -0
  380. package/dist/services/storage/migrations/operations.d.ts +40 -0
  381. package/dist/services/storage/migrations/operations.d.ts.map +1 -0
  382. package/dist/services/storage/migrations/operations.js +2910 -0
  383. package/dist/services/storage/migrations/operations.js.map +1 -0
  384. package/dist/services/storage/migrations/schema-definitions.d.ts +306 -0
  385. package/dist/services/storage/migrations/schema-definitions.d.ts.map +1 -0
  386. package/dist/services/storage/migrations/schema-definitions.js +1006 -0
  387. package/dist/services/storage/migrations/schema-definitions.js.map +1 -0
  388. package/dist/services/storage/migrations/schema-helpers.d.ts +50 -0
  389. package/dist/services/storage/migrations/schema-helpers.d.ts.map +1 -0
  390. package/dist/services/storage/migrations/schema-helpers.js +176 -0
  391. package/dist/services/storage/migrations/schema-helpers.js.map +1 -0
  392. package/dist/services/storage/migrations/types.d.ts +15 -0
  393. package/dist/services/storage/migrations/types.d.ts.map +1 -0
  394. package/dist/services/storage/migrations/types.js +21 -0
  395. package/dist/services/storage/migrations/types.js.map +1 -0
  396. package/dist/services/storage/migrations/verification.d.ts +20 -0
  397. package/dist/services/storage/migrations/verification.d.ts.map +1 -0
  398. package/dist/services/storage/migrations/verification.js +78 -0
  399. package/dist/services/storage/migrations/verification.js.map +1 -0
  400. package/dist/services/storage/migrations.d.ts +16 -0
  401. package/dist/services/storage/migrations.d.ts.map +1 -0
  402. package/dist/services/storage/migrations.js +17 -0
  403. package/dist/services/storage/migrations.js.map +1 -0
  404. package/dist/services/storage/types.d.ts +12 -0
  405. package/dist/services/storage/types.d.ts.map +1 -0
  406. package/dist/services/storage/types.js +5 -0
  407. package/dist/services/storage/types.js.map +1 -0
  408. package/dist/services/storage/vector.d.ts +208 -0
  409. package/dist/services/storage/vector.d.ts.map +1 -0
  410. package/dist/services/storage/vector.js +526 -0
  411. package/dist/services/storage/vector.js.map +1 -0
  412. package/dist/services/vlm/pipeline.d.ts +194 -0
  413. package/dist/services/vlm/pipeline.d.ts.map +1 -0
  414. package/dist/services/vlm/pipeline.js +800 -0
  415. package/dist/services/vlm/pipeline.js.map +1 -0
  416. package/dist/services/vlm/prompts.d.ts +171 -0
  417. package/dist/services/vlm/prompts.d.ts.map +1 -0
  418. package/dist/services/vlm/prompts.js +229 -0
  419. package/dist/services/vlm/prompts.js.map +1 -0
  420. package/dist/services/vlm/service.d.ts +174 -0
  421. package/dist/services/vlm/service.d.ts.map +1 -0
  422. package/dist/services/vlm/service.js +256 -0
  423. package/dist/services/vlm/service.js.map +1 -0
  424. package/dist/services/webhook-delivery.d.ts +4 -0
  425. package/dist/services/webhook-delivery.d.ts.map +1 -0
  426. package/dist/services/webhook-delivery.js +140 -0
  427. package/dist/services/webhook-delivery.js.map +1 -0
  428. package/dist/tools/chunks.d.ts +19 -0
  429. package/dist/tools/chunks.d.ts.map +1 -0
  430. package/dist/tools/chunks.js +392 -0
  431. package/dist/tools/chunks.js.map +1 -0
  432. package/dist/tools/clm.d.ts +16 -0
  433. package/dist/tools/clm.d.ts.map +1 -0
  434. package/dist/tools/clm.js +668 -0
  435. package/dist/tools/clm.js.map +1 -0
  436. package/dist/tools/clustering.d.ts +13 -0
  437. package/dist/tools/clustering.d.ts.map +1 -0
  438. package/dist/tools/clustering.js +498 -0
  439. package/dist/tools/clustering.js.map +1 -0
  440. package/dist/tools/collaboration.d.ts +15 -0
  441. package/dist/tools/collaboration.d.ts.map +1 -0
  442. package/dist/tools/collaboration.js +516 -0
  443. package/dist/tools/collaboration.js.map +1 -0
  444. package/dist/tools/comparison.d.ts +13 -0
  445. package/dist/tools/comparison.d.ts.map +1 -0
  446. package/dist/tools/comparison.js +735 -0
  447. package/dist/tools/comparison.js.map +1 -0
  448. package/dist/tools/compliance.d.ts +15 -0
  449. package/dist/tools/compliance.d.ts.map +1 -0
  450. package/dist/tools/compliance.js +640 -0
  451. package/dist/tools/compliance.js.map +1 -0
  452. package/dist/tools/config.d.ts +19 -0
  453. package/dist/tools/config.d.ts.map +1 -0
  454. package/dist/tools/config.js +213 -0
  455. package/dist/tools/config.js.map +1 -0
  456. package/dist/tools/database.d.ts +62 -0
  457. package/dist/tools/database.d.ts.map +1 -0
  458. package/dist/tools/database.js +288 -0
  459. package/dist/tools/database.js.map +1 -0
  460. package/dist/tools/documents.d.ts +61 -0
  461. package/dist/tools/documents.d.ts.map +1 -0
  462. package/dist/tools/documents.js +1624 -0
  463. package/dist/tools/documents.js.map +1 -0
  464. package/dist/tools/embeddings.d.ts +14 -0
  465. package/dist/tools/embeddings.d.ts.map +1 -0
  466. package/dist/tools/embeddings.js +626 -0
  467. package/dist/tools/embeddings.js.map +1 -0
  468. package/dist/tools/evaluation.d.ts +25 -0
  469. package/dist/tools/evaluation.d.ts.map +1 -0
  470. package/dist/tools/evaluation.js +523 -0
  471. package/dist/tools/evaluation.js.map +1 -0
  472. package/dist/tools/events.d.ts +16 -0
  473. package/dist/tools/events.d.ts.map +1 -0
  474. package/dist/tools/events.js +493 -0
  475. package/dist/tools/events.js.map +1 -0
  476. package/dist/tools/extraction-structured.d.ts +13 -0
  477. package/dist/tools/extraction-structured.d.ts.map +1 -0
  478. package/dist/tools/extraction-structured.js +390 -0
  479. package/dist/tools/extraction-structured.js.map +1 -0
  480. package/dist/tools/extraction.d.ts +24 -0
  481. package/dist/tools/extraction.d.ts.map +1 -0
  482. package/dist/tools/extraction.js +424 -0
  483. package/dist/tools/extraction.js.map +1 -0
  484. package/dist/tools/file-management.d.ts +14 -0
  485. package/dist/tools/file-management.d.ts.map +1 -0
  486. package/dist/tools/file-management.js +523 -0
  487. package/dist/tools/file-management.js.map +1 -0
  488. package/dist/tools/form-fill.d.ts +13 -0
  489. package/dist/tools/form-fill.d.ts.map +1 -0
  490. package/dist/tools/form-fill.js +250 -0
  491. package/dist/tools/form-fill.js.map +1 -0
  492. package/dist/tools/health.d.ts +19 -0
  493. package/dist/tools/health.d.ts.map +1 -0
  494. package/dist/tools/health.js +229 -0
  495. package/dist/tools/health.js.map +1 -0
  496. package/dist/tools/images.d.ts +54 -0
  497. package/dist/tools/images.d.ts.map +1 -0
  498. package/dist/tools/images.js +787 -0
  499. package/dist/tools/images.js.map +1 -0
  500. package/dist/tools/ingestion.d.ts +94 -0
  501. package/dist/tools/ingestion.d.ts.map +1 -0
  502. package/dist/tools/ingestion.js +1659 -0
  503. package/dist/tools/ingestion.js.map +1 -0
  504. package/dist/tools/intelligence.d.ts +18 -0
  505. package/dist/tools/intelligence.d.ts.map +1 -0
  506. package/dist/tools/intelligence.js +1039 -0
  507. package/dist/tools/intelligence.js.map +1 -0
  508. package/dist/tools/provenance.d.ts +51 -0
  509. package/dist/tools/provenance.d.ts.map +1 -0
  510. package/dist/tools/provenance.js +691 -0
  511. package/dist/tools/provenance.js.map +1 -0
  512. package/dist/tools/reports.d.ts +41 -0
  513. package/dist/tools/reports.d.ts.map +1 -0
  514. package/dist/tools/reports.js +1394 -0
  515. package/dist/tools/reports.js.map +1 -0
  516. package/dist/tools/search.d.ts +35 -0
  517. package/dist/tools/search.d.ts.map +1 -0
  518. package/dist/tools/search.js +2528 -0
  519. package/dist/tools/search.js.map +1 -0
  520. package/dist/tools/shared.d.ts +52 -0
  521. package/dist/tools/shared.d.ts.map +1 -0
  522. package/dist/tools/shared.js +54 -0
  523. package/dist/tools/shared.js.map +1 -0
  524. package/dist/tools/tags.d.ts +15 -0
  525. package/dist/tools/tags.d.ts.map +1 -0
  526. package/dist/tools/tags.js +287 -0
  527. package/dist/tools/tags.js.map +1 -0
  528. package/dist/tools/timeline.d.ts +15 -0
  529. package/dist/tools/timeline.d.ts.map +1 -0
  530. package/dist/tools/timeline.js +14 -0
  531. package/dist/tools/timeline.js.map +1 -0
  532. package/dist/tools/users.d.ts +14 -0
  533. package/dist/tools/users.d.ts.map +1 -0
  534. package/dist/tools/users.js +257 -0
  535. package/dist/tools/users.js.map +1 -0
  536. package/dist/tools/vlm.d.ts +40 -0
  537. package/dist/tools/vlm.d.ts.map +1 -0
  538. package/dist/tools/vlm.js +475 -0
  539. package/dist/tools/vlm.js.map +1 -0
  540. package/dist/tools/workflow.d.ts +16 -0
  541. package/dist/tools/workflow.d.ts.map +1 -0
  542. package/dist/tools/workflow.js +495 -0
  543. package/dist/tools/workflow.js.map +1 -0
  544. package/dist/utils/backoff.d.ts +53 -0
  545. package/dist/utils/backoff.d.ts.map +1 -0
  546. package/dist/utils/backoff.js +78 -0
  547. package/dist/utils/backoff.js.map +1 -0
  548. package/dist/utils/config-persistence.d.ts +33 -0
  549. package/dist/utils/config-persistence.d.ts.map +1 -0
  550. package/dist/utils/config-persistence.js +61 -0
  551. package/dist/utils/config-persistence.js.map +1 -0
  552. package/dist/utils/hash.d.ts +65 -0
  553. package/dist/utils/hash.d.ts.map +1 -0
  554. package/dist/utils/hash.js +146 -0
  555. package/dist/utils/hash.js.map +1 -0
  556. package/dist/utils/math.d.ts +21 -0
  557. package/dist/utils/math.d.ts.map +1 -0
  558. package/dist/utils/math.js +39 -0
  559. package/dist/utils/math.js.map +1 -0
  560. package/dist/utils/validation.d.ts +697 -0
  561. package/dist/utils/validation.d.ts.map +1 -0
  562. package/dist/utils/validation.js +529 -0
  563. package/dist/utils/validation.js.map +1 -0
  564. package/package.json +96 -0
  565. package/python/.gitkeep +0 -0
  566. package/python/__init__.py +104 -0
  567. package/python/clustering_worker.py +440 -0
  568. package/python/docx_image_extractor.py +524 -0
  569. package/python/embedding_worker.py +552 -0
  570. package/python/file_manager_worker.py +564 -0
  571. package/python/form_fill_worker.py +399 -0
  572. package/python/gpu_utils.py +582 -0
  573. package/python/image_extractor.py +317 -0
  574. package/python/image_optimizer.py +444 -0
  575. package/python/ocr_worker.py +712 -0
  576. package/python/pyproject.toml +76 -0
  577. package/python/requirements.txt +51 -0
  578. package/python/reranker_worker.py +87 -0
@@ -0,0 +1,582 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ GPU Utilities for OCR Provenance MCP System
4
+
5
+ Provides GPU verification, VRAM monitoring, and cross-platform device detection.
6
+ Supports CUDA (Linux/Windows), MPS (macOS Apple Silicon), and CPU fallback.
7
+ Auto-detects best available device at runtime.
8
+ """
9
+
10
+ import argparse
11
+ import json
12
+ import logging
13
+ import sys
14
+ from pathlib import Path
15
+ from typing import TypedDict
16
+
17
# Send diagnostics to stderr in a timestamped, per-logger format; results that
# the CLI prints go to stdout, so the two streams stay separate.
logging.basicConfig(
    stream=sys.stderr,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
24
+
25
+
26
+ # =============================================================================
27
+ # Error Classes (CS-ERR-001 compliant)
28
+ # =============================================================================
29
+
30
+
31
class GPUError(Exception):
    """Common base class for the GPU-related errors defined in this module."""
35
+
36
+
37
class GPUNotAvailableError(GPUError):
    """Signals that no CUDA GPU is available.

    The text is stored on ``message`` and is also the exception's ``str()`` value.
    """

    def __init__(
        self,
        message: str = "CUDA GPU not available. Try device='auto' for MPS (Apple Silicon) or CPU fallback.",
    ):
        super().__init__(message)
        self.message = message
46
+
47
+
48
class GPUOutOfMemoryError(GPUError):
    """Signals that the GPU ran out of memory.

    Attributes:
        message: Human-readable description (also the ``str()`` value).
        vram_required: Optional amount of VRAM that was needed, as supplied by the raiser.
        vram_available: Optional amount of VRAM that was available, as supplied by the raiser.
    """

    def __init__(
        self, message: str, vram_required: float | None = None, vram_available: float | None = None
    ):
        super().__init__(message)
        self.message = message
        self.vram_required = vram_required
        self.vram_available = vram_available
58
+
59
+
60
+ class EmbeddingModelError(Exception):
61
+ """Raised when embedding model fails to load or run."""
62
+
63
+ def __init__(self, message: str, model_path: str | None = None, cause: Exception | None = None):
64
+ self.message = message
65
+ self.model_path = model_path
66
+ self.cause = cause
67
+ super().__init__(self.message)
68
+
69
+
70
+ # =============================================================================
71
+ # Type Definitions
72
+ # =============================================================================
73
+
74
+
75
class GPUInfo(TypedDict):
    """Result record describing the detected GPU (or the lack of one)."""

    # True when a CUDA device was found.
    available: bool
    # Device name, or a placeholder description when no CUDA GPU exists.
    name: str
    # Memory figures, expressed in GB.
    vram_gb: float
    vram_used_gb: float
    vram_free_gb: float
    # Version / capability labels; "N/A" is used when no CUDA GPU exists.
    cuda_version: str
    compute_capability: str
    driver_version: str
86
+
87
+
88
class VRAMUsage(TypedDict):
    """Snapshot of VRAM usage; every figure is expressed in GB."""

    # Memory currently allocated.
    allocated_gb: float
    # Memory currently reserved.
    reserved_gb: float
    # Memory still free.
    free_gb: float
    # Total device memory.
    total_gb: float
95
+
96
+
97
+ class ModelInfo(TypedDict):
98
+ """Embedding model information."""
99
+
100
+ success: bool
101
+ model_path: str
102
+ error: str | None
103
+ model_info: dict
104
+
105
+
106
+ # =============================================================================
107
+ # GPU Verification Functions
108
+ # =============================================================================
109
+
110
+
111
def detect_best_device() -> str:
    """
    Pick the preferred compute device, in the order CUDA > MPS > CPU.

    Returns:
        'cuda:0' when CUDA is available, otherwise 'mps' when the MPS backend
        exists and is available, otherwise 'cpu'. Also 'cpu' when PyTorch
        cannot be imported.
    """
    try:
        import torch
    except ImportError:
        # Without PyTorch there is nothing to query, so report the CPU.
        return "cpu"

    if torch.cuda.is_available():
        return "cuda:0"

    # Guard with hasattr before touching torch.backends.mps.
    mps_usable = hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
    return "mps" if mps_usable else "cpu"
128
+
129
+
130
def verify_gpu() -> GPUInfo:
    """
    Verify GPU availability and return detailed information.

    Returns GPUInfo with available=False (instead of raising) when no CUDA GPU
    is found, allowing callers to check and handle gracefully.

    Memory figures are device-wide: free memory comes from
    ``torch.cuda.mem_get_info`` so memory held by other processes is counted.
    If that query is unavailable, the figures fall back to this process's own
    allocations. ``driver_version`` is the NVIDIA driver version reported by
    ``nvidia-smi``, or "unknown" when it cannot be determined.

    Returns:
        GPUInfo dict with GPU specifications

    Raises:
        ImportError: If PyTorch is not installed
    """
    logger.info("Starting GPU verification...")

    try:
        import torch
    except ImportError as e:
        logger.error("PyTorch not installed: %s", e)
        raise ImportError("PyTorch is not installed. Install with: pip install torch") from e

    if not torch.cuda.is_available():
        best = detect_best_device()
        logger.warning(
            "CUDA is not available. Best available device: %s. "
            "Set EMBEDDING_DEVICE=auto to use it automatically.",
            best,
        )
        return GPUInfo(
            available=False,
            name=f"No CUDA GPU (best device: {best})",
            vram_gb=0,
            vram_used_gb=0,
            vram_free_gb=0,
            cuda_version="N/A",
            compute_capability="N/A",
            driver_version="N/A",
        )

    device = torch.cuda.current_device()
    props = torch.cuda.get_device_properties(device)

    bytes_per_gb = 1024**3
    total_memory = props.total_memory / bytes_per_gb

    try:
        # Device-wide view: includes memory used by other processes.
        free_bytes, total_bytes = torch.cuda.mem_get_info(device)
        free = free_bytes / bytes_per_gb
        used = (total_bytes - free_bytes) / bytes_per_gb
    except (AttributeError, RuntimeError):
        # mem_get_info is missing or failed; fall back to what this process
        # itself has allocated.
        used = torch.cuda.memory_allocated(device) / bytes_per_gb
        free = total_memory - used

    compute_cap = f"{props.major}.{props.minor}"

    # Check minimum VRAM requirement (8GB recommended)
    if total_memory < 8.0:
        logger.warning(
            "GPU VRAM (%.2f GB) below recommended minimum (8 GB). Performance may be degraded.",
            total_memory,
        )

    gpu_info = GPUInfo(
        available=True,
        name=props.name,
        vram_gb=round(total_memory, 2),
        vram_used_gb=round(used, 2),
        vram_free_gb=round(free, 2),
        cuda_version=torch.version.cuda or "unknown",
        compute_capability=compute_cap,
        driver_version=_query_driver_version(),
    )

    logger.info(
        "GPU verified: %s, VRAM: %.2f GB, CUDA: %s, Compute: %s",
        gpu_info["name"],
        gpu_info["vram_gb"],
        gpu_info["cuda_version"],
        gpu_info["compute_capability"],
    )

    return gpu_info


def _query_driver_version() -> str:
    """Return the NVIDIA driver version reported by nvidia-smi, or "unknown"."""
    import shutil
    import subprocess

    smi = shutil.which("nvidia-smi")
    if smi is None:
        return "unknown"

    try:
        completed = subprocess.run(
            [smi, "--query-gpu=driver_version", "--format=csv,noheader"],
            capture_output=True,
            text=True,
            timeout=5,
            check=True,
        )
    except (OSError, subprocess.SubprocessError):
        # Best effort only: a missing or failing tool must not break verification.
        return "unknown"

    # One line is printed per GPU; the first line is taken.
    lines = completed.stdout.strip().splitlines()
    return lines[0].strip() if lines else "unknown"
205
+
206
+
207
def get_vram_usage() -> VRAMUsage:
    """
    Report current VRAM usage for the active CUDA device.

    The free figure is computed as total device memory minus the memory
    reserved by PyTorch in this process.

    Returns:
        VRAMUsage dict with allocated, reserved, free and total memory in GB

    Raises:
        GPUNotAvailableError: If CUDA is not available
        ImportError: If PyTorch is not installed
    """
    logger.debug("Querying VRAM usage...")

    try:
        import torch
    except ImportError as e:
        logger.error("PyTorch not installed: %s", e)
        raise ImportError("PyTorch not installed") from e

    if not torch.cuda.is_available():
        logger.error("CUDA not available for VRAM query")
        raise GPUNotAvailableError("CUDA not available for VRAM usage query")

    device = torch.cuda.current_device()
    bytes_per_gb = 1024**3

    allocated_gb = torch.cuda.memory_allocated(device) / bytes_per_gb
    reserved_gb = torch.cuda.memory_reserved(device) / bytes_per_gb
    total_gb = torch.cuda.get_device_properties(device).total_memory / bytes_per_gb

    usage: VRAMUsage = {
        "allocated_gb": round(allocated_gb, 3),
        "reserved_gb": round(reserved_gb, 3),
        "free_gb": round(total_gb - reserved_gb, 3),
        "total_gb": round(total_gb, 2),
    }

    logger.debug(
        "VRAM: allocated=%.3f GB, reserved=%.3f GB, free=%.3f GB, total=%.2f GB",
        usage["allocated_gb"],
        usage["reserved_gb"],
        usage["free_gb"],
        usage["total_gb"],
    )

    return usage
252
+
253
+
254
def verify_model_loading(model_path: str = "./models/nomic-embed-text-v1.5") -> ModelInfo:
    """
    Check that the embedding model at ``model_path`` loads onto the CUDA device.

    The model directory and three required files (config.json,
    model.safetensors, tokenizer.json) are checked first. The model is then
    loaded on 'cuda:0', its embedding dimension and maximum sequence length
    are read, and it is unloaded again.

    Args:
        model_path: Path to the nomic-embed-text-v1.5 model

    Returns:
        ModelInfo dict with loading status and model info

    Raises:
        EmbeddingModelError: If the directory or a required file is missing, or
            if anything other than a missing GPU fails during loading (this
            includes import failures of torch / sentence_transformers)
        GPUNotAvailableError: If CUDA is not available
    """
    logger.info("Verifying model loading from: %s", model_path)

    model_dir = Path(model_path)
    if not model_dir.exists():
        error_msg = f"Model directory not found: {model_path}"
        logger.error(error_msg)
        raise EmbeddingModelError(error_msg, model_path=model_path)

    # Collect every required file that is absent so they are all reported at once.
    missing_files = []
    for filename in ("config.json", "model.safetensors", "tokenizer.json"):
        if not (model_dir / filename).exists():
            missing_files.append(filename)
    if missing_files:
        error_msg = f"Missing required model files: {missing_files}"
        logger.error(error_msg)
        raise EmbeddingModelError(error_msg, model_path=model_path)

    try:
        import torch
        from sentence_transformers import SentenceTransformer

        if not torch.cuda.is_available():
            raise GPUNotAvailableError(
                "GPU required for model loading. No CPU fallback allowed per CP-004."
            )

        device = "cuda:0"
        logger.info("Loading model to device: %s", device)

        # NOTE(review): trust_remote_code=True permits custom code shipped with
        # the model to run; only point this at trusted model directories.
        loaded = SentenceTransformer(model_path, device=device, trust_remote_code=True)

        embedding_dim = loaded.get_sentence_embedding_dimension()
        max_seq_len = loaded.max_seq_length

        model_info: ModelInfo = {
            "success": True,
            "model_path": model_path,
            "error": None,
            "model_info": {
                "device": device,
                "embedding_dimension": embedding_dim,
                "max_seq_length": max_seq_len,
            },
        }

        logger.info(
            "Model loaded successfully: dim=%d, max_seq=%d, device=%s",
            embedding_dim,
            max_seq_len,
            device,
        )

        # Drop the model and release cached GPU memory before returning.
        del loaded
        torch.cuda.empty_cache()
        logger.debug("Model unloaded and GPU cache cleared")

        return model_info

    except GPUNotAvailableError:
        # Propagate unchanged so callers can tell "no GPU" from "bad model".
        raise
    except Exception as e:
        logger.error("Model loading failed: %s", e)
        raise EmbeddingModelError(
            f"Failed to load embedding model: {e}", model_path=model_path, cause=e
        ) from e
335
+
336
+
337
def clear_gpu_memory() -> None:
    """
    Release PyTorch's cached GPU memory and wait for the device to synchronize.

    Raises:
        GPUNotAvailableError: If CUDA is not available
        GPUError: If anything else fails, including PyTorch not being importable
    """
    logger.debug("Clearing GPU memory...")

    try:
        import torch

        cuda_ready = torch.cuda.is_available()
        if not cuda_ready:
            raise GPUNotAvailableError("Cannot clear GPU memory: CUDA not available")

        torch.cuda.empty_cache()
        torch.cuda.synchronize()
        logger.info("GPU memory cleared successfully")
    except GPUNotAvailableError:
        # Already the right type; do not wrap it in a generic GPUError.
        raise
    except Exception as e:
        logger.error("Failed to clear GPU memory: %s", e)
        raise GPUError(f"Failed to clear GPU memory: {e}") from e
360
+
361
+
362
def test_embedding_generation(model_path: str = "./models/nomic-embed-text-v1.5") -> dict:
    """
    Run a small end-to-end embedding pass on the CUDA device and time it.

    Loads the model on 'cuda:0', encodes three fixed sample sentences, and
    reports the resulting array shape together with the elapsed time.

    Args:
        model_path: Path to the embedding model

    Returns:
        dict with keys success, embedding_shape, embedding_dimension,
        num_texts, elapsed_ms, ms_per_text and device

    Raises:
        EmbeddingModelError: If anything other than a missing GPU fails
            (including import failures of torch / sentence_transformers)
        GPUNotAvailableError: If CUDA is not available
    """
    logger.info("Testing embedding generation...")

    try:
        import time

        import torch
        from sentence_transformers import SentenceTransformer

        if not torch.cuda.is_available():
            raise GPUNotAvailableError("GPU required for embedding generation")

        device = "cuda:0"
        encoder = SentenceTransformer(model_path, device=device, trust_remote_code=True)

        samples = [
            "This is a test document for OCR provenance verification.",
            "The system must maintain complete data lineage.",
            "Every embedding must trace back to its source file.",
        ]
        sample_count = len(samples)

        # Only the encode call is timed; model loading is excluded.
        started = time.perf_counter()
        vectors = encoder.encode(samples, device=device, convert_to_numpy=True)
        finished = time.perf_counter()

        elapsed_ms = (finished - started) * 1000

        result = {
            "success": True,
            "embedding_shape": list(vectors.shape),
            "embedding_dimension": vectors.shape[1],
            "num_texts": sample_count,
            "elapsed_ms": round(elapsed_ms, 2),
            "ms_per_text": round(elapsed_ms / sample_count, 2),
            "device": device,
        }

        logger.info(
            "Embedding test passed: shape=%s, time=%.2fms, device=%s",
            vectors.shape,
            elapsed_ms,
            device,
        )

        # Drop the model and its output, then release cached GPU memory.
        del encoder
        del vectors
        torch.cuda.empty_cache()

        return result

    except (GPUNotAvailableError, EmbeddingModelError):
        # Already one of the documented error types; pass through untouched.
        raise
    except Exception as e:
        logger.error("Embedding generation test failed: %s", e)
        raise EmbeddingModelError(
            f"Embedding generation test failed: {e}", model_path=model_path, cause=e
        ) from e
435
+
436
+
437
+ # =============================================================================
438
+ # CLI Entry Point
439
+ # =============================================================================
440
+
441
+
442
def main() -> None:
    """
    CLI entry point for GPU verification.

    Parses the command line, runs the requested checks (--verify, --vram,
    --verify-model, --test-embedding; --verify is the default when none is
    given) and prints either a text report or, with --json, a single JSON
    document containing all results. Always terminates via sys.exit().

    Exit status is 1 when a check raises, otherwise 0. A missing CUDA GPU
    reported by --verify is not an error in itself: verify_gpu() returns
    available=False rather than raising, and the report says so.
    """
    parser = argparse.ArgumentParser(
        description="GPU Utilities for OCR Provenance MCP System",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
    python gpu_utils.py --verify # Verify GPU availability
    python gpu_utils.py --verify --json # Output as JSON
    python gpu_utils.py --vram # Show VRAM usage
    python gpu_utils.py --verify-model # Verify model loading
    python gpu_utils.py --test-embedding # Test embedding generation
""",
    )
    parser.add_argument(
        "--verify", action="store_true", help="Verify GPU availability and capabilities"
    )
    parser.add_argument("--vram", action="store_true", help="Show current VRAM usage")
    parser.add_argument(
        "--model",
        type=str,
        default="./models/nomic-embed-text-v1.5",
        help="Path to embedding model (default: ./models/nomic-embed-text-v1.5)",
    )
    parser.add_argument(
        "--verify-model", action="store_true", help="Verify model can be loaded on GPU"
    )
    parser.add_argument(
        "--test-embedding", action="store_true", help="Test embedding generation end-to-end"
    )
    parser.add_argument("--json", action="store_true", help="Output results as JSON")
    parser.add_argument("--verbose", "-v", action="store_true", help="Enable verbose logging")

    args = parser.parse_args()

    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    # Default to --verify if no arguments
    if not any([args.verify, args.vram, args.verify_model, args.test_embedding]):
        args.verify = True

    results: dict = {}
    exit_code = 0

    try:
        if args.verify:
            gpu_info = verify_gpu()
            results["gpu"] = dict(gpu_info)

            if not args.json:
                # verify_gpu() reports a missing GPU through the "available"
                # flag, so the report must reflect it rather than assume "Yes".
                availability = "Yes" if gpu_info["available"] else "No"
                print("=" * 60)
                print("GPU Verification Results")
                print("=" * 60)
                print(f" GPU Available: {availability}")
                print(f" GPU Name: {gpu_info['name']}")
                print(f" VRAM Total: {gpu_info['vram_gb']} GB")
                print(f" VRAM Free: {gpu_info['vram_free_gb']} GB")
                print(f" CUDA Version: {gpu_info['cuda_version']}")
                print(f" Compute Capability: {gpu_info['compute_capability']}")
                print("=" * 60)

        if args.vram:
            vram_info = get_vram_usage()
            results["vram"] = dict(vram_info)

            if not args.json:
                print("\nVRAM Usage:")
                print(f" Allocated: {vram_info['allocated_gb']} GB")
                print(f" Reserved: {vram_info['reserved_gb']} GB")
                print(f" Free: {vram_info['free_gb']} GB")
                print(f" Total: {vram_info['total_gb']} GB")

        if args.verify_model:
            model_result = verify_model_loading(args.model)
            results["model"] = dict(model_result)

            if not args.json:
                print("\nModel Verification:")
                print(" Status: SUCCESS")
                print(f" Path: {model_result['model_path']}")
                print(f" Device: {model_result['model_info'].get('device', 'N/A')}")
                print(
                    f" Dimension: {model_result['model_info'].get('embedding_dimension', 'N/A')}"
                )
                print(f" Max Seq: {model_result['model_info'].get('max_seq_length', 'N/A')}")

        if args.test_embedding:
            embed_result = test_embedding_generation(args.model)
            results["embedding_test"] = embed_result

            if not args.json:
                print("\nEmbedding Generation Test:")
                print(" Status: SUCCESS")
                print(f" Shape: {embed_result['embedding_shape']}")
                print(f" Dimension: {embed_result['embedding_dimension']}")
                print(f" Time: {embed_result['elapsed_ms']} ms")
                print(f" Per Text: {embed_result['ms_per_text']} ms")
                print(f" Device: {embed_result['device']}")

    except GPUNotAvailableError as e:
        logger.error("GPU Error: %s", e)
        results["error"] = {"type": "GPU_NOT_AVAILABLE", "message": str(e)}
        if not args.json:
            print(f"\nERROR: {e}")
            print("This system requires a CUDA-capable GPU. No fallback is allowed.")
        exit_code = 1

    except EmbeddingModelError as e:
        logger.error("Model Error: %s", e)
        results["error"] = {
            "type": "EMBEDDING_MODEL_ERROR",
            "message": str(e),
            "model_path": e.model_path,
        }
        if not args.json:
            print(f"\nERROR: {e}")
        exit_code = 1

    except ImportError as e:
        logger.error("Import Error: %s", e)
        results["error"] = {"type": "IMPORT_ERROR", "message": str(e)}
        if not args.json:
            print(f"\nERROR: {e}")
        exit_code = 1

    except Exception as e:
        # Top-level boundary: log the traceback and report instead of crashing.
        logger.exception("Unexpected error: %s", e)
        results["error"] = {"type": "UNEXPECTED_ERROR", "message": str(e)}
        if not args.json:
            print(f"\nUNEXPECTED ERROR: {e}")
        exit_code = 1

    # In JSON mode everything gathered so far (including any error) is emitted
    # as one document on stdout.
    if args.json:
        print(json.dumps(results, indent=2))

    sys.exit(exit_code)
579
+
580
+
581
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()