ocr-provenance-mcp 1.0.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ocr-provenance-mcp might be problematic. Click here for more details.

Files changed (578)
  1. package/.env.example +55 -0
  2. package/LICENSE +78 -0
  3. package/README.md +1154 -0
  4. package/dist/bin-http.d.ts +24 -0
  5. package/dist/bin-http.d.ts.map +1 -0
  6. package/dist/bin-http.js +275 -0
  7. package/dist/bin-http.js.map +1 -0
  8. package/dist/bin-setup.d.ts +11 -0
  9. package/dist/bin-setup.d.ts.map +1 -0
  10. package/dist/bin-setup.js +610 -0
  11. package/dist/bin-setup.js.map +1 -0
  12. package/dist/bin.d.ts +16 -0
  13. package/dist/bin.d.ts.map +1 -0
  14. package/dist/bin.js +16 -0
  15. package/dist/bin.js.map +1 -0
  16. package/dist/index.d.ts +13 -0
  17. package/dist/index.d.ts.map +1 -0
  18. package/dist/index.js +90 -0
  19. package/dist/index.js.map +1 -0
  20. package/dist/models/chunk.d.ts +136 -0
  21. package/dist/models/chunk.d.ts.map +1 -0
  22. package/dist/models/chunk.js +27 -0
  23. package/dist/models/chunk.js.map +1 -0
  24. package/dist/models/cluster.d.ts +79 -0
  25. package/dist/models/cluster.d.ts.map +1 -0
  26. package/dist/models/cluster.js +10 -0
  27. package/dist/models/cluster.js.map +1 -0
  28. package/dist/models/comparison.d.ts +62 -0
  29. package/dist/models/comparison.d.ts.map +1 -0
  30. package/dist/models/comparison.js +8 -0
  31. package/dist/models/comparison.js.map +1 -0
  32. package/dist/models/document.d.ts +104 -0
  33. package/dist/models/document.d.ts.map +1 -0
  34. package/dist/models/document.js +15 -0
  35. package/dist/models/document.js.map +1 -0
  36. package/dist/models/embedding.d.ts +87 -0
  37. package/dist/models/embedding.d.ts.map +1 -0
  38. package/dist/models/embedding.js +23 -0
  39. package/dist/models/embedding.js.map +1 -0
  40. package/dist/models/extraction.d.ts +15 -0
  41. package/dist/models/extraction.d.ts.map +1 -0
  42. package/dist/models/extraction.js +2 -0
  43. package/dist/models/extraction.js.map +1 -0
  44. package/dist/models/form-fill.d.ts +23 -0
  45. package/dist/models/form-fill.d.ts.map +1 -0
  46. package/dist/models/form-fill.js +2 -0
  47. package/dist/models/form-fill.js.map +1 -0
  48. package/dist/models/image.d.ts +177 -0
  49. package/dist/models/image.d.ts.map +1 -0
  50. package/dist/models/image.js +8 -0
  51. package/dist/models/image.js.map +1 -0
  52. package/dist/models/index.d.ts +14 -0
  53. package/dist/models/index.d.ts.map +1 -0
  54. package/dist/models/index.js +22 -0
  55. package/dist/models/index.js.map +1 -0
  56. package/dist/models/provenance.d.ts +174 -0
  57. package/dist/models/provenance.d.ts.map +1 -0
  58. package/dist/models/provenance.js +53 -0
  59. package/dist/models/provenance.js.map +1 -0
  60. package/dist/models/uploaded-file.d.ts +20 -0
  61. package/dist/models/uploaded-file.d.ts.map +1 -0
  62. package/dist/models/uploaded-file.js +2 -0
  63. package/dist/models/uploaded-file.js.map +1 -0
  64. package/dist/server/errors.d.ts +93 -0
  65. package/dist/server/errors.d.ts.map +1 -0
  66. package/dist/server/errors.js +256 -0
  67. package/dist/server/errors.js.map +1 -0
  68. package/dist/server/events.d.ts +36 -0
  69. package/dist/server/events.d.ts.map +1 -0
  70. package/dist/server/events.js +48 -0
  71. package/dist/server/events.js.map +1 -0
  72. package/dist/server/permissions.d.ts +26 -0
  73. package/dist/server/permissions.d.ts.map +1 -0
  74. package/dist/server/permissions.js +194 -0
  75. package/dist/server/permissions.js.map +1 -0
  76. package/dist/server/register-tools.d.ts +25 -0
  77. package/dist/server/register-tools.d.ts.map +1 -0
  78. package/dist/server/register-tools.js +102 -0
  79. package/dist/server/register-tools.js.map +1 -0
  80. package/dist/server/startup.d.ts +16 -0
  81. package/dist/server/startup.d.ts.map +1 -0
  82. package/dist/server/startup.js +37 -0
  83. package/dist/server/startup.js.map +1 -0
  84. package/dist/server/state.d.ts +166 -0
  85. package/dist/server/state.d.ts.map +1 -0
  86. package/dist/server/state.js +424 -0
  87. package/dist/server/state.js.map +1 -0
  88. package/dist/server/transports/http-transport.d.ts +37 -0
  89. package/dist/server/transports/http-transport.d.ts.map +1 -0
  90. package/dist/server/transports/http-transport.js +204 -0
  91. package/dist/server/transports/http-transport.js.map +1 -0
  92. package/dist/server/transports/index.d.ts +9 -0
  93. package/dist/server/transports/index.d.ts.map +1 -0
  94. package/dist/server/transports/index.js +9 -0
  95. package/dist/server/transports/index.js.map +1 -0
  96. package/dist/server/transports/session-manager.d.ts +40 -0
  97. package/dist/server/transports/session-manager.d.ts.map +1 -0
  98. package/dist/server/transports/session-manager.js +74 -0
  99. package/dist/server/transports/session-manager.js.map +1 -0
  100. package/dist/server/types.d.ts +82 -0
  101. package/dist/server/types.d.ts.map +1 -0
  102. package/dist/server/types.js +14 -0
  103. package/dist/server/types.js.map +1 -0
  104. package/dist/services/audit.d.ts +26 -0
  105. package/dist/services/audit.d.ts.map +1 -0
  106. package/dist/services/audit.js +43 -0
  107. package/dist/services/audit.js.map +1 -0
  108. package/dist/services/chunking/chunk-deduplicator.d.ts +33 -0
  109. package/dist/services/chunking/chunk-deduplicator.d.ts.map +1 -0
  110. package/dist/services/chunking/chunk-deduplicator.js +46 -0
  111. package/dist/services/chunking/chunk-deduplicator.js.map +1 -0
  112. package/dist/services/chunking/chunk-merger.d.ts +26 -0
  113. package/dist/services/chunking/chunk-merger.d.ts.map +1 -0
  114. package/dist/services/chunking/chunk-merger.js +94 -0
  115. package/dist/services/chunking/chunk-merger.js.map +1 -0
  116. package/dist/services/chunking/chunker.d.ts +62 -0
  117. package/dist/services/chunking/chunker.d.ts.map +1 -0
  118. package/dist/services/chunking/chunker.js +566 -0
  119. package/dist/services/chunking/chunker.js.map +1 -0
  120. package/dist/services/chunking/heading-normalizer.d.ts +33 -0
  121. package/dist/services/chunking/heading-normalizer.d.ts.map +1 -0
  122. package/dist/services/chunking/heading-normalizer.js +101 -0
  123. package/dist/services/chunking/heading-normalizer.js.map +1 -0
  124. package/dist/services/chunking/json-block-analyzer.d.ts +163 -0
  125. package/dist/services/chunking/json-block-analyzer.d.ts.map +1 -0
  126. package/dist/services/chunking/json-block-analyzer.js +1033 -0
  127. package/dist/services/chunking/json-block-analyzer.js.map +1 -0
  128. package/dist/services/chunking/markdown-parser.d.ts +75 -0
  129. package/dist/services/chunking/markdown-parser.d.ts.map +1 -0
  130. package/dist/services/chunking/markdown-parser.js +428 -0
  131. package/dist/services/chunking/markdown-parser.js.map +1 -0
  132. package/dist/services/chunking/text-normalizer.d.ts +20 -0
  133. package/dist/services/chunking/text-normalizer.d.ts.map +1 -0
  134. package/dist/services/chunking/text-normalizer.js +36 -0
  135. package/dist/services/chunking/text-normalizer.js.map +1 -0
  136. package/dist/services/clm/contract-schemas.d.ts +36 -0
  137. package/dist/services/clm/contract-schemas.d.ts.map +1 -0
  138. package/dist/services/clm/contract-schemas.js +92 -0
  139. package/dist/services/clm/contract-schemas.js.map +1 -0
  140. package/dist/services/clm/summarization.d.ts +46 -0
  141. package/dist/services/clm/summarization.d.ts.map +1 -0
  142. package/dist/services/clm/summarization.js +61 -0
  143. package/dist/services/clm/summarization.js.map +1 -0
  144. package/dist/services/clustering/clustering-service.d.ts +58 -0
  145. package/dist/services/clustering/clustering-service.d.ts.map +1 -0
  146. package/dist/services/clustering/clustering-service.js +467 -0
  147. package/dist/services/clustering/clustering-service.js.map +1 -0
  148. package/dist/services/comparison/diff-service.d.ts +41 -0
  149. package/dist/services/comparison/diff-service.d.ts.map +1 -0
  150. package/dist/services/comparison/diff-service.js +120 -0
  151. package/dist/services/comparison/diff-service.js.map +1 -0
  152. package/dist/services/embedding/embedder.d.ts +55 -0
  153. package/dist/services/embedding/embedder.d.ts.map +1 -0
  154. package/dist/services/embedding/embedder.js +202 -0
  155. package/dist/services/embedding/embedder.js.map +1 -0
  156. package/dist/services/embedding/nomic.d.ts +67 -0
  157. package/dist/services/embedding/nomic.d.ts.map +1 -0
  158. package/dist/services/embedding/nomic.js +280 -0
  159. package/dist/services/embedding/nomic.js.map +1 -0
  160. package/dist/services/gemini/circuit-breaker.d.ts +106 -0
  161. package/dist/services/gemini/circuit-breaker.d.ts.map +1 -0
  162. package/dist/services/gemini/circuit-breaker.js +237 -0
  163. package/dist/services/gemini/circuit-breaker.js.map +1 -0
  164. package/dist/services/gemini/client.d.ts +173 -0
  165. package/dist/services/gemini/client.d.ts.map +1 -0
  166. package/dist/services/gemini/client.js +483 -0
  167. package/dist/services/gemini/client.js.map +1 -0
  168. package/dist/services/gemini/config.d.ts +116 -0
  169. package/dist/services/gemini/config.d.ts.map +1 -0
  170. package/dist/services/gemini/config.js +118 -0
  171. package/dist/services/gemini/config.js.map +1 -0
  172. package/dist/services/gemini/index.d.ts +9 -0
  173. package/dist/services/gemini/index.d.ts.map +1 -0
  174. package/dist/services/gemini/index.js +13 -0
  175. package/dist/services/gemini/index.js.map +1 -0
  176. package/dist/services/gemini/rate-limiter.d.ts +62 -0
  177. package/dist/services/gemini/rate-limiter.d.ts.map +1 -0
  178. package/dist/services/gemini/rate-limiter.js +120 -0
  179. package/dist/services/gemini/rate-limiter.js.map +1 -0
  180. package/dist/services/images/extractor.d.ts +88 -0
  181. package/dist/services/images/extractor.d.ts.map +1 -0
  182. package/dist/services/images/extractor.js +340 -0
  183. package/dist/services/images/extractor.js.map +1 -0
  184. package/dist/services/images/optimizer.d.ts +130 -0
  185. package/dist/services/images/optimizer.d.ts.map +1 -0
  186. package/dist/services/images/optimizer.js +228 -0
  187. package/dist/services/images/optimizer.js.map +1 -0
  188. package/dist/services/ocr/datalab.d.ts +64 -0
  189. package/dist/services/ocr/datalab.d.ts.map +1 -0
  190. package/dist/services/ocr/datalab.js +425 -0
  191. package/dist/services/ocr/datalab.js.map +1 -0
  192. package/dist/services/ocr/errors.d.ts +38 -0
  193. package/dist/services/ocr/errors.d.ts.map +1 -0
  194. package/dist/services/ocr/errors.js +83 -0
  195. package/dist/services/ocr/errors.js.map +1 -0
  196. package/dist/services/ocr/file-manager.d.ts +76 -0
  197. package/dist/services/ocr/file-manager.d.ts.map +1 -0
  198. package/dist/services/ocr/file-manager.js +238 -0
  199. package/dist/services/ocr/file-manager.js.map +1 -0
  200. package/dist/services/ocr/form-fill.d.ts +48 -0
  201. package/dist/services/ocr/form-fill.d.ts.map +1 -0
  202. package/dist/services/ocr/form-fill.js +213 -0
  203. package/dist/services/ocr/form-fill.js.map +1 -0
  204. package/dist/services/ocr/processor.d.ts +95 -0
  205. package/dist/services/ocr/processor.d.ts.map +1 -0
  206. package/dist/services/ocr/processor.js +259 -0
  207. package/dist/services/ocr/processor.js.map +1 -0
  208. package/dist/services/provenance/agent-metadata.d.ts +82 -0
  209. package/dist/services/provenance/agent-metadata.d.ts.map +1 -0
  210. package/dist/services/provenance/agent-metadata.js +106 -0
  211. package/dist/services/provenance/agent-metadata.js.map +1 -0
  212. package/dist/services/provenance/chain-hash.d.ts +57 -0
  213. package/dist/services/provenance/chain-hash.d.ts.map +1 -0
  214. package/dist/services/provenance/chain-hash.js +131 -0
  215. package/dist/services/provenance/chain-hash.js.map +1 -0
  216. package/dist/services/provenance/exporter.d.ts +202 -0
  217. package/dist/services/provenance/exporter.d.ts.map +1 -0
  218. package/dist/services/provenance/exporter.js +457 -0
  219. package/dist/services/provenance/exporter.js.map +1 -0
  220. package/dist/services/provenance/index.d.ts +15 -0
  221. package/dist/services/provenance/index.d.ts.map +1 -0
  222. package/dist/services/provenance/index.js +17 -0
  223. package/dist/services/provenance/index.js.map +1 -0
  224. package/dist/services/provenance/tracker.d.ts +138 -0
  225. package/dist/services/provenance/tracker.d.ts.map +1 -0
  226. package/dist/services/provenance/tracker.js +293 -0
  227. package/dist/services/provenance/tracker.js.map +1 -0
  228. package/dist/services/provenance/verifier.d.ts +153 -0
  229. package/dist/services/provenance/verifier.d.ts.map +1 -0
  230. package/dist/services/provenance/verifier.js +536 -0
  231. package/dist/services/provenance/verifier.js.map +1 -0
  232. package/dist/services/python-pool.d.ts +70 -0
  233. package/dist/services/python-pool.d.ts.map +1 -0
  234. package/dist/services/python-pool.js +265 -0
  235. package/dist/services/python-pool.js.map +1 -0
  236. package/dist/services/search/bm25.d.ts +180 -0
  237. package/dist/services/search/bm25.d.ts.map +1 -0
  238. package/dist/services/search/bm25.js +656 -0
  239. package/dist/services/search/bm25.js.map +1 -0
  240. package/dist/services/search/fusion.d.ts +103 -0
  241. package/dist/services/search/fusion.d.ts.map +1 -0
  242. package/dist/services/search/fusion.js +122 -0
  243. package/dist/services/search/fusion.js.map +1 -0
  244. package/dist/services/search/local-reranker.d.ts +30 -0
  245. package/dist/services/search/local-reranker.d.ts.map +1 -0
  246. package/dist/services/search/local-reranker.js +123 -0
  247. package/dist/services/search/local-reranker.js.map +1 -0
  248. package/dist/services/search/quality.d.ts +11 -0
  249. package/dist/services/search/quality.d.ts.map +1 -0
  250. package/dist/services/search/quality.js +17 -0
  251. package/dist/services/search/quality.js.map +1 -0
  252. package/dist/services/search/query-classifier.d.ts +34 -0
  253. package/dist/services/search/query-classifier.d.ts.map +1 -0
  254. package/dist/services/search/query-classifier.js +114 -0
  255. package/dist/services/search/query-classifier.js.map +1 -0
  256. package/dist/services/search/query-expander.d.ts +73 -0
  257. package/dist/services/search/query-expander.d.ts.map +1 -0
  258. package/dist/services/search/query-expander.js +281 -0
  259. package/dist/services/search/query-expander.js.map +1 -0
  260. package/dist/services/search/reranker.d.ts +44 -0
  261. package/dist/services/search/reranker.d.ts.map +1 -0
  262. package/dist/services/search/reranker.js +101 -0
  263. package/dist/services/search/reranker.js.map +1 -0
  264. package/dist/services/storage/database/annotation-operations.d.ts +113 -0
  265. package/dist/services/storage/database/annotation-operations.d.ts.map +1 -0
  266. package/dist/services/storage/database/annotation-operations.js +177 -0
  267. package/dist/services/storage/database/annotation-operations.js.map +1 -0
  268. package/dist/services/storage/database/approval-operations.d.ts +132 -0
  269. package/dist/services/storage/database/approval-operations.d.ts.map +1 -0
  270. package/dist/services/storage/database/approval-operations.js +206 -0
  271. package/dist/services/storage/database/approval-operations.js.map +1 -0
  272. package/dist/services/storage/database/chunk-operations.d.ts +132 -0
  273. package/dist/services/storage/database/chunk-operations.d.ts.map +1 -0
  274. package/dist/services/storage/database/chunk-operations.js +306 -0
  275. package/dist/services/storage/database/chunk-operations.js.map +1 -0
  276. package/dist/services/storage/database/cluster-operations.d.ts +97 -0
  277. package/dist/services/storage/database/cluster-operations.d.ts.map +1 -0
  278. package/dist/services/storage/database/cluster-operations.js +258 -0
  279. package/dist/services/storage/database/cluster-operations.js.map +1 -0
  280. package/dist/services/storage/database/comparison-operations.d.ts +41 -0
  281. package/dist/services/storage/database/comparison-operations.d.ts.map +1 -0
  282. package/dist/services/storage/database/comparison-operations.js +65 -0
  283. package/dist/services/storage/database/comparison-operations.js.map +1 -0
  284. package/dist/services/storage/database/converters.d.ts +36 -0
  285. package/dist/services/storage/database/converters.d.ts.map +1 -0
  286. package/dist/services/storage/database/converters.js +244 -0
  287. package/dist/services/storage/database/converters.js.map +1 -0
  288. package/dist/services/storage/database/document-operations.d.ts +145 -0
  289. package/dist/services/storage/database/document-operations.d.ts.map +1 -0
  290. package/dist/services/storage/database/document-operations.js +498 -0
  291. package/dist/services/storage/database/document-operations.js.map +1 -0
  292. package/dist/services/storage/database/embedding-operations.d.ts +130 -0
  293. package/dist/services/storage/database/embedding-operations.d.ts.map +1 -0
  294. package/dist/services/storage/database/embedding-operations.js +315 -0
  295. package/dist/services/storage/database/embedding-operations.js.map +1 -0
  296. package/dist/services/storage/database/extraction-operations.d.ts +47 -0
  297. package/dist/services/storage/database/extraction-operations.d.ts.map +1 -0
  298. package/dist/services/storage/database/extraction-operations.js +85 -0
  299. package/dist/services/storage/database/extraction-operations.js.map +1 -0
  300. package/dist/services/storage/database/form-fill-operations.d.ts +58 -0
  301. package/dist/services/storage/database/form-fill-operations.d.ts.map +1 -0
  302. package/dist/services/storage/database/form-fill-operations.js +116 -0
  303. package/dist/services/storage/database/form-fill-operations.js.map +1 -0
  304. package/dist/services/storage/database/helpers.d.ts +29 -0
  305. package/dist/services/storage/database/helpers.d.ts.map +1 -0
  306. package/dist/services/storage/database/helpers.js +55 -0
  307. package/dist/services/storage/database/helpers.js.map +1 -0
  308. package/dist/services/storage/database/image-operations.d.ts +202 -0
  309. package/dist/services/storage/database/image-operations.d.ts.map +1 -0
  310. package/dist/services/storage/database/image-operations.js +484 -0
  311. package/dist/services/storage/database/image-operations.js.map +1 -0
  312. package/dist/services/storage/database/index.d.ts +13 -0
  313. package/dist/services/storage/database/index.d.ts.map +1 -0
  314. package/dist/services/storage/database/index.js +16 -0
  315. package/dist/services/storage/database/index.js.map +1 -0
  316. package/dist/services/storage/database/lock-operations.d.ts +59 -0
  317. package/dist/services/storage/database/lock-operations.d.ts.map +1 -0
  318. package/dist/services/storage/database/lock-operations.js +89 -0
  319. package/dist/services/storage/database/lock-operations.js.map +1 -0
  320. package/dist/services/storage/database/obligation-operations.d.ts +88 -0
  321. package/dist/services/storage/database/obligation-operations.d.ts.map +1 -0
  322. package/dist/services/storage/database/obligation-operations.js +206 -0
  323. package/dist/services/storage/database/obligation-operations.js.map +1 -0
  324. package/dist/services/storage/database/ocr-operations.d.ts +33 -0
  325. package/dist/services/storage/database/ocr-operations.d.ts.map +1 -0
  326. package/dist/services/storage/database/ocr-operations.js +70 -0
  327. package/dist/services/storage/database/ocr-operations.js.map +1 -0
  328. package/dist/services/storage/database/playbook-operations.d.ts +72 -0
  329. package/dist/services/storage/database/playbook-operations.d.ts.map +1 -0
  330. package/dist/services/storage/database/playbook-operations.js +247 -0
  331. package/dist/services/storage/database/playbook-operations.js.map +1 -0
  332. package/dist/services/storage/database/provenance-operations.d.ts +112 -0
  333. package/dist/services/storage/database/provenance-operations.d.ts.map +1 -0
  334. package/dist/services/storage/database/provenance-operations.js +251 -0
  335. package/dist/services/storage/database/provenance-operations.js.map +1 -0
  336. package/dist/services/storage/database/service.d.ts +142 -0
  337. package/dist/services/storage/database/service.d.ts.map +1 -0
  338. package/dist/services/storage/database/service.js +310 -0
  339. package/dist/services/storage/database/service.js.map +1 -0
  340. package/dist/services/storage/database/static-operations.d.ts +30 -0
  341. package/dist/services/storage/database/static-operations.d.ts.map +1 -0
  342. package/dist/services/storage/database/static-operations.js +218 -0
  343. package/dist/services/storage/database/static-operations.js.map +1 -0
  344. package/dist/services/storage/database/stats-operations.d.ts +101 -0
  345. package/dist/services/storage/database/stats-operations.d.ts.map +1 -0
  346. package/dist/services/storage/database/stats-operations.js +394 -0
  347. package/dist/services/storage/database/stats-operations.js.map +1 -0
  348. package/dist/services/storage/database/tag-operations.d.ts +76 -0
  349. package/dist/services/storage/database/tag-operations.d.ts.map +1 -0
  350. package/dist/services/storage/database/tag-operations.js +178 -0
  351. package/dist/services/storage/database/tag-operations.js.map +1 -0
  352. package/dist/services/storage/database/types.d.ts +286 -0
  353. package/dist/services/storage/database/types.d.ts.map +1 -0
  354. package/dist/services/storage/database/types.js +39 -0
  355. package/dist/services/storage/database/types.js.map +1 -0
  356. package/dist/services/storage/database/upload-operations.d.ts +71 -0
  357. package/dist/services/storage/database/upload-operations.d.ts.map +1 -0
  358. package/dist/services/storage/database/upload-operations.js +124 -0
  359. package/dist/services/storage/database/upload-operations.js.map +1 -0
  360. package/dist/services/storage/database/user-operations.d.ts +102 -0
  361. package/dist/services/storage/database/user-operations.d.ts.map +1 -0
  362. package/dist/services/storage/database/user-operations.js +151 -0
  363. package/dist/services/storage/database/user-operations.js.map +1 -0
  364. package/dist/services/storage/database/workflow-operations.d.ts +98 -0
  365. package/dist/services/storage/database/workflow-operations.d.ts.map +1 -0
  366. package/dist/services/storage/database/workflow-operations.js +157 -0
  367. package/dist/services/storage/database/workflow-operations.js.map +1 -0
  368. package/dist/services/storage/database.d.ts +16 -0
  369. package/dist/services/storage/database.d.ts.map +1 -0
  370. package/dist/services/storage/database.js +15 -0
  371. package/dist/services/storage/database.js.map +1 -0
  372. package/dist/services/storage/index.d.ts +10 -0
  373. package/dist/services/storage/index.d.ts.map +1 -0
  374. package/dist/services/storage/index.js +10 -0
  375. package/dist/services/storage/index.js.map +1 -0
  376. package/dist/services/storage/migrations/index.d.ts +16 -0
  377. package/dist/services/storage/migrations/index.d.ts.map +1 -0
  378. package/dist/services/storage/migrations/index.js +20 -0
  379. package/dist/services/storage/migrations/index.js.map +1 -0
  380. package/dist/services/storage/migrations/operations.d.ts +40 -0
  381. package/dist/services/storage/migrations/operations.d.ts.map +1 -0
  382. package/dist/services/storage/migrations/operations.js +2910 -0
  383. package/dist/services/storage/migrations/operations.js.map +1 -0
  384. package/dist/services/storage/migrations/schema-definitions.d.ts +306 -0
  385. package/dist/services/storage/migrations/schema-definitions.d.ts.map +1 -0
  386. package/dist/services/storage/migrations/schema-definitions.js +1006 -0
  387. package/dist/services/storage/migrations/schema-definitions.js.map +1 -0
  388. package/dist/services/storage/migrations/schema-helpers.d.ts +50 -0
  389. package/dist/services/storage/migrations/schema-helpers.d.ts.map +1 -0
  390. package/dist/services/storage/migrations/schema-helpers.js +176 -0
  391. package/dist/services/storage/migrations/schema-helpers.js.map +1 -0
  392. package/dist/services/storage/migrations/types.d.ts +15 -0
  393. package/dist/services/storage/migrations/types.d.ts.map +1 -0
  394. package/dist/services/storage/migrations/types.js +21 -0
  395. package/dist/services/storage/migrations/types.js.map +1 -0
  396. package/dist/services/storage/migrations/verification.d.ts +20 -0
  397. package/dist/services/storage/migrations/verification.d.ts.map +1 -0
  398. package/dist/services/storage/migrations/verification.js +78 -0
  399. package/dist/services/storage/migrations/verification.js.map +1 -0
  400. package/dist/services/storage/migrations.d.ts +16 -0
  401. package/dist/services/storage/migrations.d.ts.map +1 -0
  402. package/dist/services/storage/migrations.js +17 -0
  403. package/dist/services/storage/migrations.js.map +1 -0
  404. package/dist/services/storage/types.d.ts +12 -0
  405. package/dist/services/storage/types.d.ts.map +1 -0
  406. package/dist/services/storage/types.js +5 -0
  407. package/dist/services/storage/types.js.map +1 -0
  408. package/dist/services/storage/vector.d.ts +208 -0
  409. package/dist/services/storage/vector.d.ts.map +1 -0
  410. package/dist/services/storage/vector.js +526 -0
  411. package/dist/services/storage/vector.js.map +1 -0
  412. package/dist/services/vlm/pipeline.d.ts +194 -0
  413. package/dist/services/vlm/pipeline.d.ts.map +1 -0
  414. package/dist/services/vlm/pipeline.js +800 -0
  415. package/dist/services/vlm/pipeline.js.map +1 -0
  416. package/dist/services/vlm/prompts.d.ts +171 -0
  417. package/dist/services/vlm/prompts.d.ts.map +1 -0
  418. package/dist/services/vlm/prompts.js +229 -0
  419. package/dist/services/vlm/prompts.js.map +1 -0
  420. package/dist/services/vlm/service.d.ts +174 -0
  421. package/dist/services/vlm/service.d.ts.map +1 -0
  422. package/dist/services/vlm/service.js +256 -0
  423. package/dist/services/vlm/service.js.map +1 -0
  424. package/dist/services/webhook-delivery.d.ts +4 -0
  425. package/dist/services/webhook-delivery.d.ts.map +1 -0
  426. package/dist/services/webhook-delivery.js +140 -0
  427. package/dist/services/webhook-delivery.js.map +1 -0
  428. package/dist/tools/chunks.d.ts +19 -0
  429. package/dist/tools/chunks.d.ts.map +1 -0
  430. package/dist/tools/chunks.js +392 -0
  431. package/dist/tools/chunks.js.map +1 -0
  432. package/dist/tools/clm.d.ts +16 -0
  433. package/dist/tools/clm.d.ts.map +1 -0
  434. package/dist/tools/clm.js +668 -0
  435. package/dist/tools/clm.js.map +1 -0
  436. package/dist/tools/clustering.d.ts +13 -0
  437. package/dist/tools/clustering.d.ts.map +1 -0
  438. package/dist/tools/clustering.js +498 -0
  439. package/dist/tools/clustering.js.map +1 -0
  440. package/dist/tools/collaboration.d.ts +15 -0
  441. package/dist/tools/collaboration.d.ts.map +1 -0
  442. package/dist/tools/collaboration.js +516 -0
  443. package/dist/tools/collaboration.js.map +1 -0
  444. package/dist/tools/comparison.d.ts +13 -0
  445. package/dist/tools/comparison.d.ts.map +1 -0
  446. package/dist/tools/comparison.js +735 -0
  447. package/dist/tools/comparison.js.map +1 -0
  448. package/dist/tools/compliance.d.ts +15 -0
  449. package/dist/tools/compliance.d.ts.map +1 -0
  450. package/dist/tools/compliance.js +640 -0
  451. package/dist/tools/compliance.js.map +1 -0
  452. package/dist/tools/config.d.ts +19 -0
  453. package/dist/tools/config.d.ts.map +1 -0
  454. package/dist/tools/config.js +213 -0
  455. package/dist/tools/config.js.map +1 -0
  456. package/dist/tools/database.d.ts +62 -0
  457. package/dist/tools/database.d.ts.map +1 -0
  458. package/dist/tools/database.js +288 -0
  459. package/dist/tools/database.js.map +1 -0
  460. package/dist/tools/documents.d.ts +61 -0
  461. package/dist/tools/documents.d.ts.map +1 -0
  462. package/dist/tools/documents.js +1624 -0
  463. package/dist/tools/documents.js.map +1 -0
  464. package/dist/tools/embeddings.d.ts +14 -0
  465. package/dist/tools/embeddings.d.ts.map +1 -0
  466. package/dist/tools/embeddings.js +626 -0
  467. package/dist/tools/embeddings.js.map +1 -0
  468. package/dist/tools/evaluation.d.ts +25 -0
  469. package/dist/tools/evaluation.d.ts.map +1 -0
  470. package/dist/tools/evaluation.js +523 -0
  471. package/dist/tools/evaluation.js.map +1 -0
  472. package/dist/tools/events.d.ts +16 -0
  473. package/dist/tools/events.d.ts.map +1 -0
  474. package/dist/tools/events.js +493 -0
  475. package/dist/tools/events.js.map +1 -0
  476. package/dist/tools/extraction-structured.d.ts +13 -0
  477. package/dist/tools/extraction-structured.d.ts.map +1 -0
  478. package/dist/tools/extraction-structured.js +390 -0
  479. package/dist/tools/extraction-structured.js.map +1 -0
  480. package/dist/tools/extraction.d.ts +24 -0
  481. package/dist/tools/extraction.d.ts.map +1 -0
  482. package/dist/tools/extraction.js +424 -0
  483. package/dist/tools/extraction.js.map +1 -0
  484. package/dist/tools/file-management.d.ts +14 -0
  485. package/dist/tools/file-management.d.ts.map +1 -0
  486. package/dist/tools/file-management.js +523 -0
  487. package/dist/tools/file-management.js.map +1 -0
  488. package/dist/tools/form-fill.d.ts +13 -0
  489. package/dist/tools/form-fill.d.ts.map +1 -0
  490. package/dist/tools/form-fill.js +250 -0
  491. package/dist/tools/form-fill.js.map +1 -0
  492. package/dist/tools/health.d.ts +19 -0
  493. package/dist/tools/health.d.ts.map +1 -0
  494. package/dist/tools/health.js +229 -0
  495. package/dist/tools/health.js.map +1 -0
  496. package/dist/tools/images.d.ts +54 -0
  497. package/dist/tools/images.d.ts.map +1 -0
  498. package/dist/tools/images.js +787 -0
  499. package/dist/tools/images.js.map +1 -0
  500. package/dist/tools/ingestion.d.ts +94 -0
  501. package/dist/tools/ingestion.d.ts.map +1 -0
  502. package/dist/tools/ingestion.js +1659 -0
  503. package/dist/tools/ingestion.js.map +1 -0
  504. package/dist/tools/intelligence.d.ts +18 -0
  505. package/dist/tools/intelligence.d.ts.map +1 -0
  506. package/dist/tools/intelligence.js +1039 -0
  507. package/dist/tools/intelligence.js.map +1 -0
  508. package/dist/tools/provenance.d.ts +51 -0
  509. package/dist/tools/provenance.d.ts.map +1 -0
  510. package/dist/tools/provenance.js +691 -0
  511. package/dist/tools/provenance.js.map +1 -0
  512. package/dist/tools/reports.d.ts +41 -0
  513. package/dist/tools/reports.d.ts.map +1 -0
  514. package/dist/tools/reports.js +1394 -0
  515. package/dist/tools/reports.js.map +1 -0
  516. package/dist/tools/search.d.ts +35 -0
  517. package/dist/tools/search.d.ts.map +1 -0
  518. package/dist/tools/search.js +2528 -0
  519. package/dist/tools/search.js.map +1 -0
  520. package/dist/tools/shared.d.ts +52 -0
  521. package/dist/tools/shared.d.ts.map +1 -0
  522. package/dist/tools/shared.js +54 -0
  523. package/dist/tools/shared.js.map +1 -0
  524. package/dist/tools/tags.d.ts +15 -0
  525. package/dist/tools/tags.d.ts.map +1 -0
  526. package/dist/tools/tags.js +287 -0
  527. package/dist/tools/tags.js.map +1 -0
  528. package/dist/tools/timeline.d.ts +15 -0
  529. package/dist/tools/timeline.d.ts.map +1 -0
  530. package/dist/tools/timeline.js +14 -0
  531. package/dist/tools/timeline.js.map +1 -0
  532. package/dist/tools/users.d.ts +14 -0
  533. package/dist/tools/users.d.ts.map +1 -0
  534. package/dist/tools/users.js +257 -0
  535. package/dist/tools/users.js.map +1 -0
  536. package/dist/tools/vlm.d.ts +40 -0
  537. package/dist/tools/vlm.d.ts.map +1 -0
  538. package/dist/tools/vlm.js +475 -0
  539. package/dist/tools/vlm.js.map +1 -0
  540. package/dist/tools/workflow.d.ts +16 -0
  541. package/dist/tools/workflow.d.ts.map +1 -0
  542. package/dist/tools/workflow.js +495 -0
  543. package/dist/tools/workflow.js.map +1 -0
  544. package/dist/utils/backoff.d.ts +53 -0
  545. package/dist/utils/backoff.d.ts.map +1 -0
  546. package/dist/utils/backoff.js +78 -0
  547. package/dist/utils/backoff.js.map +1 -0
  548. package/dist/utils/config-persistence.d.ts +33 -0
  549. package/dist/utils/config-persistence.d.ts.map +1 -0
  550. package/dist/utils/config-persistence.js +61 -0
  551. package/dist/utils/config-persistence.js.map +1 -0
  552. package/dist/utils/hash.d.ts +65 -0
  553. package/dist/utils/hash.d.ts.map +1 -0
  554. package/dist/utils/hash.js +146 -0
  555. package/dist/utils/hash.js.map +1 -0
  556. package/dist/utils/math.d.ts +21 -0
  557. package/dist/utils/math.d.ts.map +1 -0
  558. package/dist/utils/math.js +39 -0
  559. package/dist/utils/math.js.map +1 -0
  560. package/dist/utils/validation.d.ts +697 -0
  561. package/dist/utils/validation.d.ts.map +1 -0
  562. package/dist/utils/validation.js +529 -0
  563. package/dist/utils/validation.js.map +1 -0
  564. package/package.json +96 -0
  565. package/python/.gitkeep +0 -0
  566. package/python/__init__.py +104 -0
  567. package/python/clustering_worker.py +440 -0
  568. package/python/docx_image_extractor.py +524 -0
  569. package/python/embedding_worker.py +552 -0
  570. package/python/file_manager_worker.py +564 -0
  571. package/python/form_fill_worker.py +399 -0
  572. package/python/gpu_utils.py +582 -0
  573. package/python/image_extractor.py +317 -0
  574. package/python/image_optimizer.py +444 -0
  575. package/python/ocr_worker.py +712 -0
  576. package/python/pyproject.toml +76 -0
  577. package/python/requirements.txt +51 -0
  578. package/python/reranker_worker.py +87 -0
@@ -0,0 +1,668 @@
1
+ /**
2
+ * Contract Lifecycle Management (CLM) Tools
3
+ *
4
+ * Provides 9 MCP tools for contract intelligence:
5
+ * - ocr_contract_extract (contract data extraction using predefined schemas)
6
+ * - ocr_obligation_list/update/calendar (obligation tracking)
7
+ * - ocr_playbook_create/compare/list (playbook management)
8
+ * - ocr_document_summarize/ocr_corpus_summarize (summarization)
9
+ *
10
+ * CRITICAL: NEVER use console.log() - stdout is reserved for JSON-RPC protocol.
11
+ *
12
+ * @module tools/clm
13
+ */
14
+ import { z } from 'zod';
15
+ import { formatResponse, handleError } from './shared.js';
16
+ import { successResult } from '../server/types.js';
17
+ import { requireDatabase } from '../server/state.js';
18
+ import { validateInput } from '../utils/validation.js';
19
+ import { listObligations, updateObligationStatus, getObligationCalendar, markOverdueObligations, OBLIGATION_TYPES, OBLIGATION_STATUSES, } from '../services/storage/database/obligation-operations.js';
20
+ import { createPlaybook, listPlaybooks, compareWithPlaybook, } from '../services/storage/database/playbook-operations.js';
21
+ import { getSchemasByName, ALL_CONTRACT_SCHEMAS, } from '../services/clm/contract-schemas.js';
22
+ import { summarizeDocument } from '../services/clm/summarization.js';
23
+ // =============================================================================
24
+ // SCHEMAS
25
+ // =============================================================================
26
// Enum validators reused by the handler input schemas and by the tool
// definitions exported at the bottom of this module.

// Severity labels accepted for a playbook clause.
const CLAUSE_SEVERITIES = ['critical', 'major', 'minor'];

// Obligation type / status enums are built from the constants exported by
// the obligation storage module.
const ObligationTypeSchema = z.enum(OBLIGATION_TYPES);
const ObligationStatusSchema = z.enum(OBLIGATION_STATUSES);
const ClauseSeveritySchema = z.enum(CLAUSE_SEVERITIES);
29
+ // =============================================================================
30
+ // TOOL 4.1: ocr_contract_extract
31
+ // =============================================================================
32
/**
 * Handler for `ocr_contract_extract`.
 *
 * Validates the request, checks that the document row exists, loads the
 * document's chunks ordered by chunk_index, and runs the pattern-based
 * extractor once per schema returned by getSchemasByName. It also calls
 * markOverdueObligations and reports the value that call returns.
 * No external API calls are made here.
 *
 * @param {object} params - Raw tool arguments: `document_id` (required,
 *   non-empty string) and an optional `schemas` array of schema names.
 * @returns {Promise<object>} The formatted success response, or the result
 *   of handleError when anything inside the handler throws.
 */
async function handleContractExtract(params) {
    try {
        const requestShape = z.object({
            document_id: z.string().min(1),
            schemas: z.array(z.string()).optional(),
        });
        const request = validateInput(requestShape, params);
        const documentId = request.document_id;
        const connection = requireDatabase().db.getConnection();

        // The document must exist before any extraction is attempted.
        const documentRow = connection
            .prepare('SELECT id, file_name, status FROM documents WHERE id = ?')
            .get(documentId);
        if (!documentRow) {
            throw new Error(`Document not found: ${documentId}`);
        }

        // Resolve which schemas to run.
        const selectedSchemas = getSchemasByName(request.schemas);

        // Load every chunk of the document in reading order.
        const chunkRows = connection.prepare(`
      SELECT id, text, page_number, heading_context, section_path, content_types
      FROM chunks WHERE document_id = ?
      ORDER BY chunk_index ASC
    `).all(documentId);
        if (chunkRows.length === 0) {
            throw new Error(`No chunks found for document ${documentId}. Process the document first.`);
        }

        // One extraction result per schema, keyed by schema name.
        const extractions = {};
        for (const schemaDef of selectedSchemas) {
            extractions[schemaDef.name] = extractSchemaFields(schemaDef, chunkRows);
        }

        // Piggy-back an overdue sweep on this call and report its result.
        const overdueMarked = markOverdueObligations(connection);

        const payload = {
            document_id: documentId,
            file_name: documentRow.file_name,
            schemas_extracted: selectedSchemas.map((schemaDef) => schemaDef.name),
            chunk_count: chunkRows.length,
            extractions,
            overdue_marked: overdueMarked,
            next_steps: [
                { tool: 'ocr_obligation_list', description: 'View obligations for this document' },
                { tool: 'ocr_playbook_compare', description: 'Compare document against a playbook' },
                { tool: 'ocr_document_summarize', description: 'Generate structured summary' },
            ],
        };
        return formatResponse(successResult(payload));
    }
    catch (error) {
        return handleError(error);
    }
}
86
+ // =============================================================================
87
+ // TOOL 4.2: ocr_obligation_list
88
+ // =============================================================================
89
/**
 * Handler for `ocr_obligation_list`.
 *
 * Validates the filter arguments, calls markOverdueObligations, then asks
 * listObligations for one page of results. The response carries the page,
 * the total reported by listObligations, and a `has_more` flag computed as
 * `offset + page length < total`.
 *
 * @param {object} params - Raw tool arguments (all optional): document_id,
 *   obligation_type, status, due_before, due_after, responsible_party,
 *   limit (1-500, default 50) and offset (>= 0, default 0).
 * @returns {Promise<object>} The formatted success response, or the result
 *   of handleError when anything inside the handler throws.
 */
async function handleObligationList(params) {
    try {
        const filterShape = z.object({
            document_id: z.string().min(1).optional(),
            obligation_type: ObligationTypeSchema.optional(),
            status: ObligationStatusSchema.optional(),
            due_before: z.string().optional(),
            due_after: z.string().optional(),
            responsible_party: z.string().optional(),
            limit: z.number().int().min(1).max(500).default(50),
            offset: z.number().int().min(0).default(0),
        });
        const filters = validateInput(filterShape, params);
        const connection = requireDatabase().db.getConnection();

        // Refresh overdue flags so the listing reflects them.
        markOverdueObligations(connection);

        const page = listObligations(connection, {
            document_id: filters.document_id,
            obligation_type: filters.obligation_type,
            status: filters.status,
            due_before: filters.due_before,
            due_after: filters.due_after,
            responsible_party: filters.responsible_party,
            limit: filters.limit,
            offset: filters.offset,
        });

        const limit = filters.limit ?? 50;
        const offset = filters.offset ?? 0;
        const hasMore = offset + page.obligations.length < page.total;

        // Suggest the next page while results remain, otherwise related tools.
        let nextSteps;
        if (hasMore) {
            nextSteps = [
                { tool: 'ocr_obligation_list', description: `Get next page with offset=${offset + limit}` },
            ];
        }
        else {
            nextSteps = [
                { tool: 'ocr_obligation_update', description: 'Update an obligation status' },
                { tool: 'ocr_obligation_calendar', description: 'View obligation calendar' },
                { tool: 'ocr_contract_extract', description: 'Extract more obligations from a document' },
            ];
        }

        return formatResponse(successResult({
            obligations: page.obligations,
            total: page.total,
            limit,
            offset,
            has_more: hasMore,
            next_steps: nextSteps,
        }));
    }
    catch (error) {
        return handleError(error);
    }
}
137
+ // =============================================================================
138
+ // TOOL 4.3: ocr_obligation_update
139
+ // =============================================================================
140
/**
 * Handler for `ocr_obligation_update`.
 *
 * Validates the arguments and forwards them to updateObligationStatus,
 * returning whatever that call yields under the `obligation` key.
 *
 * @param {object} params - Raw tool arguments: `obligation_id` (required),
 *   `status` (required, one of the obligation statuses) and an optional
 *   `reason` of at most 1000 characters.
 * @returns {Promise<object>} The formatted success response, or the result
 *   of handleError when anything inside the handler throws.
 */
async function handleObligationUpdate(params) {
    try {
        const updateShape = z.object({
            obligation_id: z.string().min(1),
            status: ObligationStatusSchema,
            reason: z.string().max(1000).optional(),
        });
        const request = validateInput(updateShape, params);
        const connection = requireDatabase().db.getConnection();
        const obligation = updateObligationStatus(
            connection,
            request.obligation_id,
            request.status,
            request.reason,
        );
        const nextSteps = [
            { tool: 'ocr_obligation_list', description: 'List obligations to see updated state' },
            { tool: 'ocr_obligation_calendar', description: 'View upcoming deadlines' },
        ];
        return formatResponse(successResult({ obligation, next_steps: nextSteps }));
    }
    catch (error) {
        return handleError(error);
    }
}
162
+ // =============================================================================
163
+ // TOOL 4.4: ocr_obligation_calendar
164
+ // =============================================================================
165
/**
 * Handler for `ocr_obligation_calendar`.
 *
 * Validates the arguments, calls markOverdueObligations, then fetches the
 * calendar from getObligationCalendar. Each calendar entry is expected to
 * expose an `obligations` array; their lengths are summed into
 * `total_obligations`.
 *
 * @param {object} params - Raw tool arguments (all optional): months_ahead
 *   (1-24, default 3), status and document_id.
 * @returns {Promise<object>} The formatted success response, or the result
 *   of handleError when anything inside the handler throws.
 */
async function handleObligationCalendar(params) {
    try {
        const calendarShape = z.object({
            months_ahead: z.number().int().min(1).max(24).default(3),
            status: ObligationStatusSchema.optional(),
            document_id: z.string().min(1).optional(),
        });
        const request = validateInput(calendarShape, params);
        const connection = requireDatabase().db.getConnection();

        // Refresh overdue flags so the calendar reflects them.
        markOverdueObligations(connection);

        const months = getObligationCalendar(connection, {
            months_ahead: request.months_ahead,
            status: request.status,
            document_id: request.document_id,
        });

        let obligationTotal = 0;
        for (const month of months) {
            obligationTotal += month.obligations.length;
        }

        // Echo the effective filters; absent filters are reported as null.
        const range = {
            months_ahead: request.months_ahead,
            status_filter: request.status ?? null,
            document_id: request.document_id ?? null,
        };

        return formatResponse(successResult({
            months,
            total_months: months.length,
            total_obligations: obligationTotal,
            range,
            next_steps: [
                { tool: 'ocr_obligation_list', description: 'List obligations with more filters' },
                { tool: 'ocr_obligation_update', description: 'Update an obligation status' },
            ],
        }));
    }
    catch (error) {
        return handleError(error);
    }
}
201
+ // =============================================================================
202
+ // TOOL 4.5: ocr_playbook_create
203
+ // =============================================================================
204
/**
 * Handler for `ocr_playbook_create`.
 *
 * Validates the playbook definition (name, optional description, at least
 * one clause) and passes a normalized copy to createPlaybook. A missing
 * description is stored as null and missing alternatives as an empty array.
 *
 * @param {object} params - Raw tool arguments: `name`, optional
 *   `description`, and `clauses` (each with clause_name, preferred_text,
 *   severity and optional alternatives).
 * @returns {Promise<object>} The formatted success response containing the
 *   value returned by createPlaybook, or the result of handleError when
 *   anything inside the handler throws.
 */
async function handlePlaybookCreate(params) {
    try {
        const clauseShape = z.object({
            clause_name: z.string().min(1).max(200),
            preferred_text: z.string().min(1).max(5000),
            severity: ClauseSeveritySchema,
            alternatives: z.array(z.string().max(5000)).default([]),
        });
        const playbookShape = z.object({
            name: z.string().min(1).max(200),
            description: z.string().max(2000).optional(),
            clauses: z.array(clauseShape).min(1),
        });
        const request = validateInput(playbookShape, params);
        const connection = requireDatabase().db.getConnection();

        // Copy only the known clause properties into the stored definition.
        const clauses = request.clauses.map(({ clause_name, preferred_text, severity, alternatives }) => ({
            clause_name,
            preferred_text,
            severity,
            alternatives: alternatives ?? [],
        }));

        const playbook = createPlaybook(connection, {
            name: request.name,
            description: request.description ?? null,
            clauses,
        });

        return formatResponse(successResult({
            playbook,
            next_steps: [
                { tool: 'ocr_playbook_compare', description: 'Compare a document against this playbook' },
                { tool: 'ocr_playbook_list', description: 'List all playbooks' },
            ],
        }));
    }
    catch (error) {
        return handleError(error);
    }
}
240
+ // =============================================================================
241
+ // TOOL 4.6: ocr_playbook_compare
242
+ // =============================================================================
243
/**
 * Handler for `ocr_playbook_compare`.
 *
 * Validates the two identifiers and delegates to compareWithPlaybook,
 * returning its result under the `comparison` key.
 *
 * @param {object} params - Raw tool arguments: `document_id` and
 *   `playbook_id`, both required non-empty strings.
 * @returns {Promise<object>} The formatted success response, or the result
 *   of handleError when anything inside the handler throws.
 */
async function handlePlaybookCompare(params) {
    try {
        const compareShape = z.object({
            document_id: z.string().min(1),
            playbook_id: z.string().min(1),
        });
        const { document_id: documentId, playbook_id: playbookId } = validateInput(compareShape, params);
        const connection = requireDatabase().db.getConnection();
        const comparison = compareWithPlaybook(connection, documentId, playbookId);
        const nextSteps = [
            { tool: 'ocr_obligation_list', description: 'View obligations for this document' },
            { tool: 'ocr_contract_extract', description: 'Extract contract details' },
            { tool: 'ocr_playbook_list', description: 'Compare against a different playbook' },
        ];
        return formatResponse(successResult({ comparison, next_steps: nextSteps }));
    }
    catch (error) {
        return handleError(error);
    }
}
265
+ // =============================================================================
266
+ // TOOL 4.7: ocr_playbook_list
267
+ // =============================================================================
268
/**
 * Handler for `ocr_playbook_list`.
 *
 * Takes no arguments (the input is still validated against an empty object
 * schema). Returns a compact view of every playbook from listPlaybooks:
 * id, name, description, the number of clauses, and both timestamps.
 *
 * @param {object} params - Raw tool arguments; no fields are read.
 * @returns {Promise<object>} The formatted success response, or the result
 *   of handleError when anything inside the handler throws.
 */
async function handlePlaybookList(params) {
    try {
        validateInput(z.object({}), params);
        const connection = requireDatabase().db.getConnection();
        const allPlaybooks = listPlaybooks(connection);

        // Report the clause count instead of the full clause list.
        const summaries = allPlaybooks.map((entry) => {
            return {
                id: entry.id,
                name: entry.name,
                description: entry.description,
                clause_count: entry.clauses.length,
                created_at: entry.created_at,
                updated_at: entry.updated_at,
            };
        });

        // With no playbooks the only useful next step is creating one.
        let nextSteps;
        if (allPlaybooks.length === 0) {
            nextSteps = [
                { tool: 'ocr_playbook_create', description: 'Create a playbook with preferred contract terms' },
            ];
        }
        else {
            nextSteps = [
                { tool: 'ocr_playbook_compare', description: 'Compare a document against a playbook' },
                { tool: 'ocr_playbook_create', description: 'Create a new playbook' },
            ];
        }

        return formatResponse(successResult({
            playbooks: summaries,
            total: allPlaybooks.length,
            next_steps: nextSteps,
        }));
    }
    catch (error) {
        return handleError(error);
    }
}
296
+ // =============================================================================
297
+ // TOOL 4.8: ocr_document_summarize
298
+ // =============================================================================
299
/**
 * Handler for `ocr_document_summarize`.
 *
 * Validates the request, checks that the document row exists, loads the
 * document's chunks ordered by chunk_index and passes them to
 * summarizeDocument. The returned summary object is stamped with the
 * document id before being sent back.
 *
 * @param {object} params - Raw tool arguments: `document_id` (required,
 *   non-empty string).
 * @returns {Promise<object>} The formatted success response, or the result
 *   of handleError when anything inside the handler throws.
 */
async function handleDocumentSummarize(params) {
    try {
        const { document_id: documentId } = validateInput(z.object({
            document_id: z.string().min(1),
        }), params);
        const connection = requireDatabase().db.getConnection();

        // The document must exist before its chunks are read.
        const documentRow = connection
            .prepare('SELECT id, file_name, status FROM documents WHERE id = ?')
            .get(documentId);
        if (!documentRow) {
            throw new Error(`Document not found: ${documentId}`);
        }

        // Only the columns handed to the summarizer are selected.
        const chunkRows = connection.prepare(`
      SELECT text, page_number, heading_context, section_path, content_types
      FROM chunks WHERE document_id = ?
      ORDER BY chunk_index ASC
    `).all(documentId);
        if (chunkRows.length === 0) {
            throw new Error(`No chunks found for document ${documentId}. Process the document first.`);
        }

        const summary = summarizeDocument(chunkRows);
        // The summarizer only sees chunks, so the id is attached here.
        summary.document_id = documentId;

        const payload = {
            document_id: documentId,
            file_name: documentRow.file_name,
            status: documentRow.status,
            summary,
            next_steps: [
                { tool: 'ocr_contract_extract', description: 'Extract contract-specific information' },
                { tool: 'ocr_search', description: 'Search within this document' },
                { tool: 'ocr_document_get', description: 'Get full document details' },
            ],
        };
        return formatResponse(successResult(payload));
    }
    catch (error) {
        return handleError(error);
    }
}
338
+ // =============================================================================
339
+ // TOOL 4.9: ocr_corpus_summarize
340
+ // =============================================================================
341
/**
 * Handler for `ocr_corpus_summarize`.
 *
 * Builds a corpus overview from four queries:
 *  1. the `limit` most recently created documents with their chunk counts;
 *  2. the `content_types` column of up to 10000 chunks, split on commas and
 *     tallied per label;
 *  3. up to 50 distinct non-empty `heading_context` values (no ordering is
 *     applied, so which 50 are returned is up to the database);
 *  4. a word-count estimate over every chunk: number of spaces + 1 per chunk.
 *
 * Note the differing scopes: `total_documents`, `total_chunks` and
 * `total_pages` cover only the documents returned by query 1, while the
 * other figures are computed over the chunks table as a whole.
 *
 * @param {object} params - Raw tool arguments: optional `limit`
 *   (1-500, default 100) capping the number of documents listed.
 * @returns {Promise<object>} The formatted success response, or the result
 *   of handleError when anything inside the handler throws.
 */
async function handleCorpusSummarize(params) {
    try {
        const input = validateInput(z.object({
            limit: z.number().int().min(1).max(500).default(100),
        }), params);
        const { db } = requireDatabase();
        const conn = db.getConnection();

        // Most recent documents, each with its chunk count.
        const documents = conn.prepare(`
      SELECT d.id, d.file_name, d.status, d.page_count,
        (SELECT COUNT(*) FROM chunks WHERE document_id = d.id) as chunk_count
      FROM documents d
      ORDER BY d.created_at DESC
      LIMIT ?
    `).all(input.limit);

        // Totals over the listed documents only.
        let totalChunks = 0;
        let totalPages = 0;
        for (const doc of documents) {
            totalChunks += doc.chunk_count;
            totalPages += doc.page_count ?? 0;
        }

        // Content type distribution, sampled from at most 10000 chunk rows.
        const contentTypeRows = conn.prepare(`
      SELECT content_types FROM chunks
      WHERE content_types IS NOT NULL AND content_types != ''
      LIMIT 10000
    `).all();
        // Tally in a Map: with a plain object, a label that collides with an
        // Object.prototype member (e.g. "constructor") would read the
        // inherited value and produce a corrupted count.
        const contentTypeCounts = new Map();
        for (const row of contentTypeRows) {
            for (const rawLabel of row.content_types.split(',')) {
                const label = rawLabel.trim();
                if (label) {
                    contentTypeCounts.set(label, (contentTypeCounts.get(label) ?? 0) + 1);
                }
            }
        }
        const contentTypeDist = Object.fromEntries(contentTypeCounts);

        // Up to 50 distinct section headings found anywhere in the corpus.
        const sectionRows = conn.prepare(`
      SELECT DISTINCT heading_context FROM chunks
      WHERE heading_context IS NOT NULL AND heading_context != ''
      LIMIT 50
    `).all();
        const topSections = sectionRows.map((r) => r.heading_context);

        // Word-count estimate: (number of spaces + 1) summed over all chunks.
        // This aggregate yields a single row, so no LIMIT is needed; a LIMIT
        // here would cap output rows, not the chunks being summed.
        const wordCountRow = conn.prepare(`
      SELECT SUM(LENGTH(text) - LENGTH(REPLACE(text, ' ', '')) + 1) as total_words
      FROM chunks
    `).get();

        return formatResponse(successResult({
            total_documents: documents.length,
            total_chunks: totalChunks,
            total_pages: totalPages,
            // SUM over zero rows is NULL; report that as 0.
            total_words: wordCountRow.total_words ?? 0,
            documents: documents.map((d) => ({
                document_id: d.id,
                file_name: d.file_name,
                page_count: d.page_count ?? 0,
                chunk_count: d.chunk_count,
                status: d.status,
            })),
            content_type_distribution: contentTypeDist,
            top_sections: topSections,
            next_steps: [
                { tool: 'ocr_document_summarize', description: 'Summarize a specific document' },
                { tool: 'ocr_search', description: 'Search across all documents' },
                { tool: 'ocr_document_list', description: 'List all documents with filters' },
            ],
        }));
    }
    catch (error) {
        return handleError(error);
    }
}
412
+ // =============================================================================
413
+ // HELPER: EXTRACT SCHEMA FIELDS FROM CHUNKS
414
+ // =============================================================================
415
/**
 * Run the pattern-based extractor for every field of one schema.
 * Pure text analysis - no external API calls.
 *
 * @param {object} schema - Schema definition; only `schema.fields` is read.
 * @param {Array<object>} chunks - Chunk rows passed through to extractField.
 * @returns {object} Map of field name to whatever extractField returned.
 */
function extractSchemaFields(schema, chunks) {
    return schema.fields.reduce((extracted, fieldDef) => {
        extracted[fieldDef.name] = extractField(fieldDef, chunks);
        return extracted;
    }, {});
}
426
/**
 * Extract a single field from chunks using heuristic pattern matching.
 *
 * Two kinds of evidence are collected, in chunk order:
 *  - every case-insensitive match of the field's regex patterns in the chunk
 *    text (confidence 0.7, text = surrounding context of the match);
 *  - chunks whose heading contains the field name with underscores replaced
 *    by spaces (confidence 0.8, text = first 500 characters of the chunk).
 *
 * @param {{name: string, type: string}} field - Field definition.
 * @param {Array<object>} chunks - Chunk rows; `id`, `text`, `page_number`
 *   and `heading_context` are read.
 * @returns {Array<object>|boolean|object|null} For type 'list' all matches
 *   (possibly empty); for 'boolean' whether anything matched; otherwise the
 *   highest-confidence match (earliest wins on ties) or null.
 */
function extractField(field, chunks) {
    // Compile each pattern once per field instead of once per chunk.
    const regexes = getFieldPatterns(field.name, field.type)
        .map((source) => new RegExp(source, 'gi'));
    const headingNeedle = field.name.replace(/_/g, ' ').toLowerCase();
    const matches = [];
    for (const chunk of chunks) {
        for (const regex of regexes) {
            // matchAll works on a private copy of the regex and always
            // advances past empty matches, so a pattern that can match the
            // empty string cannot stall the scan.
            for (const match of chunk.text.matchAll(regex)) {
                if (match[0].length === 0) {
                    continue; // an empty match carries no information
                }
                matches.push({
                    text: extractMatchContext(chunk.text, match.index, match[0].length),
                    chunk_id: chunk.id,
                    page: chunk.page_number,
                    confidence: 0.7,
                });
            }
        }
        // A heading that names the field is treated as stronger evidence.
        if (chunk.heading_context
            && chunk.heading_context.toLowerCase().includes(headingNeedle)) {
            matches.push({
                text: chunk.text.substring(0, 500),
                chunk_id: chunk.id,
                page: chunk.page_number,
                confidence: 0.8,
            });
        }
    }
    if (field.type === 'list') {
        return matches;
    }
    if (field.type === 'boolean') {
        return matches.length > 0;
    }
    // Best match: highest confidence, first one found on ties.
    let best = null;
    for (const candidate of matches) {
        if (best === null || candidate.confidence > best.confidence) {
            best = candidate;
        }
    }
    return best;
}
474
/**
 * Regex sources (to be compiled by the caller) for the known contract
 * fields, keyed by field name. Built once at module load.
 */
const FIELD_PATTERNS = Object.freeze({
    parties: [
        'between\\s+([^,]+?)\\s+(?:and|&)\\s+([^,\\.]+)',
        '(?:party|parties)\\s*[:.]\\s*([^\\n]+)',
        '(?:hereinafter|hereafter)\\s+(?:referred to as|called)\\s+"([^"]+)"',
    ],
    effective_date: [
        '(?:effective|start|commencement)\\s+date\\s*[:.]?\\s*([^\\n]{5,30})',
        '(?:dated|entered into)\\s+(?:as of\\s+)?([^\\n]{5,30})',
        '(?:this agreement|this contract).*(?:dated|of)\\s+([^\\n]{5,30})',
    ],
    expiration_date: [
        '(?:expir(?:ation|es?)|end|termination)\\s+date\\s*[:.]?\\s*([^\\n]{5,30})',
        '(?:valid|effective)\\s+(?:until|through)\\s+([^\\n]{5,30})',
    ],
    governing_law: [
        '(?:governing|applicable)\\s+law\\s*[:.]?\\s*([^\\n]+)',
        '(?:governed by|construed under|subject to)\\s+(?:the laws of\\s+)?([^\\n]+)',
        '(?:jurisdiction|venue)\\s*[:.]?\\s*([^\\n]+)',
    ],
    contract_type: [
        '(?:this\\s+)(\\w+\\s+agreement|\\w+\\s+contract)',
        '(?:non-disclosure|confidentiality|master\\s+service|statement\\s+of\\s+work|employment)',
    ],
    total_value: [
        '(?:total|aggregate|contract)\\s+(?:value|amount|price|consideration)\\s*[:.]?\\s*([^\\n]+)',
        '\\$[\\d,]+\\.?\\d*',
        '(?:sum of|amount of)\\s+([^\\n]+)',
    ],
    payment_schedule: [
        '(?:payment|billing)\\s+(?:schedule|terms|frequency)\\s*[:.]?\\s*([^\\n]+)',
        '(?:payable|due|invoiced?)\\s+(?:within|on|by|every)\\s+([^\\n]+)',
        '(?:net\\s+\\d+|\\d+\\s+days)',
    ],
    penalties: [
        '(?:penalty|penalties|liquidated damages|late fee)\\s*[:.]?\\s*([^\\n]+)',
        '(?:interest|surcharge)\\s+(?:of|at)\\s+([^\\n]+)',
    ],
    interest_rate: [
        '(?:interest|apr|rate)\\s*[:.]?\\s*(\\d+\\.?\\d*\\s*%)',
    ],
    deadlines: [
        '(?:deadline|due date|by|before|no later than)\\s*[:.]?\\s*([^\\n]+)',
        '(?:within|\\d+)\\s+(?:business\\s+)?days',
    ],
    deliverables: [
        '(?:deliverable|milestone|shall deliver|shall provide)\\s*[:.]?\\s*([^\\n]+)',
    ],
    responsibilities: [
        '(?:responsible for|shall|obligation|duty|must)\\s+([^\\n]+)',
    ],
    auto_renewal: [
        '(?:auto(?:-|\\s)?renew|automatically\\s+renew)',
    ],
    renewal_notice_period: [
        '(?:renewal|non-renewal)\\s+notice\\s*[:.]?\\s*([^\\n]+)',
        '(?:notice of)\\s+(?:renewal|non-renewal)\\s+([^\\n]+)',
    ],
    termination_triggers: [
        '(?:terminat(?:e|ion))\\s+(?:upon|if|in the event|for cause)\\s*[:.]?\\s*([^\\n]+)',
        '(?:material breach|default|insolvency|bankruptcy)',
    ],
    termination_notice_period: [
        '(?:termination)\\s+(?:notice|written notice)\\s*[:.]?\\s*([^\\n]+)',
        '(?:\\d+)\\s+(?:days?|months?)\\s+(?:prior\\s+)?(?:written\\s+)?notice',
    ],
    indemnification: [
        '(?:indemnif(?:y|ication|ies))\\s*[:.]?\\s*([^\\n]+)',
        '(?:hold harmless|defend and indemnify)',
    ],
    liability_limit: [
        '(?:limit(?:ation)?\\s+(?:of\\s+)?liability)\\s*[:.]?\\s*([^\\n]+)',
        '(?:aggregate liability|total liability|maximum liability)\\s*[:.]?\\s*([^\\n]+)',
    ],
    force_majeure: [
        '(?:force majeure|act of god|unforeseeable circumstances)',
    ],
    confidentiality: [
        '(?:confidential(?:ity)?|non-disclosure|proprietary information)\\s*[:.]?\\s*([^\\n]+)',
    ],
    data_protection: [
        '(?:data protection|privacy|gdpr|ccpa|personal data|data processing)\\s*[:.]?\\s*([^\\n]+)',
    ],
});
/**
 * Get regex patterns for a given field name and type.
 *
 * Known field names return their dedicated pattern list. Any other name
 * falls back to a single generic "label: value" pattern built from the
 * name, with each underscore matching one or more whitespace characters.
 *
 * @param {string} fieldName - Schema field name, e.g. `effective_date`.
 * @param {string} [_fieldType] - Field type; currently unused.
 * @returns {string[]} A fresh array of regex source strings.
 */
function getFieldPatterns(fieldName, _fieldType) {
    // Own-property check: a plain lookup would return inherited members
    // (e.g. the `constructor` function) for names found on Object.prototype.
    if (Object.prototype.hasOwnProperty.call(FIELD_PATTERNS, fieldName)) {
        // Copy so callers cannot alter the shared table.
        return [...FIELD_PATTERNS[fieldName]];
    }
    // Escape regex metacharacters in each name segment so an unusual field
    // name is matched literally instead of breaking or altering the pattern.
    const label = fieldName
        .split('_')
        .map((segment) => segment.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
        .join('\\s+');
    return [
        `(?:${label})\\s*[:.]?\\s*([^\\n]+)`,
    ];
}
567
/**
 * Extract a readable snippet around a regex match.
 *
 * The window spans `contextBefore` characters before the match through
 * `contextAfter` characters after it, clamped to the text bounds. Runs of
 * whitespace inside the window are collapsed to single spaces and the
 * result is trimmed. An ellipsis is added on each side that was cut off,
 * so with the defaults the snippet is at most 50 + matchLength + 200
 * characters plus up to two "..." markers.
 *
 * @param {string} text - Full text the match was found in.
 * @param {number} matchIndex - Start offset of the match within `text`.
 * @param {number} matchLength - Length of the matched substring.
 * @param {number} [contextBefore=50] - Characters to keep before the match.
 * @param {number} [contextAfter=200] - Characters to keep after the match.
 * @returns {string} The normalized snippet.
 */
function extractMatchContext(text, matchIndex, matchLength, contextBefore = 50, contextAfter = 200) {
    const start = Math.max(0, matchIndex - contextBefore);
    const end = Math.min(text.length, matchIndex + matchLength + contextAfter);
    const snippet = text.substring(start, end).replace(/\s+/g, ' ').trim();
    const prefix = start > 0 ? '...' : '';
    const suffix = end < text.length ? '...' : '';
    return `${prefix}${snippet}${suffix}`;
}
582
+ // =============================================================================
583
+ // TOOL DEFINITIONS EXPORT
584
+ // =============================================================================
585
// Each tool is declared on its own (description, input schema, handler) and
// then assembled into the exported registry in the original key order.

/** Tool definition for ocr_contract_extract. */
const contractExtractTool = {
    description: '[PROCESSING] Extract contract-specific information using predefined schemas (contract_metadata, financial_terms, obligations, renewal_termination, compliance_clauses). Uses pattern matching on document chunks - no external API calls. Returns extracted fields organized by schema.',
    inputSchema: {
        document_id: z.string().min(1)
            .describe('Document ID to extract contract information from'),
        // The list of available schema names is generated from ALL_CONTRACT_SCHEMAS.
        schemas: z.array(z.string()).optional()
            .describe(`Schema names to extract (default: all). Available: ${ALL_CONTRACT_SCHEMAS.map((s) => s.name).join(', ')}`),
    },
    handler: handleContractExtract,
};

/** Tool definition for ocr_obligation_list. */
const obligationListTool = {
    description: '[STATUS] List contract obligations with filters. Auto-marks overdue obligations. Filter by document, type, status, due date range, or responsible party.',
    inputSchema: {
        document_id: z.string().min(1).optional()
            .describe('Filter by document ID'),
        obligation_type: ObligationTypeSchema.optional()
            .describe('Filter by type: payment, delivery, notification, renewal, termination, compliance, reporting, approval, other'),
        status: ObligationStatusSchema.optional()
            .describe('Filter by status: active, fulfilled, overdue, waived, expired'),
        due_before: z.string().optional()
            .describe('Filter obligations due before this ISO date'),
        due_after: z.string().optional()
            .describe('Filter obligations due after this ISO date'),
        responsible_party: z.string().optional()
            .describe('Filter by responsible party name'),
        limit: z.number().int().min(1).max(500).default(50)
            .describe('Max results (1-500)'),
        offset: z.number().int().min(0).default(0)
            .describe('Pagination offset'),
    },
    handler: handleObligationList,
};

/** Tool definition for ocr_obligation_update. */
const obligationUpdateTool = {
    description: '[MANAGE] Update obligation status (active, fulfilled, overdue, waived, expired). Optionally provide a reason which is recorded in status history.',
    inputSchema: {
        obligation_id: z.string().min(1)
            .describe('Obligation ID to update'),
        status: ObligationStatusSchema
            .describe('New status: active, fulfilled, overdue, waived, or expired'),
        reason: z.string().max(1000).optional()
            .describe('Reason for status change (recorded in history)'),
    },
    handler: handleObligationUpdate,
};

/** Tool definition for ocr_obligation_calendar. */
const obligationCalendarTool = {
    description: '[STATUS] Calendar view of upcoming obligation deadlines grouped by month. Auto-marks overdue obligations. Filter by status or document.',
    inputSchema: {
        months_ahead: z.number().int().min(1).max(24).default(3)
            .describe('Number of months to look ahead (1-24, default 3)'),
        status: ObligationStatusSchema.optional()
            .describe('Filter by status'),
        document_id: z.string().min(1).optional()
            .describe('Filter by document ID'),
    },
    handler: handleObligationCalendar,
};

/** Tool definition for ocr_playbook_create. */
const playbookCreateTool = {
    description: '[SETUP] Create a playbook of preferred contract terms for deviation detection. Each clause has a name, preferred text, severity (critical/major/minor), and optional alternatives.',
    inputSchema: {
        name: z.string().min(1).max(200)
            .describe('Playbook name'),
        description: z.string().max(2000).optional()
            .describe('Playbook description'),
        clauses: z.array(z.object({
            clause_name: z.string().min(1).max(200)
                .describe('Clause identifier name'),
            preferred_text: z.string().min(1).max(5000)
                .describe('Preferred contract language'),
            severity: ClauseSeveritySchema
                .describe('Deviation severity: critical, major, or minor'),
            alternatives: z.array(z.string().max(5000)).default([])
                .describe('Acceptable alternative texts'),
        })).min(1)
            .describe('Array of clause definitions'),
    },
    handler: handlePlaybookCreate,
};

/** Tool definition for ocr_playbook_compare. */
const playbookCompareTool = {
    description: '[ANALYSIS] Compare a document against a playbook to detect deviations from preferred terms. Returns clause-by-clause match/deviation status with compliance score.',
    inputSchema: {
        document_id: z.string().min(1)
            .describe('Document ID to compare'),
        playbook_id: z.string().min(1)
            .describe('Playbook ID to compare against'),
    },
    handler: handlePlaybookCompare,
};

/** Tool definition for ocr_playbook_list (takes no arguments). */
const playbookListTool = {
    description: '[STATUS] List all available playbooks with clause counts. Use to find a playbook for comparison.',
    inputSchema: {},
    handler: handlePlaybookList,
};

/** Tool definition for ocr_document_summarize. */
const documentSummarizeTool = {
    description: '[ANALYSIS] Generate a structured summary of a document from its chunks. Returns key sections, content types, word count, and section previews. No external API calls.',
    inputSchema: {
        document_id: z.string().min(1)
            .describe('Document ID to summarize'),
    },
    handler: handleDocumentSummarize,
};

/** Tool definition for ocr_corpus_summarize. */
const corpusSummarizeTool = {
    description: '[ANALYSIS] Summarize the entire document corpus. Returns document count, total pages/words, content type distribution, and top sections across all documents.',
    inputSchema: {
        limit: z.number().int().min(1).max(500).default(100)
            .describe('Max documents to include (1-500, default 100)'),
    },
    handler: handleCorpusSummarize,
};

/**
 * Registry of the CLM tools exported by this module, keyed by tool name.
 * Every entry provides `description`, `inputSchema` (zod validators per
 * argument) and `handler`.
 */
export const clmTools = {
    ocr_contract_extract: contractExtractTool,
    ocr_obligation_list: obligationListTool,
    ocr_obligation_update: obligationUpdateTool,
    ocr_obligation_calendar: obligationCalendarTool,
    ocr_playbook_create: playbookCreateTool,
    ocr_playbook_compare: playbookCompareTool,
    ocr_playbook_list: playbookListTool,
    ocr_document_summarize: documentSummarizeTool,
    ocr_corpus_summarize: corpusSummarizeTool,
};
668
+ //# sourceMappingURL=clm.js.map