ocr-provenance-mcp 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ocr-provenance-mcp might be problematic. Click here for more details.

Files changed (578):
  1. package/.env.example +55 -0
  2. package/LICENSE +78 -0
  3. package/README.md +1154 -0
  4. package/dist/bin-http.d.ts +24 -0
  5. package/dist/bin-http.d.ts.map +1 -0
  6. package/dist/bin-http.js +275 -0
  7. package/dist/bin-http.js.map +1 -0
  8. package/dist/bin-setup.d.ts +11 -0
  9. package/dist/bin-setup.d.ts.map +1 -0
  10. package/dist/bin-setup.js +610 -0
  11. package/dist/bin-setup.js.map +1 -0
  12. package/dist/bin.d.ts +16 -0
  13. package/dist/bin.d.ts.map +1 -0
  14. package/dist/bin.js +16 -0
  15. package/dist/bin.js.map +1 -0
  16. package/dist/index.d.ts +13 -0
  17. package/dist/index.d.ts.map +1 -0
  18. package/dist/index.js +90 -0
  19. package/dist/index.js.map +1 -0
  20. package/dist/models/chunk.d.ts +136 -0
  21. package/dist/models/chunk.d.ts.map +1 -0
  22. package/dist/models/chunk.js +27 -0
  23. package/dist/models/chunk.js.map +1 -0
  24. package/dist/models/cluster.d.ts +79 -0
  25. package/dist/models/cluster.d.ts.map +1 -0
  26. package/dist/models/cluster.js +10 -0
  27. package/dist/models/cluster.js.map +1 -0
  28. package/dist/models/comparison.d.ts +62 -0
  29. package/dist/models/comparison.d.ts.map +1 -0
  30. package/dist/models/comparison.js +8 -0
  31. package/dist/models/comparison.js.map +1 -0
  32. package/dist/models/document.d.ts +104 -0
  33. package/dist/models/document.d.ts.map +1 -0
  34. package/dist/models/document.js +15 -0
  35. package/dist/models/document.js.map +1 -0
  36. package/dist/models/embedding.d.ts +87 -0
  37. package/dist/models/embedding.d.ts.map +1 -0
  38. package/dist/models/embedding.js +23 -0
  39. package/dist/models/embedding.js.map +1 -0
  40. package/dist/models/extraction.d.ts +15 -0
  41. package/dist/models/extraction.d.ts.map +1 -0
  42. package/dist/models/extraction.js +2 -0
  43. package/dist/models/extraction.js.map +1 -0
  44. package/dist/models/form-fill.d.ts +23 -0
  45. package/dist/models/form-fill.d.ts.map +1 -0
  46. package/dist/models/form-fill.js +2 -0
  47. package/dist/models/form-fill.js.map +1 -0
  48. package/dist/models/image.d.ts +177 -0
  49. package/dist/models/image.d.ts.map +1 -0
  50. package/dist/models/image.js +8 -0
  51. package/dist/models/image.js.map +1 -0
  52. package/dist/models/index.d.ts +14 -0
  53. package/dist/models/index.d.ts.map +1 -0
  54. package/dist/models/index.js +22 -0
  55. package/dist/models/index.js.map +1 -0
  56. package/dist/models/provenance.d.ts +174 -0
  57. package/dist/models/provenance.d.ts.map +1 -0
  58. package/dist/models/provenance.js +53 -0
  59. package/dist/models/provenance.js.map +1 -0
  60. package/dist/models/uploaded-file.d.ts +20 -0
  61. package/dist/models/uploaded-file.d.ts.map +1 -0
  62. package/dist/models/uploaded-file.js +2 -0
  63. package/dist/models/uploaded-file.js.map +1 -0
  64. package/dist/server/errors.d.ts +93 -0
  65. package/dist/server/errors.d.ts.map +1 -0
  66. package/dist/server/errors.js +256 -0
  67. package/dist/server/errors.js.map +1 -0
  68. package/dist/server/events.d.ts +36 -0
  69. package/dist/server/events.d.ts.map +1 -0
  70. package/dist/server/events.js +48 -0
  71. package/dist/server/events.js.map +1 -0
  72. package/dist/server/permissions.d.ts +26 -0
  73. package/dist/server/permissions.d.ts.map +1 -0
  74. package/dist/server/permissions.js +194 -0
  75. package/dist/server/permissions.js.map +1 -0
  76. package/dist/server/register-tools.d.ts +25 -0
  77. package/dist/server/register-tools.d.ts.map +1 -0
  78. package/dist/server/register-tools.js +102 -0
  79. package/dist/server/register-tools.js.map +1 -0
  80. package/dist/server/startup.d.ts +16 -0
  81. package/dist/server/startup.d.ts.map +1 -0
  82. package/dist/server/startup.js +37 -0
  83. package/dist/server/startup.js.map +1 -0
  84. package/dist/server/state.d.ts +166 -0
  85. package/dist/server/state.d.ts.map +1 -0
  86. package/dist/server/state.js +424 -0
  87. package/dist/server/state.js.map +1 -0
  88. package/dist/server/transports/http-transport.d.ts +37 -0
  89. package/dist/server/transports/http-transport.d.ts.map +1 -0
  90. package/dist/server/transports/http-transport.js +204 -0
  91. package/dist/server/transports/http-transport.js.map +1 -0
  92. package/dist/server/transports/index.d.ts +9 -0
  93. package/dist/server/transports/index.d.ts.map +1 -0
  94. package/dist/server/transports/index.js +9 -0
  95. package/dist/server/transports/index.js.map +1 -0
  96. package/dist/server/transports/session-manager.d.ts +40 -0
  97. package/dist/server/transports/session-manager.d.ts.map +1 -0
  98. package/dist/server/transports/session-manager.js +74 -0
  99. package/dist/server/transports/session-manager.js.map +1 -0
  100. package/dist/server/types.d.ts +82 -0
  101. package/dist/server/types.d.ts.map +1 -0
  102. package/dist/server/types.js +14 -0
  103. package/dist/server/types.js.map +1 -0
  104. package/dist/services/audit.d.ts +26 -0
  105. package/dist/services/audit.d.ts.map +1 -0
  106. package/dist/services/audit.js +43 -0
  107. package/dist/services/audit.js.map +1 -0
  108. package/dist/services/chunking/chunk-deduplicator.d.ts +33 -0
  109. package/dist/services/chunking/chunk-deduplicator.d.ts.map +1 -0
  110. package/dist/services/chunking/chunk-deduplicator.js +46 -0
  111. package/dist/services/chunking/chunk-deduplicator.js.map +1 -0
  112. package/dist/services/chunking/chunk-merger.d.ts +26 -0
  113. package/dist/services/chunking/chunk-merger.d.ts.map +1 -0
  114. package/dist/services/chunking/chunk-merger.js +94 -0
  115. package/dist/services/chunking/chunk-merger.js.map +1 -0
  116. package/dist/services/chunking/chunker.d.ts +62 -0
  117. package/dist/services/chunking/chunker.d.ts.map +1 -0
  118. package/dist/services/chunking/chunker.js +566 -0
  119. package/dist/services/chunking/chunker.js.map +1 -0
  120. package/dist/services/chunking/heading-normalizer.d.ts +33 -0
  121. package/dist/services/chunking/heading-normalizer.d.ts.map +1 -0
  122. package/dist/services/chunking/heading-normalizer.js +101 -0
  123. package/dist/services/chunking/heading-normalizer.js.map +1 -0
  124. package/dist/services/chunking/json-block-analyzer.d.ts +163 -0
  125. package/dist/services/chunking/json-block-analyzer.d.ts.map +1 -0
  126. package/dist/services/chunking/json-block-analyzer.js +1033 -0
  127. package/dist/services/chunking/json-block-analyzer.js.map +1 -0
  128. package/dist/services/chunking/markdown-parser.d.ts +75 -0
  129. package/dist/services/chunking/markdown-parser.d.ts.map +1 -0
  130. package/dist/services/chunking/markdown-parser.js +428 -0
  131. package/dist/services/chunking/markdown-parser.js.map +1 -0
  132. package/dist/services/chunking/text-normalizer.d.ts +20 -0
  133. package/dist/services/chunking/text-normalizer.d.ts.map +1 -0
  134. package/dist/services/chunking/text-normalizer.js +36 -0
  135. package/dist/services/chunking/text-normalizer.js.map +1 -0
  136. package/dist/services/clm/contract-schemas.d.ts +36 -0
  137. package/dist/services/clm/contract-schemas.d.ts.map +1 -0
  138. package/dist/services/clm/contract-schemas.js +92 -0
  139. package/dist/services/clm/contract-schemas.js.map +1 -0
  140. package/dist/services/clm/summarization.d.ts +46 -0
  141. package/dist/services/clm/summarization.d.ts.map +1 -0
  142. package/dist/services/clm/summarization.js +61 -0
  143. package/dist/services/clm/summarization.js.map +1 -0
  144. package/dist/services/clustering/clustering-service.d.ts +58 -0
  145. package/dist/services/clustering/clustering-service.d.ts.map +1 -0
  146. package/dist/services/clustering/clustering-service.js +467 -0
  147. package/dist/services/clustering/clustering-service.js.map +1 -0
  148. package/dist/services/comparison/diff-service.d.ts +41 -0
  149. package/dist/services/comparison/diff-service.d.ts.map +1 -0
  150. package/dist/services/comparison/diff-service.js +120 -0
  151. package/dist/services/comparison/diff-service.js.map +1 -0
  152. package/dist/services/embedding/embedder.d.ts +55 -0
  153. package/dist/services/embedding/embedder.d.ts.map +1 -0
  154. package/dist/services/embedding/embedder.js +202 -0
  155. package/dist/services/embedding/embedder.js.map +1 -0
  156. package/dist/services/embedding/nomic.d.ts +67 -0
  157. package/dist/services/embedding/nomic.d.ts.map +1 -0
  158. package/dist/services/embedding/nomic.js +280 -0
  159. package/dist/services/embedding/nomic.js.map +1 -0
  160. package/dist/services/gemini/circuit-breaker.d.ts +106 -0
  161. package/dist/services/gemini/circuit-breaker.d.ts.map +1 -0
  162. package/dist/services/gemini/circuit-breaker.js +237 -0
  163. package/dist/services/gemini/circuit-breaker.js.map +1 -0
  164. package/dist/services/gemini/client.d.ts +173 -0
  165. package/dist/services/gemini/client.d.ts.map +1 -0
  166. package/dist/services/gemini/client.js +483 -0
  167. package/dist/services/gemini/client.js.map +1 -0
  168. package/dist/services/gemini/config.d.ts +116 -0
  169. package/dist/services/gemini/config.d.ts.map +1 -0
  170. package/dist/services/gemini/config.js +118 -0
  171. package/dist/services/gemini/config.js.map +1 -0
  172. package/dist/services/gemini/index.d.ts +9 -0
  173. package/dist/services/gemini/index.d.ts.map +1 -0
  174. package/dist/services/gemini/index.js +13 -0
  175. package/dist/services/gemini/index.js.map +1 -0
  176. package/dist/services/gemini/rate-limiter.d.ts +62 -0
  177. package/dist/services/gemini/rate-limiter.d.ts.map +1 -0
  178. package/dist/services/gemini/rate-limiter.js +120 -0
  179. package/dist/services/gemini/rate-limiter.js.map +1 -0
  180. package/dist/services/images/extractor.d.ts +88 -0
  181. package/dist/services/images/extractor.d.ts.map +1 -0
  182. package/dist/services/images/extractor.js +340 -0
  183. package/dist/services/images/extractor.js.map +1 -0
  184. package/dist/services/images/optimizer.d.ts +130 -0
  185. package/dist/services/images/optimizer.d.ts.map +1 -0
  186. package/dist/services/images/optimizer.js +228 -0
  187. package/dist/services/images/optimizer.js.map +1 -0
  188. package/dist/services/ocr/datalab.d.ts +64 -0
  189. package/dist/services/ocr/datalab.d.ts.map +1 -0
  190. package/dist/services/ocr/datalab.js +425 -0
  191. package/dist/services/ocr/datalab.js.map +1 -0
  192. package/dist/services/ocr/errors.d.ts +38 -0
  193. package/dist/services/ocr/errors.d.ts.map +1 -0
  194. package/dist/services/ocr/errors.js +83 -0
  195. package/dist/services/ocr/errors.js.map +1 -0
  196. package/dist/services/ocr/file-manager.d.ts +76 -0
  197. package/dist/services/ocr/file-manager.d.ts.map +1 -0
  198. package/dist/services/ocr/file-manager.js +238 -0
  199. package/dist/services/ocr/file-manager.js.map +1 -0
  200. package/dist/services/ocr/form-fill.d.ts +48 -0
  201. package/dist/services/ocr/form-fill.d.ts.map +1 -0
  202. package/dist/services/ocr/form-fill.js +213 -0
  203. package/dist/services/ocr/form-fill.js.map +1 -0
  204. package/dist/services/ocr/processor.d.ts +95 -0
  205. package/dist/services/ocr/processor.d.ts.map +1 -0
  206. package/dist/services/ocr/processor.js +259 -0
  207. package/dist/services/ocr/processor.js.map +1 -0
  208. package/dist/services/provenance/agent-metadata.d.ts +82 -0
  209. package/dist/services/provenance/agent-metadata.d.ts.map +1 -0
  210. package/dist/services/provenance/agent-metadata.js +106 -0
  211. package/dist/services/provenance/agent-metadata.js.map +1 -0
  212. package/dist/services/provenance/chain-hash.d.ts +57 -0
  213. package/dist/services/provenance/chain-hash.d.ts.map +1 -0
  214. package/dist/services/provenance/chain-hash.js +131 -0
  215. package/dist/services/provenance/chain-hash.js.map +1 -0
  216. package/dist/services/provenance/exporter.d.ts +202 -0
  217. package/dist/services/provenance/exporter.d.ts.map +1 -0
  218. package/dist/services/provenance/exporter.js +457 -0
  219. package/dist/services/provenance/exporter.js.map +1 -0
  220. package/dist/services/provenance/index.d.ts +15 -0
  221. package/dist/services/provenance/index.d.ts.map +1 -0
  222. package/dist/services/provenance/index.js +17 -0
  223. package/dist/services/provenance/index.js.map +1 -0
  224. package/dist/services/provenance/tracker.d.ts +138 -0
  225. package/dist/services/provenance/tracker.d.ts.map +1 -0
  226. package/dist/services/provenance/tracker.js +293 -0
  227. package/dist/services/provenance/tracker.js.map +1 -0
  228. package/dist/services/provenance/verifier.d.ts +153 -0
  229. package/dist/services/provenance/verifier.d.ts.map +1 -0
  230. package/dist/services/provenance/verifier.js +536 -0
  231. package/dist/services/provenance/verifier.js.map +1 -0
  232. package/dist/services/python-pool.d.ts +70 -0
  233. package/dist/services/python-pool.d.ts.map +1 -0
  234. package/dist/services/python-pool.js +265 -0
  235. package/dist/services/python-pool.js.map +1 -0
  236. package/dist/services/search/bm25.d.ts +180 -0
  237. package/dist/services/search/bm25.d.ts.map +1 -0
  238. package/dist/services/search/bm25.js +656 -0
  239. package/dist/services/search/bm25.js.map +1 -0
  240. package/dist/services/search/fusion.d.ts +103 -0
  241. package/dist/services/search/fusion.d.ts.map +1 -0
  242. package/dist/services/search/fusion.js +122 -0
  243. package/dist/services/search/fusion.js.map +1 -0
  244. package/dist/services/search/local-reranker.d.ts +30 -0
  245. package/dist/services/search/local-reranker.d.ts.map +1 -0
  246. package/dist/services/search/local-reranker.js +123 -0
  247. package/dist/services/search/local-reranker.js.map +1 -0
  248. package/dist/services/search/quality.d.ts +11 -0
  249. package/dist/services/search/quality.d.ts.map +1 -0
  250. package/dist/services/search/quality.js +17 -0
  251. package/dist/services/search/quality.js.map +1 -0
  252. package/dist/services/search/query-classifier.d.ts +34 -0
  253. package/dist/services/search/query-classifier.d.ts.map +1 -0
  254. package/dist/services/search/query-classifier.js +114 -0
  255. package/dist/services/search/query-classifier.js.map +1 -0
  256. package/dist/services/search/query-expander.d.ts +73 -0
  257. package/dist/services/search/query-expander.d.ts.map +1 -0
  258. package/dist/services/search/query-expander.js +281 -0
  259. package/dist/services/search/query-expander.js.map +1 -0
  260. package/dist/services/search/reranker.d.ts +44 -0
  261. package/dist/services/search/reranker.d.ts.map +1 -0
  262. package/dist/services/search/reranker.js +101 -0
  263. package/dist/services/search/reranker.js.map +1 -0
  264. package/dist/services/storage/database/annotation-operations.d.ts +113 -0
  265. package/dist/services/storage/database/annotation-operations.d.ts.map +1 -0
  266. package/dist/services/storage/database/annotation-operations.js +177 -0
  267. package/dist/services/storage/database/annotation-operations.js.map +1 -0
  268. package/dist/services/storage/database/approval-operations.d.ts +132 -0
  269. package/dist/services/storage/database/approval-operations.d.ts.map +1 -0
  270. package/dist/services/storage/database/approval-operations.js +206 -0
  271. package/dist/services/storage/database/approval-operations.js.map +1 -0
  272. package/dist/services/storage/database/chunk-operations.d.ts +132 -0
  273. package/dist/services/storage/database/chunk-operations.d.ts.map +1 -0
  274. package/dist/services/storage/database/chunk-operations.js +306 -0
  275. package/dist/services/storage/database/chunk-operations.js.map +1 -0
  276. package/dist/services/storage/database/cluster-operations.d.ts +97 -0
  277. package/dist/services/storage/database/cluster-operations.d.ts.map +1 -0
  278. package/dist/services/storage/database/cluster-operations.js +258 -0
  279. package/dist/services/storage/database/cluster-operations.js.map +1 -0
  280. package/dist/services/storage/database/comparison-operations.d.ts +41 -0
  281. package/dist/services/storage/database/comparison-operations.d.ts.map +1 -0
  282. package/dist/services/storage/database/comparison-operations.js +65 -0
  283. package/dist/services/storage/database/comparison-operations.js.map +1 -0
  284. package/dist/services/storage/database/converters.d.ts +36 -0
  285. package/dist/services/storage/database/converters.d.ts.map +1 -0
  286. package/dist/services/storage/database/converters.js +244 -0
  287. package/dist/services/storage/database/converters.js.map +1 -0
  288. package/dist/services/storage/database/document-operations.d.ts +145 -0
  289. package/dist/services/storage/database/document-operations.d.ts.map +1 -0
  290. package/dist/services/storage/database/document-operations.js +498 -0
  291. package/dist/services/storage/database/document-operations.js.map +1 -0
  292. package/dist/services/storage/database/embedding-operations.d.ts +130 -0
  293. package/dist/services/storage/database/embedding-operations.d.ts.map +1 -0
  294. package/dist/services/storage/database/embedding-operations.js +315 -0
  295. package/dist/services/storage/database/embedding-operations.js.map +1 -0
  296. package/dist/services/storage/database/extraction-operations.d.ts +47 -0
  297. package/dist/services/storage/database/extraction-operations.d.ts.map +1 -0
  298. package/dist/services/storage/database/extraction-operations.js +85 -0
  299. package/dist/services/storage/database/extraction-operations.js.map +1 -0
  300. package/dist/services/storage/database/form-fill-operations.d.ts +58 -0
  301. package/dist/services/storage/database/form-fill-operations.d.ts.map +1 -0
  302. package/dist/services/storage/database/form-fill-operations.js +116 -0
  303. package/dist/services/storage/database/form-fill-operations.js.map +1 -0
  304. package/dist/services/storage/database/helpers.d.ts +29 -0
  305. package/dist/services/storage/database/helpers.d.ts.map +1 -0
  306. package/dist/services/storage/database/helpers.js +55 -0
  307. package/dist/services/storage/database/helpers.js.map +1 -0
  308. package/dist/services/storage/database/image-operations.d.ts +202 -0
  309. package/dist/services/storage/database/image-operations.d.ts.map +1 -0
  310. package/dist/services/storage/database/image-operations.js +484 -0
  311. package/dist/services/storage/database/image-operations.js.map +1 -0
  312. package/dist/services/storage/database/index.d.ts +13 -0
  313. package/dist/services/storage/database/index.d.ts.map +1 -0
  314. package/dist/services/storage/database/index.js +16 -0
  315. package/dist/services/storage/database/index.js.map +1 -0
  316. package/dist/services/storage/database/lock-operations.d.ts +59 -0
  317. package/dist/services/storage/database/lock-operations.d.ts.map +1 -0
  318. package/dist/services/storage/database/lock-operations.js +89 -0
  319. package/dist/services/storage/database/lock-operations.js.map +1 -0
  320. package/dist/services/storage/database/obligation-operations.d.ts +88 -0
  321. package/dist/services/storage/database/obligation-operations.d.ts.map +1 -0
  322. package/dist/services/storage/database/obligation-operations.js +206 -0
  323. package/dist/services/storage/database/obligation-operations.js.map +1 -0
  324. package/dist/services/storage/database/ocr-operations.d.ts +33 -0
  325. package/dist/services/storage/database/ocr-operations.d.ts.map +1 -0
  326. package/dist/services/storage/database/ocr-operations.js +70 -0
  327. package/dist/services/storage/database/ocr-operations.js.map +1 -0
  328. package/dist/services/storage/database/playbook-operations.d.ts +72 -0
  329. package/dist/services/storage/database/playbook-operations.d.ts.map +1 -0
  330. package/dist/services/storage/database/playbook-operations.js +247 -0
  331. package/dist/services/storage/database/playbook-operations.js.map +1 -0
  332. package/dist/services/storage/database/provenance-operations.d.ts +112 -0
  333. package/dist/services/storage/database/provenance-operations.d.ts.map +1 -0
  334. package/dist/services/storage/database/provenance-operations.js +251 -0
  335. package/dist/services/storage/database/provenance-operations.js.map +1 -0
  336. package/dist/services/storage/database/service.d.ts +142 -0
  337. package/dist/services/storage/database/service.d.ts.map +1 -0
  338. package/dist/services/storage/database/service.js +310 -0
  339. package/dist/services/storage/database/service.js.map +1 -0
  340. package/dist/services/storage/database/static-operations.d.ts +30 -0
  341. package/dist/services/storage/database/static-operations.d.ts.map +1 -0
  342. package/dist/services/storage/database/static-operations.js +218 -0
  343. package/dist/services/storage/database/static-operations.js.map +1 -0
  344. package/dist/services/storage/database/stats-operations.d.ts +101 -0
  345. package/dist/services/storage/database/stats-operations.d.ts.map +1 -0
  346. package/dist/services/storage/database/stats-operations.js +394 -0
  347. package/dist/services/storage/database/stats-operations.js.map +1 -0
  348. package/dist/services/storage/database/tag-operations.d.ts +76 -0
  349. package/dist/services/storage/database/tag-operations.d.ts.map +1 -0
  350. package/dist/services/storage/database/tag-operations.js +178 -0
  351. package/dist/services/storage/database/tag-operations.js.map +1 -0
  352. package/dist/services/storage/database/types.d.ts +286 -0
  353. package/dist/services/storage/database/types.d.ts.map +1 -0
  354. package/dist/services/storage/database/types.js +39 -0
  355. package/dist/services/storage/database/types.js.map +1 -0
  356. package/dist/services/storage/database/upload-operations.d.ts +71 -0
  357. package/dist/services/storage/database/upload-operations.d.ts.map +1 -0
  358. package/dist/services/storage/database/upload-operations.js +124 -0
  359. package/dist/services/storage/database/upload-operations.js.map +1 -0
  360. package/dist/services/storage/database/user-operations.d.ts +102 -0
  361. package/dist/services/storage/database/user-operations.d.ts.map +1 -0
  362. package/dist/services/storage/database/user-operations.js +151 -0
  363. package/dist/services/storage/database/user-operations.js.map +1 -0
  364. package/dist/services/storage/database/workflow-operations.d.ts +98 -0
  365. package/dist/services/storage/database/workflow-operations.d.ts.map +1 -0
  366. package/dist/services/storage/database/workflow-operations.js +157 -0
  367. package/dist/services/storage/database/workflow-operations.js.map +1 -0
  368. package/dist/services/storage/database.d.ts +16 -0
  369. package/dist/services/storage/database.d.ts.map +1 -0
  370. package/dist/services/storage/database.js +15 -0
  371. package/dist/services/storage/database.js.map +1 -0
  372. package/dist/services/storage/index.d.ts +10 -0
  373. package/dist/services/storage/index.d.ts.map +1 -0
  374. package/dist/services/storage/index.js +10 -0
  375. package/dist/services/storage/index.js.map +1 -0
  376. package/dist/services/storage/migrations/index.d.ts +16 -0
  377. package/dist/services/storage/migrations/index.d.ts.map +1 -0
  378. package/dist/services/storage/migrations/index.js +20 -0
  379. package/dist/services/storage/migrations/index.js.map +1 -0
  380. package/dist/services/storage/migrations/operations.d.ts +40 -0
  381. package/dist/services/storage/migrations/operations.d.ts.map +1 -0
  382. package/dist/services/storage/migrations/operations.js +2910 -0
  383. package/dist/services/storage/migrations/operations.js.map +1 -0
  384. package/dist/services/storage/migrations/schema-definitions.d.ts +306 -0
  385. package/dist/services/storage/migrations/schema-definitions.d.ts.map +1 -0
  386. package/dist/services/storage/migrations/schema-definitions.js +1006 -0
  387. package/dist/services/storage/migrations/schema-definitions.js.map +1 -0
  388. package/dist/services/storage/migrations/schema-helpers.d.ts +50 -0
  389. package/dist/services/storage/migrations/schema-helpers.d.ts.map +1 -0
  390. package/dist/services/storage/migrations/schema-helpers.js +176 -0
  391. package/dist/services/storage/migrations/schema-helpers.js.map +1 -0
  392. package/dist/services/storage/migrations/types.d.ts +15 -0
  393. package/dist/services/storage/migrations/types.d.ts.map +1 -0
  394. package/dist/services/storage/migrations/types.js +21 -0
  395. package/dist/services/storage/migrations/types.js.map +1 -0
  396. package/dist/services/storage/migrations/verification.d.ts +20 -0
  397. package/dist/services/storage/migrations/verification.d.ts.map +1 -0
  398. package/dist/services/storage/migrations/verification.js +78 -0
  399. package/dist/services/storage/migrations/verification.js.map +1 -0
  400. package/dist/services/storage/migrations.d.ts +16 -0
  401. package/dist/services/storage/migrations.d.ts.map +1 -0
  402. package/dist/services/storage/migrations.js +17 -0
  403. package/dist/services/storage/migrations.js.map +1 -0
  404. package/dist/services/storage/types.d.ts +12 -0
  405. package/dist/services/storage/types.d.ts.map +1 -0
  406. package/dist/services/storage/types.js +5 -0
  407. package/dist/services/storage/types.js.map +1 -0
  408. package/dist/services/storage/vector.d.ts +208 -0
  409. package/dist/services/storage/vector.d.ts.map +1 -0
  410. package/dist/services/storage/vector.js +526 -0
  411. package/dist/services/storage/vector.js.map +1 -0
  412. package/dist/services/vlm/pipeline.d.ts +194 -0
  413. package/dist/services/vlm/pipeline.d.ts.map +1 -0
  414. package/dist/services/vlm/pipeline.js +800 -0
  415. package/dist/services/vlm/pipeline.js.map +1 -0
  416. package/dist/services/vlm/prompts.d.ts +171 -0
  417. package/dist/services/vlm/prompts.d.ts.map +1 -0
  418. package/dist/services/vlm/prompts.js +229 -0
  419. package/dist/services/vlm/prompts.js.map +1 -0
  420. package/dist/services/vlm/service.d.ts +174 -0
  421. package/dist/services/vlm/service.d.ts.map +1 -0
  422. package/dist/services/vlm/service.js +256 -0
  423. package/dist/services/vlm/service.js.map +1 -0
  424. package/dist/services/webhook-delivery.d.ts +4 -0
  425. package/dist/services/webhook-delivery.d.ts.map +1 -0
  426. package/dist/services/webhook-delivery.js +140 -0
  427. package/dist/services/webhook-delivery.js.map +1 -0
  428. package/dist/tools/chunks.d.ts +19 -0
  429. package/dist/tools/chunks.d.ts.map +1 -0
  430. package/dist/tools/chunks.js +392 -0
  431. package/dist/tools/chunks.js.map +1 -0
  432. package/dist/tools/clm.d.ts +16 -0
  433. package/dist/tools/clm.d.ts.map +1 -0
  434. package/dist/tools/clm.js +668 -0
  435. package/dist/tools/clm.js.map +1 -0
  436. package/dist/tools/clustering.d.ts +13 -0
  437. package/dist/tools/clustering.d.ts.map +1 -0
  438. package/dist/tools/clustering.js +498 -0
  439. package/dist/tools/clustering.js.map +1 -0
  440. package/dist/tools/collaboration.d.ts +15 -0
  441. package/dist/tools/collaboration.d.ts.map +1 -0
  442. package/dist/tools/collaboration.js +516 -0
  443. package/dist/tools/collaboration.js.map +1 -0
  444. package/dist/tools/comparison.d.ts +13 -0
  445. package/dist/tools/comparison.d.ts.map +1 -0
  446. package/dist/tools/comparison.js +735 -0
  447. package/dist/tools/comparison.js.map +1 -0
  448. package/dist/tools/compliance.d.ts +15 -0
  449. package/dist/tools/compliance.d.ts.map +1 -0
  450. package/dist/tools/compliance.js +640 -0
  451. package/dist/tools/compliance.js.map +1 -0
  452. package/dist/tools/config.d.ts +19 -0
  453. package/dist/tools/config.d.ts.map +1 -0
  454. package/dist/tools/config.js +213 -0
  455. package/dist/tools/config.js.map +1 -0
  456. package/dist/tools/database.d.ts +62 -0
  457. package/dist/tools/database.d.ts.map +1 -0
  458. package/dist/tools/database.js +288 -0
  459. package/dist/tools/database.js.map +1 -0
  460. package/dist/tools/documents.d.ts +61 -0
  461. package/dist/tools/documents.d.ts.map +1 -0
  462. package/dist/tools/documents.js +1624 -0
  463. package/dist/tools/documents.js.map +1 -0
  464. package/dist/tools/embeddings.d.ts +14 -0
  465. package/dist/tools/embeddings.d.ts.map +1 -0
  466. package/dist/tools/embeddings.js +626 -0
  467. package/dist/tools/embeddings.js.map +1 -0
  468. package/dist/tools/evaluation.d.ts +25 -0
  469. package/dist/tools/evaluation.d.ts.map +1 -0
  470. package/dist/tools/evaluation.js +523 -0
  471. package/dist/tools/evaluation.js.map +1 -0
  472. package/dist/tools/events.d.ts +16 -0
  473. package/dist/tools/events.d.ts.map +1 -0
  474. package/dist/tools/events.js +493 -0
  475. package/dist/tools/events.js.map +1 -0
  476. package/dist/tools/extraction-structured.d.ts +13 -0
  477. package/dist/tools/extraction-structured.d.ts.map +1 -0
  478. package/dist/tools/extraction-structured.js +390 -0
  479. package/dist/tools/extraction-structured.js.map +1 -0
  480. package/dist/tools/extraction.d.ts +24 -0
  481. package/dist/tools/extraction.d.ts.map +1 -0
  482. package/dist/tools/extraction.js +424 -0
  483. package/dist/tools/extraction.js.map +1 -0
  484. package/dist/tools/file-management.d.ts +14 -0
  485. package/dist/tools/file-management.d.ts.map +1 -0
  486. package/dist/tools/file-management.js +523 -0
  487. package/dist/tools/file-management.js.map +1 -0
  488. package/dist/tools/form-fill.d.ts +13 -0
  489. package/dist/tools/form-fill.d.ts.map +1 -0
  490. package/dist/tools/form-fill.js +250 -0
  491. package/dist/tools/form-fill.js.map +1 -0
  492. package/dist/tools/health.d.ts +19 -0
  493. package/dist/tools/health.d.ts.map +1 -0
  494. package/dist/tools/health.js +229 -0
  495. package/dist/tools/health.js.map +1 -0
  496. package/dist/tools/images.d.ts +54 -0
  497. package/dist/tools/images.d.ts.map +1 -0
  498. package/dist/tools/images.js +787 -0
  499. package/dist/tools/images.js.map +1 -0
  500. package/dist/tools/ingestion.d.ts +94 -0
  501. package/dist/tools/ingestion.d.ts.map +1 -0
  502. package/dist/tools/ingestion.js +1659 -0
  503. package/dist/tools/ingestion.js.map +1 -0
  504. package/dist/tools/intelligence.d.ts +18 -0
  505. package/dist/tools/intelligence.d.ts.map +1 -0
  506. package/dist/tools/intelligence.js +1039 -0
  507. package/dist/tools/intelligence.js.map +1 -0
  508. package/dist/tools/provenance.d.ts +51 -0
  509. package/dist/tools/provenance.d.ts.map +1 -0
  510. package/dist/tools/provenance.js +691 -0
  511. package/dist/tools/provenance.js.map +1 -0
  512. package/dist/tools/reports.d.ts +41 -0
  513. package/dist/tools/reports.d.ts.map +1 -0
  514. package/dist/tools/reports.js +1394 -0
  515. package/dist/tools/reports.js.map +1 -0
  516. package/dist/tools/search.d.ts +35 -0
  517. package/dist/tools/search.d.ts.map +1 -0
  518. package/dist/tools/search.js +2528 -0
  519. package/dist/tools/search.js.map +1 -0
  520. package/dist/tools/shared.d.ts +52 -0
  521. package/dist/tools/shared.d.ts.map +1 -0
  522. package/dist/tools/shared.js +54 -0
  523. package/dist/tools/shared.js.map +1 -0
  524. package/dist/tools/tags.d.ts +15 -0
  525. package/dist/tools/tags.d.ts.map +1 -0
  526. package/dist/tools/tags.js +287 -0
  527. package/dist/tools/tags.js.map +1 -0
  528. package/dist/tools/timeline.d.ts +15 -0
  529. package/dist/tools/timeline.d.ts.map +1 -0
  530. package/dist/tools/timeline.js +14 -0
  531. package/dist/tools/timeline.js.map +1 -0
  532. package/dist/tools/users.d.ts +14 -0
  533. package/dist/tools/users.d.ts.map +1 -0
  534. package/dist/tools/users.js +257 -0
  535. package/dist/tools/users.js.map +1 -0
  536. package/dist/tools/vlm.d.ts +40 -0
  537. package/dist/tools/vlm.d.ts.map +1 -0
  538. package/dist/tools/vlm.js +475 -0
  539. package/dist/tools/vlm.js.map +1 -0
  540. package/dist/tools/workflow.d.ts +16 -0
  541. package/dist/tools/workflow.d.ts.map +1 -0
  542. package/dist/tools/workflow.js +495 -0
  543. package/dist/tools/workflow.js.map +1 -0
  544. package/dist/utils/backoff.d.ts +53 -0
  545. package/dist/utils/backoff.d.ts.map +1 -0
  546. package/dist/utils/backoff.js +78 -0
  547. package/dist/utils/backoff.js.map +1 -0
  548. package/dist/utils/config-persistence.d.ts +33 -0
  549. package/dist/utils/config-persistence.d.ts.map +1 -0
  550. package/dist/utils/config-persistence.js +61 -0
  551. package/dist/utils/config-persistence.js.map +1 -0
  552. package/dist/utils/hash.d.ts +65 -0
  553. package/dist/utils/hash.d.ts.map +1 -0
  554. package/dist/utils/hash.js +146 -0
  555. package/dist/utils/hash.js.map +1 -0
  556. package/dist/utils/math.d.ts +21 -0
  557. package/dist/utils/math.d.ts.map +1 -0
  558. package/dist/utils/math.js +39 -0
  559. package/dist/utils/math.js.map +1 -0
  560. package/dist/utils/validation.d.ts +697 -0
  561. package/dist/utils/validation.d.ts.map +1 -0
  562. package/dist/utils/validation.js +529 -0
  563. package/dist/utils/validation.js.map +1 -0
  564. package/package.json +96 -0
  565. package/python/.gitkeep +0 -0
  566. package/python/__init__.py +104 -0
  567. package/python/clustering_worker.py +440 -0
  568. package/python/docx_image_extractor.py +524 -0
  569. package/python/embedding_worker.py +552 -0
  570. package/python/file_manager_worker.py +564 -0
  571. package/python/form_fill_worker.py +399 -0
  572. package/python/gpu_utils.py +582 -0
  573. package/python/image_extractor.py +317 -0
  574. package/python/image_optimizer.py +444 -0
  575. package/python/ocr_worker.py +712 -0
  576. package/python/pyproject.toml +76 -0
  577. package/python/requirements.txt +51 -0
  578. package/python/reranker_worker.py +87 -0
@@ -0,0 +1,467 @@
1
+ /**
2
+ * Clustering Service - Orchestrates document clustering pipeline
3
+ *
4
+ * Pipeline:
5
+ * 1. Fetch document-level embeddings (average-pool chunk embeddings, L2-normalize)
6
+ * 2. Call Python clustering worker (HDBSCAN / agglomerative / kmeans)
7
+ * 3. Store cluster + document_cluster records with provenance
8
+ *
9
+ * CRITICAL: NEVER use console.log() - stdout is JSON-RPC protocol.
10
+ * Use console.error() for all logging.
11
+ *
12
+ * @module services/clustering/clustering-service
13
+ */
14
+ import { v4 as uuidv4 } from 'uuid';
15
+ import { PythonShell } from 'python-shell';
16
+ import path from 'path';
17
+ import { fileURLToPath } from 'url';
18
+ import { getProvenanceTracker } from '../provenance/index.js';
19
+ import { ProvenanceType } from '../../models/provenance.js';
20
+ import { insertCluster, insertDocumentCluster } from '../storage/database/cluster-operations.js';
21
+ import { computeHash } from '../../utils/hash.js';
22
+ const __dirname = path.dirname(fileURLToPath(import.meta.url));
23
/**
 * Error type thrown by the clustering pipeline.
 *
 * Besides the human-readable message it carries a machine-readable `code`
 * and a free-form `details` payload supplied by the thrower.
 */
class ClusteringError extends Error {
    /**
     * @param message - Human-readable description
     * @param code - Machine-readable error code
     * @param details - Extra context attached to the error
     */
    constructor(message, code, details) {
        super(message);
        // Assigned in this order so own-property order stays code, details, name.
        Object.assign(this, { code, details, name: 'ClusteringError' });
    }
}
33
+ // ═══════════════════════════════════════════════════════════════════════════════
34
+ // SERVICE
35
+ // ═══════════════════════════════════════════════════════════════════════════════
36
/** How long the Python worker may run before it is terminated: 5 minutes, in milliseconds. */
const WORKER_TIMEOUT_MS = 5 * 60 * 1000;
/** Upper bound on how much worker stderr is accumulated: 10KB, in characters. */
const MAX_STDERR_LENGTH = 10 * 1024;
40
/**
 * Build one embedding per document by pooling that document's chunk embeddings.
 *
 * Reads every chunk-based vector joined from vec_embeddings/embeddings, groups
 * the vectors by document, and hands each group to averageVectors(), which
 * returns the element-wise mean, L2-normalized.
 *
 * sqlite-vec has NO native vector averaging, so the pooling happens here.
 *
 * @param conn - Raw better-sqlite3 connection (for direct vec_embeddings queries)
 * @param documentIds - Optional filter; if missing or empty, all documents with embeddings are included
 * @returns Array of { document_id, embedding, chunk_count }, one entry per document
 */
export function computeDocumentEmbeddings(conn, documentIds) {
    // Requiring a chunk_id keeps only chunk embeddings (not VLM or extraction)
    const restrictToIds = documentIds && documentIds.length > 0;
    const filterClause = restrictToIds
        ? ` AND e.document_id IN (${documentIds.map(() => '?').join(', ')})`
        : '';
    const statement = conn.prepare(`
        SELECT e.document_id, v.vector
        FROM vec_embeddings v
        JOIN embeddings e ON e.id = v.embedding_id
        WHERE e.chunk_id IS NOT NULL${filterClause}
        ORDER BY e.document_id, e.chunk_index
    `);
    const bindings = restrictToIds ? documentIds : [];
    const rows = statement.all(...bindings);
    // Bucket the raw vectors per document, preserving query order.
    const vectorsByDocument = new Map();
    for (const { document_id: docId, vector } of rows) {
        if (!vectorsByDocument.has(docId)) {
            vectorsByDocument.set(docId, []);
        }
        vectorsByDocument.get(docId).push(vector);
    }
    // Average-pool + L2-normalize each bucket.
    return Array.from(vectorsByDocument, ([docId, vectors]) => ({
        document_id: docId,
        embedding: averageVectors(vectors),
        chunk_count: vectors.length,
    }));
}
90
/**
 * Average-pool vectors and L2-normalize the result.
 *
 * Each input is read as `dim` consecutive float32 values in platform byte
 * order, starting at the Buffer's own offset. Sums are accumulated in
 * float64; the mean is stored as float32 before normalization. A zero-norm
 * mean is returned as-is (all zeros) rather than divided.
 *
 * @param vectors - Array of float32 vectors as Buffers (from sqlite-vec)
 * @param dim - Number of float32 components per vector (defaults to 768)
 * @returns L2-normalized Float32Array of length `dim`; all zeros when `vectors` is empty
 * @throws RangeError if a Buffer holds fewer than `dim` float32 values
 */
function averageVectors(vectors, dim = 768) {
    if (vectors.length === 0) {
        return new Float32Array(dim);
    }
    const bytesPerFloat = Float32Array.BYTES_PER_ELEMENT;
    const bytesNeeded = dim * bytesPerFloat;
    const sum = new Float64Array(dim); // Use float64 for accumulation precision
    for (const buf of vectors) {
        // Guard explicitly: a Buffer can be a short window onto a larger ArrayBuffer,
        // in which case a typed-array view would silently read neighbouring bytes.
        if (buf.byteLength < bytesNeeded) {
            throw new RangeError(`Embedding buffer has ${buf.byteLength} bytes, expected at least ${bytesNeeded}`);
        }
        // A Float32Array view requires a 4-byte-aligned offset. Buffers are not
        // guaranteed to be aligned, so copy the bytes out when they are not.
        const f32 = buf.byteOffset % bytesPerFloat === 0
            ? new Float32Array(buf.buffer, buf.byteOffset, dim)
            : new Float32Array(buf.buffer.slice(buf.byteOffset, buf.byteOffset + bytesNeeded));
        for (let i = 0; i < dim; i++) {
            sum[i] += f32[i];
        }
    }
    // Mean
    const n = vectors.length;
    const result = new Float32Array(dim);
    for (let i = 0; i < dim; i++) {
        result[i] = sum[i] / n;
    }
    // L2 normalize
    let norm = 0;
    for (let i = 0; i < dim; i++) {
        norm += result[i] * result[i];
    }
    norm = Math.sqrt(norm);
    if (norm > 0) {
        for (let i = 0; i < dim; i++) {
            result[i] /= norm;
        }
    }
    return result;
}
127
/**
 * Extract and validate the worker result from the worker's collected stdout.
 *
 * Scans lines from last to first and takes the first one that parses as JSON,
 * then checks the minimal shape the caller relies on.
 *
 * @param output - Non-empty stdout text collected from the worker
 * @returns The parsed worker result object
 * @throws ClusteringError with code WORKER_PARSE_ERROR if no line parses or the shape is invalid
 */
function parseClusteringWorkerOutput(output) {
    const details = { output: output.substring(0, 1000) };
    const lines = output.trim().split('\n');
    let parsed;
    for (let i = lines.length - 1; i >= 0; i--) {
        try {
            parsed = JSON.parse(lines[i].trim());
            break;
        }
        catch (error) {
            /* not JSON, try previous line */
            console.error('[ClusteringService] JSON parse failed for output line, trying previous:', error instanceof Error ? error.message : String(error));
        }
    }
    if (parsed === undefined) {
        throw new ClusteringError('Failed to parse clustering worker output as JSON', 'WORKER_PARSE_ERROR', details);
    }
    if (typeof parsed !== 'object' || parsed === null) {
        throw new ClusteringError('Clustering worker returned non-object result', 'WORKER_PARSE_ERROR', details);
    }
    if (typeof parsed.success !== 'boolean') {
        throw new ClusteringError('Clustering worker result missing required "success" field', 'WORKER_PARSE_ERROR', details);
    }
    if (parsed.success &&
        parsed.silhouette_score === undefined &&
        parsed.labels === undefined) {
        throw new ClusteringError('Clustering worker success=true but missing "labels" and "silhouette_score" fields', 'WORKER_PARSE_ERROR', details);
    }
    return parsed;
}
/**
 * Run the Python clustering worker via python-shell.
 *
 * Sends one JSON document to the worker's stdin and parses the last JSON line
 * of its stdout. Uses the same PythonShell pattern as embedding_worker.py.
 *
 * Failure handling:
 * - After WORKER_TIMEOUT_MS the worker is killed; if it has not exited 5s
 *   later it is sent SIGKILL. Either way the promise rejects with
 *   WORKER_TIMEOUT.
 * - Emitter-level 'error' events (e.g. the process could not be spawned)
 *   reject with WORKER_FAILED instead of surfacing as an uncaught exception.
 * - Empty output rejects with WORKER_FAILED; unparseable or malformed output
 *   rejects with WORKER_PARSE_ERROR.
 *
 * @param embeddings - 2D array of embeddings [n_docs][768]
 * @param documentIds - Document IDs matching embedding order
 * @param config - Clustering algorithm configuration
 * @param distanceMatrix - Optional precomputed distance matrix [n_docs][n_docs]
 * @returns WorkerResult from Python
 */
async function runClusteringWorker(embeddings, documentIds, config, distanceMatrix) {
    const workerPath = path.resolve(__dirname, '../../../python/clustering_worker.py');
    const workerInput = {
        embeddings,
        document_ids: documentIds,
        algorithm: config.algorithm,
        n_clusters: config.n_clusters,
        min_cluster_size: config.min_cluster_size,
        distance_threshold: config.distance_threshold,
        linkage: config.linkage,
    };
    if (distanceMatrix) {
        workerInput.distance_matrix = distanceMatrix;
    }
    const input = JSON.stringify(workerInput);
    return new Promise((resolve, reject) => {
        let settled = false;
        let timedOut = false;
        let stderr = '';
        let sigkillTimer = null;
        const outputChunks = [];
        const options = {
            mode: 'text',
            pythonOptions: ['-u'],
            args: [],
        };
        const shell = new PythonShell(workerPath, options);
        const clearTimers = () => {
            clearTimeout(timer);
            if (sigkillTimer) {
                clearTimeout(sigkillTimer);
                sigkillTimer = null;
            }
        };
        const timer = setTimeout(() => {
            if (settled)
                return;
            // Remember why the process is going away so a clean exit after the
            // kill is still reported as a timeout, not as "no output".
            timedOut = true;
            try {
                shell.kill();
            }
            catch (error) {
                console.error('[ClusteringService] Failed to kill shell on timeout:', error instanceof Error ? error.message : String(error));
            }
            // M-6: SIGKILL escalation if SIGTERM doesn't exit within 5s
            sigkillTimer = setTimeout(() => {
                if (settled)
                    return;
                console.error(`[ClusteringService] Process did not exit after SIGTERM, sending SIGKILL (pid: ${shell.childProcess?.pid})`);
                try {
                    shell.childProcess?.kill('SIGKILL');
                }
                catch (error) {
                    console.error('[ClusteringService] Failed to SIGKILL process (may already be gone):', error instanceof Error ? error.message : String(error));
                }
                settled = true;
                reject(new ClusteringError(`Clustering worker timeout after ${WORKER_TIMEOUT_MS}ms (SIGKILL after 5s grace)`, 'WORKER_TIMEOUT', { stderr: stderr.substring(0, 1000) }));
            }, 5000);
        }, WORKER_TIMEOUT_MS);
        shell.on('message', (msg) => {
            outputChunks.push(msg);
        });
        shell.on('stderr', (err) => {
            if (stderr.length < MAX_STDERR_LENGTH) {
                // Slice so one very long line cannot push past the cap.
                stderr = (stderr + err + '\n').slice(0, MAX_STDERR_LENGTH);
            }
        });
        // Without a listener, an 'error' event on the emitter is thrown as an
        // uncaught exception. The rejection is deferred one tick so that, if the
        // end callback fires in the same tick, its richer handling (which may
        // still parse worker output) takes precedence.
        shell.on('error', (err) => {
            const message = err instanceof Error ? err.message : String(err);
            console.error('[ClusterWorker] Process error:', message);
            setImmediate(() => {
                if (settled)
                    return;
                settled = true;
                clearTimers();
                reject(new ClusteringError(`Clustering worker failed: ${message}`, 'WORKER_FAILED', {
                    stderr: stderr.substring(0, 1000),
                }));
            });
        });
        const handleEnd = (err) => {
            clearTimers();
            if (settled)
                return;
            settled = true;
            if (err) {
                console.error('[ClusterWorker] Error:', err.message);
                if (stderr)
                    console.error('[ClusterWorker] Stderr:', stderr.substring(0, 1000));
            }
            if (timedOut) {
                // The worker exited because we killed it; any output is incomplete.
                reject(new ClusteringError(`Clustering worker timeout after ${WORKER_TIMEOUT_MS}ms`, 'WORKER_TIMEOUT', {
                    stderr: stderr.substring(0, 1000),
                }));
                return;
            }
            const output = outputChunks.join('\n');
            if (!output.trim()) {
                const message = err
                    ? `Clustering worker failed: ${err.message}`
                    : 'Clustering worker produced no output';
                reject(new ClusteringError(message, 'WORKER_FAILED', {
                    stderr: stderr.substring(0, 1000),
                }));
                return;
            }
            try {
                resolve(parseClusteringWorkerOutput(output));
            }
            catch (parseError) {
                reject(parseError);
            }
        };
        shell.send(input);
        shell.end(handleEnd);
    });
}
270
/**
 * Dot product of two vectors, clamped to the range [0, 1].
 *
 * Intended for a document embedding and a cluster centroid that are both
 * L2-normalized, in which case the dot product equals the cosine similarity.
 * Iterates over the length of `a`; components missing from either vector
 * count as 0.
 *
 * @param a - First vector (its length drives the iteration)
 * @param b - Second vector
 * @returns The clamped dot product
 */
export function cosineSimilarity(a, b) {
    const length = a.length;
    let accumulated = 0;
    let index = 0;
    while (index < length) {
        const left = a[index] ?? 0;
        const right = b[index] ?? 0;
        accumulated += left * right;
        index += 1;
    }
    // Clamp to [0, 1] to handle floating point drift
    const capped = Math.min(1, accumulated);
    return Math.max(0, capped);
}
282
+ // ═══════════════════════════════════════════════════════════════════════════════
283
+ // MAIN CLUSTERING PIPELINE
284
+ // ═══════════════════════════════════════════════════════════════════════════════
285
/**
 * Run the full clustering pipeline:
 * 1. Compute document-level embeddings
 * 2. Validate minimum document count
 * 3. Call Python clustering worker
 * 4. Validate the worker's result arrays
 * 5. Create provenance records and store clusters + document_cluster
 *    assignments inside a single transaction
 *
 * Documents labelled -1 by the worker are treated as noise: they get a
 * document_cluster row with a null cluster_id and similarity 0, and are listed
 * in `noise_document_ids`. Centroids and coherence scores are looked up by the
 * position of the cluster's label in ascending label order.
 *
 * @param db - DatabaseService instance
 * @param _vector - VectorService instance (not used by this function)
 * @param config - Clustering configuration
 * @param documentIds - Optional filter (empty = all documents with embeddings)
 * @returns ClusterRunResult with full results
 * @throws ClusteringError INSUFFICIENT_DOCUMENTS when fewer than 2 documents have embeddings
 * @throws ClusteringError WORKER_FAILED when the worker reports success=false
 * @throws ClusteringError WORKER_PARSE_ERROR when the worker result arrays are missing or inconsistent
 */
export async function runClustering(db, _vector, config, documentIds) {
    const startTime = performance.now();
    const runId = uuidv4();
    const conn = db.getConnection();
    // Step 1: Compute document-level embeddings
    console.error(`[CLUSTER] Computing document embeddings...`);
    const docEmbeddings = computeDocumentEmbeddings(conn, documentIds);
    if (docEmbeddings.length < 2) {
        throw new ClusteringError(`At least 2 documents with embeddings required for clustering, got ${docEmbeddings.length}`, 'INSUFFICIENT_DOCUMENTS', { found: docEmbeddings.length, requested: documentIds?.length ?? 'all' });
    }
    console.error(`[CLUSTER] ${docEmbeddings.length} documents with embeddings`);
    // Step 2: Prepare data for Python worker
    const orderedDocIds = docEmbeddings.map((d) => d.document_id);
    const embeddingMatrix = docEmbeddings.map((d) => Array.from(d.embedding));
    // Step 3: Call Python clustering worker
    console.error(`[CLUSTER] Running ${config.algorithm} clustering...`);
    const workerResult = await runClusteringWorker(embeddingMatrix, orderedDocIds, config);
    if (!workerResult.success) {
        throw new ClusteringError(`Clustering worker failed: ${workerResult.error}`, 'WORKER_FAILED', {
            error_type: workerResult.error_type,
            error: workerResult.error,
        });
    }
    // Step 4: Validate the arrays we are about to index into. Without this a
    // malformed result surfaces as an opaque TypeError, or as documents that
    // silently receive no assignment.
    const { labels, probabilities, centroids, coherence_scores: coherenceScores } = workerResult;
    if (!Array.isArray(labels) || labels.length !== orderedDocIds.length) {
        throw new ClusteringError(`Clustering worker returned ${Array.isArray(labels) ? labels.length : 'no'} labels for ${orderedDocIds.length} documents`, 'WORKER_PARSE_ERROR', { expected: orderedDocIds.length, received: Array.isArray(labels) ? labels.length : null });
    }
    if (!Array.isArray(probabilities)) {
        throw new ClusteringError('Clustering worker result missing "probabilities" array', 'WORKER_PARSE_ERROR', {});
    }
    // Group documents by cluster label
    const clusterGroups = new Map(); // label -> doc indices
    labels.forEach((label, i) => {
        if (label === -1)
            return; // Skip noise
        const existing = clusterGroups.get(label);
        if (existing) {
            existing.push(i);
        }
        else {
            clusterGroups.set(label, [i]);
        }
    });
    // Sort cluster labels
    const sortedLabels = Array.from(clusterGroups.keys()).sort((a, b) => a - b);
    // Centroids/coherence are only needed when at least one real cluster exists
    // (an all-noise result never touches them).
    if (sortedLabels.length > 0) {
        if (!Array.isArray(centroids) || centroids.length < sortedLabels.length) {
            throw new ClusteringError(`Clustering worker returned ${Array.isArray(centroids) ? centroids.length : 'no'} centroids for ${sortedLabels.length} clusters`, 'WORKER_PARSE_ERROR', { expected: sortedLabels.length, received: Array.isArray(centroids) ? centroids.length : null });
        }
        if (!Array.isArray(coherenceScores)) {
            throw new ClusteringError('Clustering worker result missing "coherence_scores" array', 'WORKER_PARSE_ERROR', {});
        }
    }
    // label -> position in sortedLabels; avoids an indexOf scan per document below
    const clusterIndexByLabel = new Map(sortedLabels.map((label, ci) => [label, ci]));
    // Step 5: Store results in database
    console.error(`[CLUSTER] Found ${workerResult.n_clusters} clusters, storing results...`);
    const tracker = getProvenanceTracker(db);
    const now = new Date().toISOString();
    const processingDurationMs = Math.round(performance.now() - startTime);
    const algorithmParamsJson = JSON.stringify({
        algorithm: config.algorithm,
        n_clusters: config.n_clusters,
        min_cluster_size: config.min_cluster_size,
        distance_threshold: config.distance_threshold,
        linkage: config.linkage,
    });
    // Build cluster result items and store cluster records
    const clusterItems = [];
    const clusterIdMap = new Map(); // label -> cluster UUID
    // Use a transaction to store everything atomically
    const storeTransaction = conn.transaction(() => {
        for (let ci = 0; ci < sortedLabels.length; ci++) {
            const label = sortedLabels[ci];
            const docIndices = clusterGroups.get(label);
            const centroid = centroids[ci];
            const coherence = coherenceScores[ci];
            const clusterId = uuidv4();
            clusterIdMap.set(label, clusterId);
            // Content hash from centroid + run_id
            const contentHash = computeHash(JSON.stringify(centroid) + ':' + runId);
            // Find any document's provenance_id as the source_id for the cluster provenance
            // Use the first document in this cluster
            const firstDocId = orderedDocIds[docIndices[0]];
            const firstDoc = db.getDocument(firstDocId);
            const sourceProvId = firstDoc?.provenance_id ?? null;
            // Create CLUSTERING provenance record
            const provId = tracker.createProvenance({
                type: ProvenanceType.CLUSTERING,
                source_type: 'CLUSTERING',
                source_id: sourceProvId,
                root_document_id: firstDoc?.provenance_id ?? runId,
                content_hash: contentHash,
                input_hash: computeHash(algorithmParamsJson + ':' + docIndices.length),
                processor: 'clustering-service',
                processor_version: '1.0.0',
                processing_params: {
                    algorithm: config.algorithm,
                    run_id: runId,
                    cluster_index: ci,
                    document_count: docIndices.length,
                },
                processing_duration_ms: processingDurationMs,
                processing_quality_score: coherence,
            });
            // Insert cluster record
            const cluster = {
                id: clusterId,
                run_id: runId,
                cluster_index: ci,
                label: null,
                description: null,
                classification_tag: null,
                document_count: docIndices.length,
                centroid_json: JSON.stringify(centroid),
                top_terms_json: null,
                coherence_score: coherence,
                algorithm: config.algorithm,
                algorithm_params_json: algorithmParamsJson,
                silhouette_score: workerResult.silhouette_score ?? null,
                content_hash: contentHash,
                provenance_id: provId,
                created_at: now,
                processing_duration_ms: processingDurationMs,
            };
            insertCluster(conn, cluster);
            // Build result item
            const itemDocIds = [];
            const itemSimilarities = [];
            const itemProbabilities = [];
            for (const idx of docIndices) {
                itemDocIds.push(orderedDocIds[idx]);
                itemSimilarities.push(cosineSimilarity(docEmbeddings[idx].embedding, centroid));
                itemProbabilities.push(probabilities[idx]);
            }
            clusterItems.push({
                cluster_index: ci,
                document_count: docIndices.length,
                coherence_score: coherence,
                centroid,
                document_ids: itemDocIds,
                similarities: itemSimilarities,
                probabilities: itemProbabilities,
            });
        }
        // Store document-cluster assignments (both clustered and noise documents)
        for (let i = 0; i < labels.length; i++) {
            const label = labels[i];
            const isNoise = label === -1;
            const clusterId = isNoise ? null : (clusterIdMap.get(label) ?? null);
            const centroid = isNoise ? null : centroids[clusterIndexByLabel.get(label)];
            const similarity = centroid ? cosineSimilarity(docEmbeddings[i].embedding, centroid) : 0;
            const dc = {
                id: uuidv4(),
                document_id: orderedDocIds[i],
                cluster_id: clusterId,
                run_id: runId,
                // Stored rounded to 6 decimal places
                similarity_to_centroid: Math.round(similarity * 1000000) / 1000000,
                membership_probability: probabilities[i],
                is_noise: isNoise,
                assigned_at: now,
            };
            insertDocumentCluster(conn, dc);
        }
    });
    storeTransaction();
    const noiseDocIds = orderedDocIds.filter((_, i) => labels[i] === -1);
    const totalDurationMs = Math.round(performance.now() - startTime);
    console.error(`[CLUSTER] Done: ${workerResult.n_clusters} clusters, ${noiseDocIds.length} noise docs, ${totalDurationMs}ms`);
    return {
        run_id: runId,
        algorithm: config.algorithm,
        n_clusters: workerResult.n_clusters,
        total_documents: docEmbeddings.length,
        noise_document_ids: noiseDocIds,
        silhouette_score: workerResult.silhouette_score ?? 0,
        clusters: clusterItems,
        processing_duration_ms: totalDurationMs,
    };
}
467
+ //# sourceMappingURL=clustering-service.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"clustering-service.js","sourceRoot":"","sources":["../../../src/services/clustering/clustering-service.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAEH,OAAO,EAAE,EAAE,IAAI,MAAM,EAAE,MAAM,MAAM,CAAC;AACpC,OAAO,EAAE,WAAW,EAAiC,MAAM,cAAc,CAAC;AAC1E,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,aAAa,EAAE,MAAM,KAAK,CAAC;AAIpC,OAAO,EAAE,oBAAoB,EAAE,MAAM,wBAAwB,CAAC;AAC9D,OAAO,EAAE,cAAc,EAAE,MAAM,4BAA4B,CAAC;AAS5D,OAAO,EAAE,aAAa,EAAE,qBAAqB,EAAE,MAAM,2CAA2C,CAAC;AACjG,OAAO,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AAElD,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAa/D,MAAM,eAAgB,SAAQ,KAAK;IAGf;IACA;IAHlB,YACE,OAAe,EACC,IAAyB,EACzB,OAAiC;QAEjD,KAAK,CAAC,OAAO,CAAC,CAAC;QAHC,SAAI,GAAJ,IAAI,CAAqB;QACzB,YAAO,GAAP,OAAO,CAA0B;QAGjD,IAAI,CAAC,IAAI,GAAG,iBAAiB,CAAC;IAChC,CAAC;CACF;AA6BD,kFAAkF;AAClF,UAAU;AACV,kFAAkF;AAElF,gCAAgC;AAChC,MAAM,iBAAiB,GAAG,OAAO,CAAC;AAElC,oCAAoC;AACpC,MAAM,iBAAiB,GAAG,MAAM,CAAC;AAEjC;;;;;;;;;;;GAWG;AACH,MAAM,UAAU,yBAAyB,CACvC,IAAuB,EACvB,WAAsB;IAEtB,oFAAoF;IACpF,MAAM,SAAS,GAAG,WAAW,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC;IACxD,MAAM,YAAY,GAAG,SAAS;QAC5B,CAAC,CAAC,0BAA0B,WAAW,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG;QACpE,CAAC,CAAC,EAAE,CAAC;IAEP,MAAM,IAAI,GAAG,IAAI;SACd,OAAO,CACN;;;;kCAI4B,YAAY;;GAE3C,CACE;SACA,GAAG,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,EAAE,CAAC,CAGtC,CAAC;IAEH,uBAAuB;IACvB,MAAM,UAAU,GAAG,IAAI,GAAG,EAAoB,CAAC;IAC/C,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,MAAM,QAAQ,GAAG,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;QACjD,IAAI,QAAQ,EAAE,CAAC;YACb,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QAC5B,CAAC;aAAM,CAAC;YACN,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC,WAAW,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC;QAChD,CAAC;IACH,CAAC;IAED,2CAA2C;IAC3C,MAAM,OAAO,GAAwB,EAAE,CAAC;IACxC,KAAK,MAAM,CAAC,KAAK,EAAE,OAAO,CAAC,IAAI,UAAU,EAAE,CAAC;QAC1C,MAAM,QAAQ,GAAG,cAAc,CAAC,OAAO,CAAC,CAAC;QACzC,OAAO,CAAC,IAAI,CAAC;YACX,WAAW,EAAE,KAAK;YAClB,SAAS,EAAE,QAAQ;YACnB,WAAW,EAAE,OAAO,
CAAC,MAAM;SAC5B,CAAC,CAAC;IACL,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;;;;GAKG;AACH,SAAS,cAAc,CAAC,OAAiB;IACvC,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO,IAAI,YAAY,CAAC,GAAG,CAAC,CAAC;IAC/B,CAAC;IAED,MAAM,GAAG,GAAG,GAAG,CAAC;IAChB,MAAM,GAAG,GAAG,IAAI,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,yCAAyC;IAE5E,KAAK,MAAM,GAAG,IAAI,OAAO,EAAE,CAAC;QAC1B,MAAM,GAAG,GAAG,IAAI,YAAY,CAAC,GAAG,CAAC,MAAM,EAAE,GAAG,CAAC,UAAU,EAAE,GAAG,CAAC,CAAC;QAC9D,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;YAC7B,GAAG,CAAC,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC;QACnB,CAAC;IACH,CAAC;IAED,OAAO;IACP,MAAM,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC;IACzB,MAAM,MAAM,GAAG,IAAI,YAAY,CAAC,GAAG,CAAC,CAAC;IACrC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;QAC7B,MAAM,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;IACzB,CAAC;IAED,eAAe;IACf,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;QAC7B,IAAI,IAAI,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;IAChC,CAAC;IACD,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAEvB,IAAI,IAAI,GAAG,CAAC,EAAE,CAAC;QACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;YAC7B,MAAM,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC;QACpB,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;;;;;;;;;;GAWG;AACH,KAAK,UAAU,mBAAmB,CAChC,UAAsB,EACtB,WAAqB,EACrB,MAAwB,EACxB,cAA2B;IAE3B,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,sCAAsC,CAAC,CAAC;IAEnF,MAAM,WAAW,GAA4B;QAC3C,UAAU;QACV,YAAY,EAAE,WAAW;QACzB,SAAS,EAAE,MAAM,CAAC,SAAS;QAC3B,UAAU,EAAE,MAAM,CAAC,UAAU;QAC7B,gBAAgB,EAAE,MAAM,CAAC,gBAAgB;QACzC,kBAAkB,EAAE,MAAM,CAAC,kBAAkB;QAC7C,OAAO,EAAE,MAAM,CAAC,OAAO;KACxB,CAAC;IAEF,IAAI,cAAc,EAAE,CAAC;QACnB,WAAW,CAAC,eAAe,GAAG,cAAc,CAAC;IAC/C,CAAC;IAED,MAAM,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC;IAE1C,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;QACrC,IAAI,OAAO,GAAG,KAAK,CAAC;QACpB,MAAM,OAAO,GAAuB;YAClC,IAAI,EAAE,MAAM;YACZ,aAAa,EAAE,CAAC,IAAI,CAAC;YACrB,IAAI,EAAE,EAAE;SACT,CAAC;QAEF,MAAM,KAAK
,GAAG,IAAI,WAAW,CAAC,UAAU,EAAE,OAAO,CAAC,CAAC;QACnD,IAAI,MAAM,GAAG,EAAE,CAAC;QAChB,IAAI,YAAY,GAAyC,IAAI,CAAC;QAE9D,MAAM,OAAO,GAAG,GAAG,EAAE;YACnB,IAAI,YAAY,EAAE,CAAC;gBACjB,YAAY,CAAC,YAAY,CAAC,CAAC;gBAC3B,YAAY,GAAG,IAAI,CAAC;YACtB,CAAC;QACH,CAAC,CAAC;QAEF,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,EAAE;YAC5B,IAAI,OAAO;gBAAE,OAAO;YACpB,IAAI,CAAC;gBACH,KAAK,CAAC,IAAI,EAAE,CAAC;YACf,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,OAAO,CAAC,KAAK,CACX,sDAAsD,EACtD,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CACvD,CAAC;gBACF,YAAY;YACd,CAAC;YACD,4DAA4D;YAC5D,YAAY,GAAG,UAAU,CAAC,GAAG,EAAE;gBAC7B,IAAI,CAAC,OAAO,EAAE,CAAC;oBACb,OAAO,CAAC,KAAK,CACX,iFAAiF,KAAK,CAAC,YAAY,EAAE,GAAG,GAAG,CAC5G,CAAC;oBACF,IAAI,CAAC;wBACH,KAAK,CAAC,YAAY,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC;oBACtC,CAAC;oBAAC,OAAO,KAAK,EAAE,CAAC;wBACf,OAAO,CAAC,KAAK,CACX,sEAAsE,EACtE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CACvD,CAAC;oBACJ,CAAC;gBACH,CAAC;gBACD,IAAI,CAAC,OAAO,EAAE,CAAC;oBACb,OAAO,GAAG,IAAI,CAAC;oBACf,MAAM,CACJ,IAAI,eAAe,CACjB,mCAAmC,iBAAiB,6BAA6B,EACjF,gBAAgB,EAChB,EAAE,MAAM,EAAE,MAAM,CAAC,SAAS,CAAC,CAAC,EAAE,IAAI,CAAC,EAAE,CACtC,CACF,CAAC;gBACJ,CAAC;YACH,CAAC,EAAE,IAAI,CAAC,CAAC;QACX,CAAC,EAAE,iBAAiB,CAAC,CAAC;QAEtB,MAAM,YAAY,GAAa,EAAE,CAAC;QAClC,KAAK,CAAC,EAAE,CAAC,SAAS,EAAE,CAAC,GAAW,EAAE,EAAE;YAClC,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACzB,CAAC,CAAC,CAAC;QAEH,KAAK,CAAC,EAAE,CAAC,QAAQ,EAAE,CAAC,GAAW,EAAE,EAAE;YACjC,IAAI,MAAM,CAAC,MAAM,GAAG,iBAAiB,EAAE,CAAC;gBACtC,MAAM,IAAI,GAAG,GAAG,IAAI,CAAC;YACvB,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,MAAM,SAAS,GAAG,CAAC,GAAW,EAAE,EAAE;YAChC,YAAY,CAAC,KAAK,CAAC,CAAC;YACpB,OAAO,EAAE,CAAC;YACV,IAAI,OAAO;gBAAE,OAAO;YACpB,OAAO,GAAG,IAAI,CAAC;YAEf,IAAI,GAAG,EAAE,CAAC;gBACR,OAAO,CAAC,KAAK,CAAC,wBAAwB,EAAE,GAAG,CAAC,OAAO,CAAC,CAAC;gBACrD,IAAI,MAAM;oBAAE,OAAO,CAAC,KAAK,CAAC,yBAAyB,EAAE,MAAM,CAAC,SAAS,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC;YAClF,CAAC;YAED,MAAM,MAAM,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACvC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,E
AAE,CAAC;gBACnB,IAAI,GAAG,EAAE,CAAC;oBACR,MAAM,CACJ,IAAI,eAAe,CAAC,6BAA6B,GAAG,CAAC,OAAO,EAAE,EAAE,eAAe,EAAE;wBAC/E,MAAM,EAAE,MAAM,CAAC,SAAS,CAAC,CAAC,EAAE,IAAI,CAAC;qBAClC,CAAC,CACH,CAAC;gBACJ,CAAC;qBAAM,CAAC;oBACN,MAAM,CACJ,IAAI,eAAe,CAAC,sCAAsC,EAAE,eAAe,EAAE;wBAC3E,MAAM,EAAE,MAAM,CAAC,SAAS,CAAC,CAAC,EAAE,IAAI,CAAC;qBAClC,CAAC,CACH,CAAC;gBACJ,CAAC;gBACD,OAAO;YACT,CAAC;YAED,2BAA2B;YAC3B,MAAM,KAAK,GAAG,MAAM,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YACxC,IAAI,MAAgC,CAAC;YACrC,KAAK,IAAI,CAAC,GAAG,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC3C,IAAI,CAAC;oBACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAiB,CAAC;oBACrD,MAAM;gBACR,CAAC;gBAAC,OAAO,KAAK,EAAE,CAAC;oBACf,OAAO,CAAC,KAAK,CACX,yEAAyE,EACzE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CACvD,CAAC;oBACF,iCAAiC;gBACnC,CAAC;YACH,CAAC;YAED,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;gBACzB,yDAAyD;gBACzD,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,MAAM,KAAK,IAAI,EAAE,CAAC;oBAClD,MAAM,CACJ,IAAI,eAAe,CACjB,8CAA8C,EAC9C,oBAAoB,EACpB,EAAE,MAAM,EAAE,MAAM,CAAC,SAAS,CAAC,CAAC,EAAE,IAAI,CAAC,EAAE,CACtC,CACF,CAAC;oBACF,OAAO;gBACT,CAAC;gBACD,IAAI,OAAO,MAAM,CAAC,OAAO,KAAK,SAAS,EAAE,CAAC;oBACxC,MAAM,CACJ,IAAI,eAAe,CACjB,2DAA2D,EAC3D,oBAAoB,EACpB,EAAE,MAAM,EAAE,MAAM,CAAC,SAAS,CAAC,CAAC,EAAE,IAAI,CAAC,EAAE,CACtC,CACF,CAAC;oBACF,OAAO;gBACT,CAAC;gBACD,IACE,MAAM,CAAC,OAAO;oBACd,MAAM,CAAC,gBAAgB,KAAK,SAAS;oBACrC,MAAM,CAAC,MAAM,KAAK,SAAS,EAC3B,CAAC;oBACD,MAAM,CACJ,IAAI,eAAe,CACjB,mFAAmF,EACnF,oBAAoB,EACpB,EAAE,MAAM,EAAE,MAAM,CAAC,SAAS,CAAC,CAAC,EAAE,IAAI,CAAC,EAAE,CACtC,CACF,CAAC;oBACF,OAAO;gBACT,CAAC;gBACD,OAAO,CAAC,MAAM,CAAC,CAAC;YAClB,CAAC;iBAAM,CAAC;gBACN,MAAM,CACJ,IAAI,eAAe,CACjB,kDAAkD,EAClD,oBAAoB,EACpB,EAAE,MAAM,EAAE,MAAM,CAAC,SAAS,CAAC,CAAC,EAAE,IAAI,CAAC,EAAE,CACtC,CACF,CAAC;YACJ,CAAC;QACH,CAAC,CAAC;QAEF,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAClB,KAAK,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;IACvB,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,gBAAgB,CAAC,CAA0B,EAAE,CAAW
;IACtE,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAClC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;IACnC,CAAC;IACD,iDAAiD;IACjD,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC;AACvC,CAAC;AAED,kFAAkF;AAClF,2BAA2B;AAC3B,kFAAkF;AAElF;;;;;;;;;;;;;GAaG;AACH,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,EAAmB,EACnB,OAAsB,EACtB,MAAwB,EACxB,WAAsB;IAEtB,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;IACpC,MAAM,KAAK,GAAG,MAAM,EAAE,CAAC;IACvB,MAAM,IAAI,GAAG,EAAE,CAAC,aAAa,EAAE,CAAC;IAEhC,4CAA4C;IAC5C,OAAO,CAAC,KAAK,CAAC,4CAA4C,CAAC,CAAC;IAC5D,MAAM,aAAa,GAAG,yBAAyB,CAAC,IAAI,EAAE,WAAW,CAAC,CAAC;IAEnE,IAAI,aAAa,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC7B,MAAM,IAAI,eAAe,CACvB,qEAAqE,aAAa,CAAC,MAAM,EAAE,EAC3F,wBAAwB,EACxB,EAAE,KAAK,EAAE,aAAa,CAAC,MAAM,EAAE,SAAS,EAAE,WAAW,EAAE,MAAM,IAAI,KAAK,EAAE,CACzE,CAAC;IACJ,CAAC;IAED,OAAO,CAAC,KAAK,CAAC,aAAa,aAAa,CAAC,MAAM,4BAA4B,CAAC,CAAC;IAE7E,yCAAyC;IACzC,MAAM,aAAa,GAAG,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;IAC9D,MAAM,eAAe,GAAG,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC;IAE1E,wCAAwC;IACxC,OAAO,CAAC,KAAK,CAAC,qBAAqB,MAAM,CAAC,SAAS,gBAAgB,CAAC,CAAC;IACrE,MAAM,YAAY,GAAG,MAAM,mBAAmB,CAC5C,eAAe,EACf,aAAa,EACb,MAAM,CACP,CAAC;IAEF,IAAI,CAAC,YAAY,CAAC,OAAO,EAAE,CAAC;QAC1B,MAAM,IAAI,eAAe,CAAC,6BAA6B,YAAY,CAAC,KAAK,EAAE,EAAE,eAAe,EAAE;YAC5F,UAAU,EAAE,YAAY,CAAC,UAAU;YACnC,KAAK,EAAE,YAAY,CAAC,KAAK;SAC1B,CAAC,CAAC;IACL,CAAC;IAED,oCAAoC;IACpC,OAAO,CAAC,KAAK,CAAC,mBAAmB,YAAY,CAAC,UAAU,+BAA+B,CAAC,CAAC;IACzF,MAAM,OAAO,GAAG,oBAAoB,CAAC,EAAE,CAAC,CAAC;IACzC,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IACrC,MAAM,oBAAoB,GAAG,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC,CAAC;IACvE,MAAM,mBAAmB,GAAG,IAAI,CAAC,SAAS,CAAC;QACzC,SAAS,EAAE,MAAM,CAAC,SAAS;QAC3B,UAAU,EAAE,MAAM,CAAC,UAAU;QAC7B,gBAAgB,EAAE,MAAM,CAAC,gBAAgB;QACzC,kBAAkB,EAAE,MAAM
,CAAC,kBAAkB;QAC7C,OAAO,EAAE,MAAM,CAAC,OAAO;KACxB,CAAC,CAAC;IAEH,uDAAuD;IACvD,MAAM,YAAY,GAAwB,EAAE,CAAC;IAC7C,MAAM,MAAM,GAAG,YAAY,CAAC,MAAO,CAAC;IACpC,MAAM,aAAa,GAAG,YAAY,CAAC,aAAc,CAAC;IAClD,MAAM,SAAS,GAAG,YAAY,CAAC,SAAU,CAAC;IAC1C,MAAM,eAAe,GAAG,YAAY,CAAC,gBAAiB,CAAC;IAEvD,mCAAmC;IACnC,MAAM,aAAa,GAAG,IAAI,GAAG,EAAoB,CAAC,CAAC,uBAAuB;IAC1E,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACvC,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;QACxB,IAAI,KAAK,KAAK,CAAC,CAAC;YAAE,SAAS,CAAC,aAAa;QACzC,MAAM,QAAQ,GAAG,aAAa,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;QAC1C,IAAI,QAAQ,EAAE,CAAC;YACb,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACnB,CAAC;aAAM,CAAC;YACN,aAAa,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QAChC,CAAC;IACH,CAAC;IAED,sBAAsB;IACtB,MAAM,YAAY,GAAG,KAAK,CAAC,IAAI,CAAC,aAAa,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC5E,MAAM,YAAY,GAAG,IAAI,GAAG,EAAkB,CAAC,CAAC,wBAAwB;IAExE,mDAAmD;IACnD,MAAM,gBAAgB,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,EAAE;QAC7C,KAAK,IAAI,EAAE,GAAG,CAAC,EAAE,EAAE,GAAG,YAAY,CAAC,MAAM,EAAE,EAAE,EAAE,EAAE,CAAC;YAChD,MAAM,KAAK,GAAG,YAAY,CAAC,EAAE,CAAC,CAAC;YAC/B,MAAM,UAAU,GAAG,aAAa,CAAC,GAAG,CAAC,KAAK,CAAE,CAAC;YAC7C,MAAM,QAAQ,GAAG,SAAS,CAAC,EAAE,CAAC,CAAC;YAC/B,MAAM,SAAS,GAAG,eAAe,CAAC,EAAE,CAAC,CAAC;YAEtC,MAAM,SAAS,GAAG,MAAM,EAAE,CAAC;YAC3B,YAAY,CAAC,GAAG,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;YAEnC,sCAAsC;YACtC,MAAM,WAAW,GAAG,WAAW,CAAC,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,GAAG,GAAG,GAAG,KAAK,CAAC,CAAC;YAExE,gFAAgF;YAChF,yCAAyC;YACzC,MAAM,UAAU,GAAG,aAAa,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC;YAChD,MAAM,QAAQ,GAAG,EAAE,CAAC,WAAW,CAAC,UAAU,CAAC,CAAC;YAC5C,MAAM,YAAY,GAAG,QAAQ,EAAE,aAAa,IAAI,IAAI,CAAC;YAErD,sCAAsC;YACtC,MAAM,MAAM,GAAG,OAAO,CAAC,gBAAgB,CAAC;gBACtC,IAAI,EAAE,cAAc,CAAC,UAAU;gBAC/B,WAAW,EAAE,YAA0B;gBACvC,SAAS,EAAE,YAAY;gBACvB,gBAAgB,EAAE,QAAQ,EAAE,aAAa,IAAI,KAAK;gBAClD,YAAY,EAAE,WAAW;gBACzB,UAAU,EAAE,WAAW,CAAC,mBAAmB,GAAG,GAAG,GAAG,UAAU,CAAC,MAAM,CAAC;gBACtE,SAAS,EAAE,oBAAoB;gBAC/B,iBAAiB,EAAE,OAAO;gB
AC1B,iBAAiB,EAAE;oBACjB,SAAS,EAAE,MAAM,CAAC,SAAS;oBAC3B,MAAM,EAAE,KAAK;oBACb,aAAa,EAAE,EAAE;oBACjB,cAAc,EAAE,UAAU,CAAC,MAAM;iBAClC;gBACD,sBAAsB,EAAE,oBAAoB;gBAC5C,wBAAwB,EAAE,SAAS;aACpC,CAAC,CAAC;YAEH,wBAAwB;YACxB,MAAM,OAAO,GAAY;gBACvB,EAAE,EAAE,SAAS;gBACb,MAAM,EAAE,KAAK;gBACb,aAAa,EAAE,EAAE;gBACjB,KAAK,EAAE,IAAI;gBACX,WAAW,EAAE,IAAI;gBACjB,kBAAkB,EAAE,IAAI;gBACxB,cAAc,EAAE,UAAU,CAAC,MAAM;gBACjC,aAAa,EAAE,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC;gBACvC,cAAc,EAAE,IAAI;gBACpB,eAAe,EAAE,SAAS;gBAC1B,SAAS,EAAE,MAAM,CAAC,SAAS;gBAC3B,qBAAqB,EAAE,mBAAmB;gBAC1C,gBAAgB,EAAE,YAAY,CAAC,gBAAgB,IAAI,IAAI;gBACvD,YAAY,EAAE,WAAW;gBACzB,aAAa,EAAE,MAAM;gBACrB,UAAU,EAAE,GAAG;gBACf,sBAAsB,EAAE,oBAAoB;aAC7C,CAAC;YAEF,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;YAE7B,oBAAoB;YACpB,MAAM,UAAU,GAAa,EAAE,CAAC;YAChC,MAAM,gBAAgB,GAAa,EAAE,CAAC;YACtC,MAAM,iBAAiB,GAAa,EAAE,CAAC;YAEvC,KAAK,MAAM,GAAG,IAAI,UAAU,EAAE,CAAC;gBAC7B,UAAU,CAAC,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC;gBACpC,gBAAgB,CAAC,IAAI,CAAC,gBAAgB,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC,CAAC;gBAChF,iBAAiB,CAAC,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC;YAC7C,CAAC;YAED,YAAY,CAAC,IAAI,CAAC;gBAChB,aAAa,EAAE,EAAE;gBACjB,cAAc,EAAE,UAAU,CAAC,MAAM;gBACjC,eAAe,EAAE,SAAS;gBAC1B,QAAQ;gBACR,YAAY,EAAE,UAAU;gBACxB,YAAY,EAAE,gBAAgB;gBAC9B,aAAa,EAAE,iBAAiB;aACjC,CAAC,CAAC;QACL,CAAC;QAED,0EAA0E;QAC1E,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACvC,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;YACxB,MAAM,OAAO,GAAG,KAAK,KAAK,CAAC,CAAC,CAAC;YAC7B,MAAM,SAAS,GAAG,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,IAAI,CAAC,CAAC;YACrE,MAAM,QAAQ,GAAG,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS,CAAC,YAAY,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC;YACzE,MAAM,UAAU,GAAG,QAAQ,CAAC,CAAC,CAAC,gBAAgB,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAEzF,MAAM,EAAE,GAAoB;gBAC1B,EAAE,EAAE,MAAM,EAAE;gBACZ,WAAW,EAAE,aAAa,CAAC,CAAC,CAAC;gBAC7B,UAAU,EAAE,SAAS;gBACrB,MAAM,EAAE,KAAK;gBACb,sBAAsB,EAAE,IA
AI,CAAC,KAAK,CAAC,UAAU,GAAG,OAAO,CAAC,GAAG,OAAO;gBAClE,sBAAsB,EAAE,aAAa,CAAC,CAAC,CAAC;gBACxC,QAAQ,EAAE,OAAO;gBACjB,WAAW,EAAE,GAAG;aACjB,CAAC;YAEF,qBAAqB,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;QAClC,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,gBAAgB,EAAE,CAAC;IAEnB,MAAM,WAAW,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAErE,MAAM,eAAe,GAAG,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC,CAAC;IAClE,OAAO,CAAC,KAAK,CACX,mBAAmB,YAAY,CAAC,UAAU,cAAc,WAAW,CAAC,MAAM,gBAAgB,eAAe,IAAI,CAC9G,CAAC;IAEF,OAAO;QACL,MAAM,EAAE,KAAK;QACb,SAAS,EAAE,MAAM,CAAC,SAAS;QAC3B,UAAU,EAAE,YAAY,CAAC,UAAW;QACpC,eAAe,EAAE,aAAa,CAAC,MAAM;QACrC,kBAAkB,EAAE,WAAW;QAC/B,gBAAgB,EAAE,YAAY,CAAC,gBAAgB,IAAI,CAAC;QACpD,QAAQ,EAAE,YAAY;QACtB,sBAAsB,EAAE,eAAe;KACxC,CAAC;AACJ,CAAC"}
@@ -0,0 +1,41 @@
1
+ /**
2
+ * Document Comparison Diff Service
3
+ *
4
+ * Computes text and structural diffs between two OCR-processed documents.
5
+ * Uses the `diff` npm package (jsdiff) for text comparison.
6
+ *
7
+ * CRITICAL: NEVER use console.log() - stdout is JSON-RPC protocol.
8
+ */
9
+ import type { TextDiffResult, StructuralDiff } from '../../models/comparison.js';
10
+ /**
11
+ * Input shape for a document's structural metadata used in compareStructure()
12
+ */
13
+ export interface StructuralDocInput {
14
+ page_count: number | null;
15
+ text_length: number;
16
+ quality_score: number | null;
17
+ ocr_mode: string;
18
+ chunk_count: number;
19
+ }
20
+ /**
21
+ * Compare structural metadata between two documents
22
+ *
23
+ * @param doc1 - First document structural metadata
24
+ * @param doc2 - Second document structural metadata
25
+ * @returns StructuralDiff with side-by-side metadata
26
+ */
27
+ export declare function compareStructure(doc1: StructuralDocInput, doc2: StructuralDocInput): StructuralDiff;
28
+ /**
29
+ * Compare two texts using line-level diff
30
+ *
31
+ * @param text1 - First document text
32
+ * @param text2 - Second document text
33
+ * @param maxOperations - Maximum operations to return (default 1000)
34
+ * @returns TextDiffResult with operations, counts, and similarity ratio
35
+ */
36
+ export declare function compareText(text1: string, text2: string, maxOperations?: number): TextDiffResult;
37
+ /**
38
+ * Generate a human-readable summary of the comparison
39
+ */
40
+ export declare function generateSummary(textDiff: TextDiffResult | null, structuralDiff: StructuralDiff, doc1Name: string, doc2Name: string): string;
41
+ //# sourceMappingURL=diff-service.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"diff-service.d.ts","sourceRoot":"","sources":["../../../src/services/comparison/diff-service.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAGH,OAAO,KAAK,EAEV,cAAc,EACd,cAAc,EACf,MAAM,4BAA4B,CAAC;AAEpC;;GAEG;AACH,MAAM,WAAW,kBAAkB;IACjC,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,WAAW,EAAE,MAAM,CAAC;IACpB,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7B,QAAQ,EAAE,MAAM,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;CACrB;AAED;;;;;;GAMG;AACH,wBAAgB,gBAAgB,CAC9B,IAAI,EAAE,kBAAkB,EACxB,IAAI,EAAE,kBAAkB,GACvB,cAAc,CAahB;AAED;;;;;;;GAOG;AACH,wBAAgB,WAAW,CACzB,KAAK,EAAE,MAAM,EACb,KAAK,EAAE,MAAM,EACb,aAAa,GAAE,MAAa,GAC3B,cAAc,CA+DhB;AAED;;GAEG;AACH,wBAAgB,eAAe,CAC7B,QAAQ,EAAE,cAAc,GAAG,IAAI,EAC/B,cAAc,EAAE,cAAc,EAC9B,QAAQ,EAAE,MAAM,EAChB,QAAQ,EAAE,MAAM,GACf,MAAM,CAwBR"}
@@ -0,0 +1,120 @@
1
+ /**
2
+ * Document Comparison Diff Service
3
+ *
4
+ * Computes text and structural diffs between two OCR-processed documents.
5
+ * Uses the `diff` npm package (jsdiff) for text comparison.
6
+ *
7
+ * CRITICAL: NEVER use console.log() - stdout is JSON-RPC protocol.
8
+ */
9
+ import { diffLines } from 'diff';
/**
 * Build a side-by-side view of two documents' structural metadata.
 *
 * Every field is copied through unchanged into a `doc1_*` / `doc2_*` pair;
 * nothing is derived, normalized, or defaulted.
 *
 * @param doc1 - Structural metadata of the first document
 * @param doc2 - Structural metadata of the second document
 * @returns StructuralDiff holding both documents' metadata
 */
export function compareStructure(doc1, doc2) {
    const {
        page_count: pages1,
        chunk_count: chunks1,
        text_length: length1,
        quality_score: quality1,
        ocr_mode: mode1,
    } = doc1;
    const {
        page_count: pages2,
        chunk_count: chunks2,
        text_length: length2,
        quality_score: quality2,
        ocr_mode: mode2,
    } = doc2;
    const structuralDiff = {
        doc1_page_count: pages1,
        doc2_page_count: pages2,
        doc1_chunk_count: chunks1,
        doc2_chunk_count: chunks2,
        doc1_text_length: length1,
        doc2_text_length: length2,
        doc1_quality_score: quality1,
        doc2_quality_score: quality2,
        doc1_ocr_mode: mode1,
        doc2_ocr_mode: mode2,
    };
    return structuralDiff;
}
/**
 * Compare two texts using a line-level diff.
 *
 * Each change returned by `diffLines` becomes one operation tagged
 * 'insert', 'delete', or 'equal', annotated with the offsets (measured with
 * String.prototype.length, i.e. UTF-16 code units) at which it starts in
 * each document.
 *
 * Counts, `total_operations`, and `similarity_ratio` always describe the
 * complete diff; only the returned `operations` array is capped.
 *
 * @param text1 - First document text
 * @param text2 - Second document text
 * @param maxOperations - Maximum operations to return (default 1000).
 *   Negative values are treated as 0.
 * @returns TextDiffResult with operations, counts, and similarity ratio
 */
export function compareText(text1, text2, maxOperations = 1000) {
    const changes = diffLines(text1, text2);
    const operations = [];
    let doc1Offset = 0;
    let doc2Offset = 0;
    let insertions = 0;
    let deletions = 0;
    let unchanged = 0;
    let unchangedChars = 0;
    for (const change of changes) {
        const length = change.value.length;
        let type;
        if (change.added) {
            type = 'insert';
        }
        else if (change.removed) {
            type = 'delete';
        }
        else {
            type = 'equal';
        }
        // Offsets are recorded before advancing, so they mark where this
        // operation starts in each document.
        operations.push({
            type,
            text: change.value,
            doc1_offset: doc1Offset,
            doc2_offset: doc2Offset,
            line_count: change.count ?? 0,
        });
        if (type === 'insert') {
            // Inserted text exists only in the second document.
            insertions++;
            doc2Offset += length;
        }
        else if (type === 'delete') {
            // Deleted text exists only in the first document.
            deletions++;
            doc1Offset += length;
        }
        else {
            unchanged++;
            unchangedChars += length;
            doc1Offset += length;
            doc2Offset += length;
        }
    }
    const totalOps = operations.length;
    // A negative limit would make slice() count from the end of the array
    // and return a tail-trimmed list while still reporting truncation.
    const limit = Math.max(0, maxOperations);
    const truncated = totalOps > limit;
    const finalOps = truncated ? operations.slice(0, limit) : operations;
    // Similarity = unchanged chars / total chars. Unchanged text appears in
    // both documents, hence the factor of 2. Two empty texts are identical.
    const totalChars = text1.length + text2.length;
    const similarityRatio = totalChars === 0 ? 1.0 : (2 * unchangedChars) / totalChars;
    return {
        operations: finalOps,
        total_operations: totalOps,
        truncated,
        insertions,
        deletions,
        unchanged,
        // Rounded to four decimal places.
        similarity_ratio: Math.round(similarityRatio * 10000) / 10000,
        doc1_length: text1.length,
        doc2_length: text2.length,
    };
}
/**
 * Generate a human-readable summary of the comparison.
 *
 * The summary always starts with the two document names. When a text diff
 * is supplied it adds the similarity percentage, the operation counts, and a
 * truncation note if the diff was capped. A page-count sentence is added
 * only when both page counts are known and differ.
 *
 * @param textDiff - Text diff result, or null when no text diff is available
 * @param structuralDiff - Structural diff holding both documents' page counts
 * @param doc1Name - Display name of the first document
 * @param doc2Name - Display name of the second document
 * @returns The summary sentences joined by single spaces
 */
export function generateSummary(textDiff, structuralDiff, doc1Name, doc2Name) {
    const parts = [`Comparison of "${doc1Name}" vs "${doc2Name}".`];
    if (textDiff) {
        const pct = Math.round(textDiff.similarity_ratio * 100);
        parts.push(`Text similarity: ${pct}%.`);
        parts.push(`${textDiff.insertions} insertions, ${textDiff.deletions} deletions, ${textDiff.unchanged} unchanged sections.`);
        if (textDiff.truncated) {
            parts.push(`(Diff truncated: showing ${textDiff.operations.length} of ${textDiff.total_operations} operations.)`);
        }
    }
    const { doc1_page_count: pages1, doc2_page_count: pages2 } = structuralDiff;
    // A missing page count means "unknown", not zero. Coercing it to 0 would
    // report the other document's full page count as the difference.
    if (pages1 != null && pages2 != null && pages1 !== pages2) {
        parts.push(`Page count difference: ${Math.abs(pages1 - pages2)} pages.`);
    }
    return parts.join(' ');
}
120
+ //# sourceMappingURL=diff-service.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"diff-service.js","sourceRoot":"","sources":["../../../src/services/comparison/diff-service.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EAAE,SAAS,EAAE,MAAM,MAAM,CAAC;AAkBjC;;;;;;GAMG;AACH,MAAM,UAAU,gBAAgB,CAC9B,IAAwB,EACxB,IAAwB;IAExB,OAAO;QACL,eAAe,EAAE,IAAI,CAAC,UAAU;QAChC,eAAe,EAAE,IAAI,CAAC,UAAU;QAChC,gBAAgB,EAAE,IAAI,CAAC,WAAW;QAClC,gBAAgB,EAAE,IAAI,CAAC,WAAW;QAClC,gBAAgB,EAAE,IAAI,CAAC,WAAW;QAClC,gBAAgB,EAAE,IAAI,CAAC,WAAW;QAClC,kBAAkB,EAAE,IAAI,CAAC,aAAa;QACtC,kBAAkB,EAAE,IAAI,CAAC,aAAa;QACtC,aAAa,EAAE,IAAI,CAAC,QAAQ;QAC5B,aAAa,EAAE,IAAI,CAAC,QAAQ;KAC7B,CAAC;AACJ,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,WAAW,CACzB,KAAa,EACb,KAAa,EACb,gBAAwB,IAAI;IAE5B,MAAM,OAAO,GAAG,SAAS,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;IAExC,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,MAAM,UAAU,GAAwB,EAAE,CAAC;IAE3C,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,IAAI,IAA+B,CAAC;QACpC,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;YACjB,IAAI,GAAG,QAAQ,CAAC;QAClB,CAAC;aAAM,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;YAC1B,IAAI,GAAG,QAAQ,CAAC;QAClB,CAAC;aAAM,CAAC;YACN,IAAI,GAAG,OAAO,CAAC;QACjB,CAAC;QAED,UAAU,CAAC,IAAI,CAAC;YACd,IAAI;YACJ,IAAI,EAAE,MAAM,CAAC,KAAK;YAClB,WAAW,EAAE,UAAU;YACvB,WAAW,EAAE,UAAU;YACvB,UAAU,EAAE,MAAM,CAAC,KAAK,IAAI,CAAC;SAC9B,CAAC,CAAC;QAEH,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;YACjB,UAAU,EAAE,CAAC;YACb,UAAU,IAAI,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC;QACpC,CAAC;aAAM,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;YAC1B,SAAS,EAAE,CAAC;YACZ,UAAU,IAAI,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC;QACpC,CAAC;aAAM,CAAC;YACN,SAAS,EAAE,CAAC;YACZ,UAAU,IAAI,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC;YAClC,UAAU,IAAI,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC;QACpC,CAAC;IACH,CAAC;IAED,MAAM,QAAQ,GAAG,UAAU,CAAC,MAAM,CAAC;IACnC,MAAM,SAAS,GAAG,QAAQ,GAAG,aAAa,CAAC;IAC3C,MAAM,QAAQ,GAAG,SAAS,CAAC,CAAC,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,aAAa,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC;IAE7E,6CAA6C;IAC7C,MAAM,cAAc,GAAG,UAAU;SAC9B,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAA
K,OAAO,CAAC;SACjC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;IAC9C,MAAM,UAAU,GAAG,KAAK,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC;IAC/C,MAAM,eAAe,GAAG,UAAU,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,cAAc,CAAC,GAAG,UAAU,CAAC;IAEnF,OAAO;QACL,UAAU,EAAE,QAAQ;QACpB,gBAAgB,EAAE,QAAQ;QAC1B,SAAS;QACT,UAAU;QACV,SAAS;QACT,SAAS;QACT,gBAAgB,EAAE,IAAI,CAAC,KAAK,CAAC,eAAe,GAAG,KAAK,CAAC,GAAG,KAAK;QAC7D,WAAW,EAAE,KAAK,CAAC,MAAM;QACzB,WAAW,EAAE,KAAK,CAAC,MAAM;KAC1B,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,eAAe,CAC7B,QAA+B,EAC/B,cAA8B,EAC9B,QAAgB,EAChB,QAAgB;IAEhB,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,KAAK,CAAC,IAAI,CAAC,kBAAkB,QAAQ,SAAS,QAAQ,IAAI,CAAC,CAAC;IAE5D,IAAI,QAAQ,EAAE,CAAC;QACb,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,gBAAgB,GAAG,GAAG,CAAC,CAAC;QACxD,KAAK,CAAC,IAAI,CAAC,oBAAoB,GAAG,IAAI,CAAC,CAAC;QACxC,KAAK,CAAC,IAAI,CACR,GAAG,QAAQ,CAAC,UAAU,gBAAgB,QAAQ,CAAC,SAAS,eAAe,QAAQ,CAAC,SAAS,sBAAsB,CAChH,CAAC;QACF,IAAI,QAAQ,CAAC,SAAS,EAAE,CAAC;YACvB,KAAK,CAAC,IAAI,CACR,4BAA4B,QAAQ,CAAC,UAAU,CAAC,MAAM,OAAO,QAAQ,CAAC,gBAAgB,eAAe,CACtG,CAAC;QACJ,CAAC;IACH,CAAC;IAED,MAAM,QAAQ,GAAG,CAAC,cAAc,CAAC,eAAe,IAAI,CAAC,CAAC,GAAG,CAAC,cAAc,CAAC,eAAe,IAAI,CAAC,CAAC,CAAC;IAC/F,IAAI,QAAQ,KAAK,CAAC,EAAE,CAAC;QACnB,KAAK,CAAC,IAAI,CAAC,0BAA0B,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;IACpE,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AACzB,CAAC"}