ocr-provenance-mcp 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ocr-provenance-mcp might be problematic. Click here for more details.

Files changed (578)
  1. package/.env.example +55 -0
  2. package/LICENSE +78 -0
  3. package/README.md +1154 -0
  4. package/dist/bin-http.d.ts +24 -0
  5. package/dist/bin-http.d.ts.map +1 -0
  6. package/dist/bin-http.js +275 -0
  7. package/dist/bin-http.js.map +1 -0
  8. package/dist/bin-setup.d.ts +11 -0
  9. package/dist/bin-setup.d.ts.map +1 -0
  10. package/dist/bin-setup.js +610 -0
  11. package/dist/bin-setup.js.map +1 -0
  12. package/dist/bin.d.ts +16 -0
  13. package/dist/bin.d.ts.map +1 -0
  14. package/dist/bin.js +16 -0
  15. package/dist/bin.js.map +1 -0
  16. package/dist/index.d.ts +13 -0
  17. package/dist/index.d.ts.map +1 -0
  18. package/dist/index.js +90 -0
  19. package/dist/index.js.map +1 -0
  20. package/dist/models/chunk.d.ts +136 -0
  21. package/dist/models/chunk.d.ts.map +1 -0
  22. package/dist/models/chunk.js +27 -0
  23. package/dist/models/chunk.js.map +1 -0
  24. package/dist/models/cluster.d.ts +79 -0
  25. package/dist/models/cluster.d.ts.map +1 -0
  26. package/dist/models/cluster.js +10 -0
  27. package/dist/models/cluster.js.map +1 -0
  28. package/dist/models/comparison.d.ts +62 -0
  29. package/dist/models/comparison.d.ts.map +1 -0
  30. package/dist/models/comparison.js +8 -0
  31. package/dist/models/comparison.js.map +1 -0
  32. package/dist/models/document.d.ts +104 -0
  33. package/dist/models/document.d.ts.map +1 -0
  34. package/dist/models/document.js +15 -0
  35. package/dist/models/document.js.map +1 -0
  36. package/dist/models/embedding.d.ts +87 -0
  37. package/dist/models/embedding.d.ts.map +1 -0
  38. package/dist/models/embedding.js +23 -0
  39. package/dist/models/embedding.js.map +1 -0
  40. package/dist/models/extraction.d.ts +15 -0
  41. package/dist/models/extraction.d.ts.map +1 -0
  42. package/dist/models/extraction.js +2 -0
  43. package/dist/models/extraction.js.map +1 -0
  44. package/dist/models/form-fill.d.ts +23 -0
  45. package/dist/models/form-fill.d.ts.map +1 -0
  46. package/dist/models/form-fill.js +2 -0
  47. package/dist/models/form-fill.js.map +1 -0
  48. package/dist/models/image.d.ts +177 -0
  49. package/dist/models/image.d.ts.map +1 -0
  50. package/dist/models/image.js +8 -0
  51. package/dist/models/image.js.map +1 -0
  52. package/dist/models/index.d.ts +14 -0
  53. package/dist/models/index.d.ts.map +1 -0
  54. package/dist/models/index.js +22 -0
  55. package/dist/models/index.js.map +1 -0
  56. package/dist/models/provenance.d.ts +174 -0
  57. package/dist/models/provenance.d.ts.map +1 -0
  58. package/dist/models/provenance.js +53 -0
  59. package/dist/models/provenance.js.map +1 -0
  60. package/dist/models/uploaded-file.d.ts +20 -0
  61. package/dist/models/uploaded-file.d.ts.map +1 -0
  62. package/dist/models/uploaded-file.js +2 -0
  63. package/dist/models/uploaded-file.js.map +1 -0
  64. package/dist/server/errors.d.ts +93 -0
  65. package/dist/server/errors.d.ts.map +1 -0
  66. package/dist/server/errors.js +256 -0
  67. package/dist/server/errors.js.map +1 -0
  68. package/dist/server/events.d.ts +36 -0
  69. package/dist/server/events.d.ts.map +1 -0
  70. package/dist/server/events.js +48 -0
  71. package/dist/server/events.js.map +1 -0
  72. package/dist/server/permissions.d.ts +26 -0
  73. package/dist/server/permissions.d.ts.map +1 -0
  74. package/dist/server/permissions.js +194 -0
  75. package/dist/server/permissions.js.map +1 -0
  76. package/dist/server/register-tools.d.ts +25 -0
  77. package/dist/server/register-tools.d.ts.map +1 -0
  78. package/dist/server/register-tools.js +102 -0
  79. package/dist/server/register-tools.js.map +1 -0
  80. package/dist/server/startup.d.ts +16 -0
  81. package/dist/server/startup.d.ts.map +1 -0
  82. package/dist/server/startup.js +37 -0
  83. package/dist/server/startup.js.map +1 -0
  84. package/dist/server/state.d.ts +166 -0
  85. package/dist/server/state.d.ts.map +1 -0
  86. package/dist/server/state.js +424 -0
  87. package/dist/server/state.js.map +1 -0
  88. package/dist/server/transports/http-transport.d.ts +37 -0
  89. package/dist/server/transports/http-transport.d.ts.map +1 -0
  90. package/dist/server/transports/http-transport.js +204 -0
  91. package/dist/server/transports/http-transport.js.map +1 -0
  92. package/dist/server/transports/index.d.ts +9 -0
  93. package/dist/server/transports/index.d.ts.map +1 -0
  94. package/dist/server/transports/index.js +9 -0
  95. package/dist/server/transports/index.js.map +1 -0
  96. package/dist/server/transports/session-manager.d.ts +40 -0
  97. package/dist/server/transports/session-manager.d.ts.map +1 -0
  98. package/dist/server/transports/session-manager.js +74 -0
  99. package/dist/server/transports/session-manager.js.map +1 -0
  100. package/dist/server/types.d.ts +82 -0
  101. package/dist/server/types.d.ts.map +1 -0
  102. package/dist/server/types.js +14 -0
  103. package/dist/server/types.js.map +1 -0
  104. package/dist/services/audit.d.ts +26 -0
  105. package/dist/services/audit.d.ts.map +1 -0
  106. package/dist/services/audit.js +43 -0
  107. package/dist/services/audit.js.map +1 -0
  108. package/dist/services/chunking/chunk-deduplicator.d.ts +33 -0
  109. package/dist/services/chunking/chunk-deduplicator.d.ts.map +1 -0
  110. package/dist/services/chunking/chunk-deduplicator.js +46 -0
  111. package/dist/services/chunking/chunk-deduplicator.js.map +1 -0
  112. package/dist/services/chunking/chunk-merger.d.ts +26 -0
  113. package/dist/services/chunking/chunk-merger.d.ts.map +1 -0
  114. package/dist/services/chunking/chunk-merger.js +94 -0
  115. package/dist/services/chunking/chunk-merger.js.map +1 -0
  116. package/dist/services/chunking/chunker.d.ts +62 -0
  117. package/dist/services/chunking/chunker.d.ts.map +1 -0
  118. package/dist/services/chunking/chunker.js +566 -0
  119. package/dist/services/chunking/chunker.js.map +1 -0
  120. package/dist/services/chunking/heading-normalizer.d.ts +33 -0
  121. package/dist/services/chunking/heading-normalizer.d.ts.map +1 -0
  122. package/dist/services/chunking/heading-normalizer.js +101 -0
  123. package/dist/services/chunking/heading-normalizer.js.map +1 -0
  124. package/dist/services/chunking/json-block-analyzer.d.ts +163 -0
  125. package/dist/services/chunking/json-block-analyzer.d.ts.map +1 -0
  126. package/dist/services/chunking/json-block-analyzer.js +1033 -0
  127. package/dist/services/chunking/json-block-analyzer.js.map +1 -0
  128. package/dist/services/chunking/markdown-parser.d.ts +75 -0
  129. package/dist/services/chunking/markdown-parser.d.ts.map +1 -0
  130. package/dist/services/chunking/markdown-parser.js +428 -0
  131. package/dist/services/chunking/markdown-parser.js.map +1 -0
  132. package/dist/services/chunking/text-normalizer.d.ts +20 -0
  133. package/dist/services/chunking/text-normalizer.d.ts.map +1 -0
  134. package/dist/services/chunking/text-normalizer.js +36 -0
  135. package/dist/services/chunking/text-normalizer.js.map +1 -0
  136. package/dist/services/clm/contract-schemas.d.ts +36 -0
  137. package/dist/services/clm/contract-schemas.d.ts.map +1 -0
  138. package/dist/services/clm/contract-schemas.js +92 -0
  139. package/dist/services/clm/contract-schemas.js.map +1 -0
  140. package/dist/services/clm/summarization.d.ts +46 -0
  141. package/dist/services/clm/summarization.d.ts.map +1 -0
  142. package/dist/services/clm/summarization.js +61 -0
  143. package/dist/services/clm/summarization.js.map +1 -0
  144. package/dist/services/clustering/clustering-service.d.ts +58 -0
  145. package/dist/services/clustering/clustering-service.d.ts.map +1 -0
  146. package/dist/services/clustering/clustering-service.js +467 -0
  147. package/dist/services/clustering/clustering-service.js.map +1 -0
  148. package/dist/services/comparison/diff-service.d.ts +41 -0
  149. package/dist/services/comparison/diff-service.d.ts.map +1 -0
  150. package/dist/services/comparison/diff-service.js +120 -0
  151. package/dist/services/comparison/diff-service.js.map +1 -0
  152. package/dist/services/embedding/embedder.d.ts +55 -0
  153. package/dist/services/embedding/embedder.d.ts.map +1 -0
  154. package/dist/services/embedding/embedder.js +202 -0
  155. package/dist/services/embedding/embedder.js.map +1 -0
  156. package/dist/services/embedding/nomic.d.ts +67 -0
  157. package/dist/services/embedding/nomic.d.ts.map +1 -0
  158. package/dist/services/embedding/nomic.js +280 -0
  159. package/dist/services/embedding/nomic.js.map +1 -0
  160. package/dist/services/gemini/circuit-breaker.d.ts +106 -0
  161. package/dist/services/gemini/circuit-breaker.d.ts.map +1 -0
  162. package/dist/services/gemini/circuit-breaker.js +237 -0
  163. package/dist/services/gemini/circuit-breaker.js.map +1 -0
  164. package/dist/services/gemini/client.d.ts +173 -0
  165. package/dist/services/gemini/client.d.ts.map +1 -0
  166. package/dist/services/gemini/client.js +483 -0
  167. package/dist/services/gemini/client.js.map +1 -0
  168. package/dist/services/gemini/config.d.ts +116 -0
  169. package/dist/services/gemini/config.d.ts.map +1 -0
  170. package/dist/services/gemini/config.js +118 -0
  171. package/dist/services/gemini/config.js.map +1 -0
  172. package/dist/services/gemini/index.d.ts +9 -0
  173. package/dist/services/gemini/index.d.ts.map +1 -0
  174. package/dist/services/gemini/index.js +13 -0
  175. package/dist/services/gemini/index.js.map +1 -0
  176. package/dist/services/gemini/rate-limiter.d.ts +62 -0
  177. package/dist/services/gemini/rate-limiter.d.ts.map +1 -0
  178. package/dist/services/gemini/rate-limiter.js +120 -0
  179. package/dist/services/gemini/rate-limiter.js.map +1 -0
  180. package/dist/services/images/extractor.d.ts +88 -0
  181. package/dist/services/images/extractor.d.ts.map +1 -0
  182. package/dist/services/images/extractor.js +340 -0
  183. package/dist/services/images/extractor.js.map +1 -0
  184. package/dist/services/images/optimizer.d.ts +130 -0
  185. package/dist/services/images/optimizer.d.ts.map +1 -0
  186. package/dist/services/images/optimizer.js +228 -0
  187. package/dist/services/images/optimizer.js.map +1 -0
  188. package/dist/services/ocr/datalab.d.ts +64 -0
  189. package/dist/services/ocr/datalab.d.ts.map +1 -0
  190. package/dist/services/ocr/datalab.js +425 -0
  191. package/dist/services/ocr/datalab.js.map +1 -0
  192. package/dist/services/ocr/errors.d.ts +38 -0
  193. package/dist/services/ocr/errors.d.ts.map +1 -0
  194. package/dist/services/ocr/errors.js +83 -0
  195. package/dist/services/ocr/errors.js.map +1 -0
  196. package/dist/services/ocr/file-manager.d.ts +76 -0
  197. package/dist/services/ocr/file-manager.d.ts.map +1 -0
  198. package/dist/services/ocr/file-manager.js +238 -0
  199. package/dist/services/ocr/file-manager.js.map +1 -0
  200. package/dist/services/ocr/form-fill.d.ts +48 -0
  201. package/dist/services/ocr/form-fill.d.ts.map +1 -0
  202. package/dist/services/ocr/form-fill.js +213 -0
  203. package/dist/services/ocr/form-fill.js.map +1 -0
  204. package/dist/services/ocr/processor.d.ts +95 -0
  205. package/dist/services/ocr/processor.d.ts.map +1 -0
  206. package/dist/services/ocr/processor.js +259 -0
  207. package/dist/services/ocr/processor.js.map +1 -0
  208. package/dist/services/provenance/agent-metadata.d.ts +82 -0
  209. package/dist/services/provenance/agent-metadata.d.ts.map +1 -0
  210. package/dist/services/provenance/agent-metadata.js +106 -0
  211. package/dist/services/provenance/agent-metadata.js.map +1 -0
  212. package/dist/services/provenance/chain-hash.d.ts +57 -0
  213. package/dist/services/provenance/chain-hash.d.ts.map +1 -0
  214. package/dist/services/provenance/chain-hash.js +131 -0
  215. package/dist/services/provenance/chain-hash.js.map +1 -0
  216. package/dist/services/provenance/exporter.d.ts +202 -0
  217. package/dist/services/provenance/exporter.d.ts.map +1 -0
  218. package/dist/services/provenance/exporter.js +457 -0
  219. package/dist/services/provenance/exporter.js.map +1 -0
  220. package/dist/services/provenance/index.d.ts +15 -0
  221. package/dist/services/provenance/index.d.ts.map +1 -0
  222. package/dist/services/provenance/index.js +17 -0
  223. package/dist/services/provenance/index.js.map +1 -0
  224. package/dist/services/provenance/tracker.d.ts +138 -0
  225. package/dist/services/provenance/tracker.d.ts.map +1 -0
  226. package/dist/services/provenance/tracker.js +293 -0
  227. package/dist/services/provenance/tracker.js.map +1 -0
  228. package/dist/services/provenance/verifier.d.ts +153 -0
  229. package/dist/services/provenance/verifier.d.ts.map +1 -0
  230. package/dist/services/provenance/verifier.js +536 -0
  231. package/dist/services/provenance/verifier.js.map +1 -0
  232. package/dist/services/python-pool.d.ts +70 -0
  233. package/dist/services/python-pool.d.ts.map +1 -0
  234. package/dist/services/python-pool.js +265 -0
  235. package/dist/services/python-pool.js.map +1 -0
  236. package/dist/services/search/bm25.d.ts +180 -0
  237. package/dist/services/search/bm25.d.ts.map +1 -0
  238. package/dist/services/search/bm25.js +656 -0
  239. package/dist/services/search/bm25.js.map +1 -0
  240. package/dist/services/search/fusion.d.ts +103 -0
  241. package/dist/services/search/fusion.d.ts.map +1 -0
  242. package/dist/services/search/fusion.js +122 -0
  243. package/dist/services/search/fusion.js.map +1 -0
  244. package/dist/services/search/local-reranker.d.ts +30 -0
  245. package/dist/services/search/local-reranker.d.ts.map +1 -0
  246. package/dist/services/search/local-reranker.js +123 -0
  247. package/dist/services/search/local-reranker.js.map +1 -0
  248. package/dist/services/search/quality.d.ts +11 -0
  249. package/dist/services/search/quality.d.ts.map +1 -0
  250. package/dist/services/search/quality.js +17 -0
  251. package/dist/services/search/quality.js.map +1 -0
  252. package/dist/services/search/query-classifier.d.ts +34 -0
  253. package/dist/services/search/query-classifier.d.ts.map +1 -0
  254. package/dist/services/search/query-classifier.js +114 -0
  255. package/dist/services/search/query-classifier.js.map +1 -0
  256. package/dist/services/search/query-expander.d.ts +73 -0
  257. package/dist/services/search/query-expander.d.ts.map +1 -0
  258. package/dist/services/search/query-expander.js +281 -0
  259. package/dist/services/search/query-expander.js.map +1 -0
  260. package/dist/services/search/reranker.d.ts +44 -0
  261. package/dist/services/search/reranker.d.ts.map +1 -0
  262. package/dist/services/search/reranker.js +101 -0
  263. package/dist/services/search/reranker.js.map +1 -0
  264. package/dist/services/storage/database/annotation-operations.d.ts +113 -0
  265. package/dist/services/storage/database/annotation-operations.d.ts.map +1 -0
  266. package/dist/services/storage/database/annotation-operations.js +177 -0
  267. package/dist/services/storage/database/annotation-operations.js.map +1 -0
  268. package/dist/services/storage/database/approval-operations.d.ts +132 -0
  269. package/dist/services/storage/database/approval-operations.d.ts.map +1 -0
  270. package/dist/services/storage/database/approval-operations.js +206 -0
  271. package/dist/services/storage/database/approval-operations.js.map +1 -0
  272. package/dist/services/storage/database/chunk-operations.d.ts +132 -0
  273. package/dist/services/storage/database/chunk-operations.d.ts.map +1 -0
  274. package/dist/services/storage/database/chunk-operations.js +306 -0
  275. package/dist/services/storage/database/chunk-operations.js.map +1 -0
  276. package/dist/services/storage/database/cluster-operations.d.ts +97 -0
  277. package/dist/services/storage/database/cluster-operations.d.ts.map +1 -0
  278. package/dist/services/storage/database/cluster-operations.js +258 -0
  279. package/dist/services/storage/database/cluster-operations.js.map +1 -0
  280. package/dist/services/storage/database/comparison-operations.d.ts +41 -0
  281. package/dist/services/storage/database/comparison-operations.d.ts.map +1 -0
  282. package/dist/services/storage/database/comparison-operations.js +65 -0
  283. package/dist/services/storage/database/comparison-operations.js.map +1 -0
  284. package/dist/services/storage/database/converters.d.ts +36 -0
  285. package/dist/services/storage/database/converters.d.ts.map +1 -0
  286. package/dist/services/storage/database/converters.js +244 -0
  287. package/dist/services/storage/database/converters.js.map +1 -0
  288. package/dist/services/storage/database/document-operations.d.ts +145 -0
  289. package/dist/services/storage/database/document-operations.d.ts.map +1 -0
  290. package/dist/services/storage/database/document-operations.js +498 -0
  291. package/dist/services/storage/database/document-operations.js.map +1 -0
  292. package/dist/services/storage/database/embedding-operations.d.ts +130 -0
  293. package/dist/services/storage/database/embedding-operations.d.ts.map +1 -0
  294. package/dist/services/storage/database/embedding-operations.js +315 -0
  295. package/dist/services/storage/database/embedding-operations.js.map +1 -0
  296. package/dist/services/storage/database/extraction-operations.d.ts +47 -0
  297. package/dist/services/storage/database/extraction-operations.d.ts.map +1 -0
  298. package/dist/services/storage/database/extraction-operations.js +85 -0
  299. package/dist/services/storage/database/extraction-operations.js.map +1 -0
  300. package/dist/services/storage/database/form-fill-operations.d.ts +58 -0
  301. package/dist/services/storage/database/form-fill-operations.d.ts.map +1 -0
  302. package/dist/services/storage/database/form-fill-operations.js +116 -0
  303. package/dist/services/storage/database/form-fill-operations.js.map +1 -0
  304. package/dist/services/storage/database/helpers.d.ts +29 -0
  305. package/dist/services/storage/database/helpers.d.ts.map +1 -0
  306. package/dist/services/storage/database/helpers.js +55 -0
  307. package/dist/services/storage/database/helpers.js.map +1 -0
  308. package/dist/services/storage/database/image-operations.d.ts +202 -0
  309. package/dist/services/storage/database/image-operations.d.ts.map +1 -0
  310. package/dist/services/storage/database/image-operations.js +484 -0
  311. package/dist/services/storage/database/image-operations.js.map +1 -0
  312. package/dist/services/storage/database/index.d.ts +13 -0
  313. package/dist/services/storage/database/index.d.ts.map +1 -0
  314. package/dist/services/storage/database/index.js +16 -0
  315. package/dist/services/storage/database/index.js.map +1 -0
  316. package/dist/services/storage/database/lock-operations.d.ts +59 -0
  317. package/dist/services/storage/database/lock-operations.d.ts.map +1 -0
  318. package/dist/services/storage/database/lock-operations.js +89 -0
  319. package/dist/services/storage/database/lock-operations.js.map +1 -0
  320. package/dist/services/storage/database/obligation-operations.d.ts +88 -0
  321. package/dist/services/storage/database/obligation-operations.d.ts.map +1 -0
  322. package/dist/services/storage/database/obligation-operations.js +206 -0
  323. package/dist/services/storage/database/obligation-operations.js.map +1 -0
  324. package/dist/services/storage/database/ocr-operations.d.ts +33 -0
  325. package/dist/services/storage/database/ocr-operations.d.ts.map +1 -0
  326. package/dist/services/storage/database/ocr-operations.js +70 -0
  327. package/dist/services/storage/database/ocr-operations.js.map +1 -0
  328. package/dist/services/storage/database/playbook-operations.d.ts +72 -0
  329. package/dist/services/storage/database/playbook-operations.d.ts.map +1 -0
  330. package/dist/services/storage/database/playbook-operations.js +247 -0
  331. package/dist/services/storage/database/playbook-operations.js.map +1 -0
  332. package/dist/services/storage/database/provenance-operations.d.ts +112 -0
  333. package/dist/services/storage/database/provenance-operations.d.ts.map +1 -0
  334. package/dist/services/storage/database/provenance-operations.js +251 -0
  335. package/dist/services/storage/database/provenance-operations.js.map +1 -0
  336. package/dist/services/storage/database/service.d.ts +142 -0
  337. package/dist/services/storage/database/service.d.ts.map +1 -0
  338. package/dist/services/storage/database/service.js +310 -0
  339. package/dist/services/storage/database/service.js.map +1 -0
  340. package/dist/services/storage/database/static-operations.d.ts +30 -0
  341. package/dist/services/storage/database/static-operations.d.ts.map +1 -0
  342. package/dist/services/storage/database/static-operations.js +218 -0
  343. package/dist/services/storage/database/static-operations.js.map +1 -0
  344. package/dist/services/storage/database/stats-operations.d.ts +101 -0
  345. package/dist/services/storage/database/stats-operations.d.ts.map +1 -0
  346. package/dist/services/storage/database/stats-operations.js +394 -0
  347. package/dist/services/storage/database/stats-operations.js.map +1 -0
  348. package/dist/services/storage/database/tag-operations.d.ts +76 -0
  349. package/dist/services/storage/database/tag-operations.d.ts.map +1 -0
  350. package/dist/services/storage/database/tag-operations.js +178 -0
  351. package/dist/services/storage/database/tag-operations.js.map +1 -0
  352. package/dist/services/storage/database/types.d.ts +286 -0
  353. package/dist/services/storage/database/types.d.ts.map +1 -0
  354. package/dist/services/storage/database/types.js +39 -0
  355. package/dist/services/storage/database/types.js.map +1 -0
  356. package/dist/services/storage/database/upload-operations.d.ts +71 -0
  357. package/dist/services/storage/database/upload-operations.d.ts.map +1 -0
  358. package/dist/services/storage/database/upload-operations.js +124 -0
  359. package/dist/services/storage/database/upload-operations.js.map +1 -0
  360. package/dist/services/storage/database/user-operations.d.ts +102 -0
  361. package/dist/services/storage/database/user-operations.d.ts.map +1 -0
  362. package/dist/services/storage/database/user-operations.js +151 -0
  363. package/dist/services/storage/database/user-operations.js.map +1 -0
  364. package/dist/services/storage/database/workflow-operations.d.ts +98 -0
  365. package/dist/services/storage/database/workflow-operations.d.ts.map +1 -0
  366. package/dist/services/storage/database/workflow-operations.js +157 -0
  367. package/dist/services/storage/database/workflow-operations.js.map +1 -0
  368. package/dist/services/storage/database.d.ts +16 -0
  369. package/dist/services/storage/database.d.ts.map +1 -0
  370. package/dist/services/storage/database.js +15 -0
  371. package/dist/services/storage/database.js.map +1 -0
  372. package/dist/services/storage/index.d.ts +10 -0
  373. package/dist/services/storage/index.d.ts.map +1 -0
  374. package/dist/services/storage/index.js +10 -0
  375. package/dist/services/storage/index.js.map +1 -0
  376. package/dist/services/storage/migrations/index.d.ts +16 -0
  377. package/dist/services/storage/migrations/index.d.ts.map +1 -0
  378. package/dist/services/storage/migrations/index.js +20 -0
  379. package/dist/services/storage/migrations/index.js.map +1 -0
  380. package/dist/services/storage/migrations/operations.d.ts +40 -0
  381. package/dist/services/storage/migrations/operations.d.ts.map +1 -0
  382. package/dist/services/storage/migrations/operations.js +2910 -0
  383. package/dist/services/storage/migrations/operations.js.map +1 -0
  384. package/dist/services/storage/migrations/schema-definitions.d.ts +306 -0
  385. package/dist/services/storage/migrations/schema-definitions.d.ts.map +1 -0
  386. package/dist/services/storage/migrations/schema-definitions.js +1006 -0
  387. package/dist/services/storage/migrations/schema-definitions.js.map +1 -0
  388. package/dist/services/storage/migrations/schema-helpers.d.ts +50 -0
  389. package/dist/services/storage/migrations/schema-helpers.d.ts.map +1 -0
  390. package/dist/services/storage/migrations/schema-helpers.js +176 -0
  391. package/dist/services/storage/migrations/schema-helpers.js.map +1 -0
  392. package/dist/services/storage/migrations/types.d.ts +15 -0
  393. package/dist/services/storage/migrations/types.d.ts.map +1 -0
  394. package/dist/services/storage/migrations/types.js +21 -0
  395. package/dist/services/storage/migrations/types.js.map +1 -0
  396. package/dist/services/storage/migrations/verification.d.ts +20 -0
  397. package/dist/services/storage/migrations/verification.d.ts.map +1 -0
  398. package/dist/services/storage/migrations/verification.js +78 -0
  399. package/dist/services/storage/migrations/verification.js.map +1 -0
  400. package/dist/services/storage/migrations.d.ts +16 -0
  401. package/dist/services/storage/migrations.d.ts.map +1 -0
  402. package/dist/services/storage/migrations.js +17 -0
  403. package/dist/services/storage/migrations.js.map +1 -0
  404. package/dist/services/storage/types.d.ts +12 -0
  405. package/dist/services/storage/types.d.ts.map +1 -0
  406. package/dist/services/storage/types.js +5 -0
  407. package/dist/services/storage/types.js.map +1 -0
  408. package/dist/services/storage/vector.d.ts +208 -0
  409. package/dist/services/storage/vector.d.ts.map +1 -0
  410. package/dist/services/storage/vector.js +526 -0
  411. package/dist/services/storage/vector.js.map +1 -0
  412. package/dist/services/vlm/pipeline.d.ts +194 -0
  413. package/dist/services/vlm/pipeline.d.ts.map +1 -0
  414. package/dist/services/vlm/pipeline.js +800 -0
  415. package/dist/services/vlm/pipeline.js.map +1 -0
  416. package/dist/services/vlm/prompts.d.ts +171 -0
  417. package/dist/services/vlm/prompts.d.ts.map +1 -0
  418. package/dist/services/vlm/prompts.js +229 -0
  419. package/dist/services/vlm/prompts.js.map +1 -0
  420. package/dist/services/vlm/service.d.ts +174 -0
  421. package/dist/services/vlm/service.d.ts.map +1 -0
  422. package/dist/services/vlm/service.js +256 -0
  423. package/dist/services/vlm/service.js.map +1 -0
  424. package/dist/services/webhook-delivery.d.ts +4 -0
  425. package/dist/services/webhook-delivery.d.ts.map +1 -0
  426. package/dist/services/webhook-delivery.js +140 -0
  427. package/dist/services/webhook-delivery.js.map +1 -0
  428. package/dist/tools/chunks.d.ts +19 -0
  429. package/dist/tools/chunks.d.ts.map +1 -0
  430. package/dist/tools/chunks.js +392 -0
  431. package/dist/tools/chunks.js.map +1 -0
  432. package/dist/tools/clm.d.ts +16 -0
  433. package/dist/tools/clm.d.ts.map +1 -0
  434. package/dist/tools/clm.js +668 -0
  435. package/dist/tools/clm.js.map +1 -0
  436. package/dist/tools/clustering.d.ts +13 -0
  437. package/dist/tools/clustering.d.ts.map +1 -0
  438. package/dist/tools/clustering.js +498 -0
  439. package/dist/tools/clustering.js.map +1 -0
  440. package/dist/tools/collaboration.d.ts +15 -0
  441. package/dist/tools/collaboration.d.ts.map +1 -0
  442. package/dist/tools/collaboration.js +516 -0
  443. package/dist/tools/collaboration.js.map +1 -0
  444. package/dist/tools/comparison.d.ts +13 -0
  445. package/dist/tools/comparison.d.ts.map +1 -0
  446. package/dist/tools/comparison.js +735 -0
  447. package/dist/tools/comparison.js.map +1 -0
  448. package/dist/tools/compliance.d.ts +15 -0
  449. package/dist/tools/compliance.d.ts.map +1 -0
  450. package/dist/tools/compliance.js +640 -0
  451. package/dist/tools/compliance.js.map +1 -0
  452. package/dist/tools/config.d.ts +19 -0
  453. package/dist/tools/config.d.ts.map +1 -0
  454. package/dist/tools/config.js +213 -0
  455. package/dist/tools/config.js.map +1 -0
  456. package/dist/tools/database.d.ts +62 -0
  457. package/dist/tools/database.d.ts.map +1 -0
  458. package/dist/tools/database.js +288 -0
  459. package/dist/tools/database.js.map +1 -0
  460. package/dist/tools/documents.d.ts +61 -0
  461. package/dist/tools/documents.d.ts.map +1 -0
  462. package/dist/tools/documents.js +1624 -0
  463. package/dist/tools/documents.js.map +1 -0
  464. package/dist/tools/embeddings.d.ts +14 -0
  465. package/dist/tools/embeddings.d.ts.map +1 -0
  466. package/dist/tools/embeddings.js +626 -0
  467. package/dist/tools/embeddings.js.map +1 -0
  468. package/dist/tools/evaluation.d.ts +25 -0
  469. package/dist/tools/evaluation.d.ts.map +1 -0
  470. package/dist/tools/evaluation.js +523 -0
  471. package/dist/tools/evaluation.js.map +1 -0
  472. package/dist/tools/events.d.ts +16 -0
  473. package/dist/tools/events.d.ts.map +1 -0
  474. package/dist/tools/events.js +493 -0
  475. package/dist/tools/events.js.map +1 -0
  476. package/dist/tools/extraction-structured.d.ts +13 -0
  477. package/dist/tools/extraction-structured.d.ts.map +1 -0
  478. package/dist/tools/extraction-structured.js +390 -0
  479. package/dist/tools/extraction-structured.js.map +1 -0
  480. package/dist/tools/extraction.d.ts +24 -0
  481. package/dist/tools/extraction.d.ts.map +1 -0
  482. package/dist/tools/extraction.js +424 -0
  483. package/dist/tools/extraction.js.map +1 -0
  484. package/dist/tools/file-management.d.ts +14 -0
  485. package/dist/tools/file-management.d.ts.map +1 -0
  486. package/dist/tools/file-management.js +523 -0
  487. package/dist/tools/file-management.js.map +1 -0
  488. package/dist/tools/form-fill.d.ts +13 -0
  489. package/dist/tools/form-fill.d.ts.map +1 -0
  490. package/dist/tools/form-fill.js +250 -0
  491. package/dist/tools/form-fill.js.map +1 -0
  492. package/dist/tools/health.d.ts +19 -0
  493. package/dist/tools/health.d.ts.map +1 -0
  494. package/dist/tools/health.js +229 -0
  495. package/dist/tools/health.js.map +1 -0
  496. package/dist/tools/images.d.ts +54 -0
  497. package/dist/tools/images.d.ts.map +1 -0
  498. package/dist/tools/images.js +787 -0
  499. package/dist/tools/images.js.map +1 -0
  500. package/dist/tools/ingestion.d.ts +94 -0
  501. package/dist/tools/ingestion.d.ts.map +1 -0
  502. package/dist/tools/ingestion.js +1659 -0
  503. package/dist/tools/ingestion.js.map +1 -0
  504. package/dist/tools/intelligence.d.ts +18 -0
  505. package/dist/tools/intelligence.d.ts.map +1 -0
  506. package/dist/tools/intelligence.js +1039 -0
  507. package/dist/tools/intelligence.js.map +1 -0
  508. package/dist/tools/provenance.d.ts +51 -0
  509. package/dist/tools/provenance.d.ts.map +1 -0
  510. package/dist/tools/provenance.js +691 -0
  511. package/dist/tools/provenance.js.map +1 -0
  512. package/dist/tools/reports.d.ts +41 -0
  513. package/dist/tools/reports.d.ts.map +1 -0
  514. package/dist/tools/reports.js +1394 -0
  515. package/dist/tools/reports.js.map +1 -0
  516. package/dist/tools/search.d.ts +35 -0
  517. package/dist/tools/search.d.ts.map +1 -0
  518. package/dist/tools/search.js +2528 -0
  519. package/dist/tools/search.js.map +1 -0
  520. package/dist/tools/shared.d.ts +52 -0
  521. package/dist/tools/shared.d.ts.map +1 -0
  522. package/dist/tools/shared.js +54 -0
  523. package/dist/tools/shared.js.map +1 -0
  524. package/dist/tools/tags.d.ts +15 -0
  525. package/dist/tools/tags.d.ts.map +1 -0
  526. package/dist/tools/tags.js +287 -0
  527. package/dist/tools/tags.js.map +1 -0
  528. package/dist/tools/timeline.d.ts +15 -0
  529. package/dist/tools/timeline.d.ts.map +1 -0
  530. package/dist/tools/timeline.js +14 -0
  531. package/dist/tools/timeline.js.map +1 -0
  532. package/dist/tools/users.d.ts +14 -0
  533. package/dist/tools/users.d.ts.map +1 -0
  534. package/dist/tools/users.js +257 -0
  535. package/dist/tools/users.js.map +1 -0
  536. package/dist/tools/vlm.d.ts +40 -0
  537. package/dist/tools/vlm.d.ts.map +1 -0
  538. package/dist/tools/vlm.js +475 -0
  539. package/dist/tools/vlm.js.map +1 -0
  540. package/dist/tools/workflow.d.ts +16 -0
  541. package/dist/tools/workflow.d.ts.map +1 -0
  542. package/dist/tools/workflow.js +495 -0
  543. package/dist/tools/workflow.js.map +1 -0
  544. package/dist/utils/backoff.d.ts +53 -0
  545. package/dist/utils/backoff.d.ts.map +1 -0
  546. package/dist/utils/backoff.js +78 -0
  547. package/dist/utils/backoff.js.map +1 -0
  548. package/dist/utils/config-persistence.d.ts +33 -0
  549. package/dist/utils/config-persistence.d.ts.map +1 -0
  550. package/dist/utils/config-persistence.js +61 -0
  551. package/dist/utils/config-persistence.js.map +1 -0
  552. package/dist/utils/hash.d.ts +65 -0
  553. package/dist/utils/hash.d.ts.map +1 -0
  554. package/dist/utils/hash.js +146 -0
  555. package/dist/utils/hash.js.map +1 -0
  556. package/dist/utils/math.d.ts +21 -0
  557. package/dist/utils/math.d.ts.map +1 -0
  558. package/dist/utils/math.js +39 -0
  559. package/dist/utils/math.js.map +1 -0
  560. package/dist/utils/validation.d.ts +697 -0
  561. package/dist/utils/validation.d.ts.map +1 -0
  562. package/dist/utils/validation.js +529 -0
  563. package/dist/utils/validation.js.map +1 -0
  564. package/package.json +96 -0
  565. package/python/.gitkeep +0 -0
  566. package/python/__init__.py +104 -0
  567. package/python/clustering_worker.py +440 -0
  568. package/python/docx_image_extractor.py +524 -0
  569. package/python/embedding_worker.py +552 -0
  570. package/python/file_manager_worker.py +564 -0
  571. package/python/form_fill_worker.py +399 -0
  572. package/python/gpu_utils.py +582 -0
  573. package/python/image_extractor.py +317 -0
  574. package/python/image_optimizer.py +444 -0
  575. package/python/ocr_worker.py +712 -0
  576. package/python/pyproject.toml +76 -0
  577. package/python/requirements.txt +51 -0
  578. package/python/reranker_worker.py +87 -0
@@ -0,0 +1,566 @@
1
+ /**
2
+ * Hybrid Section-Aware Chunking Service for OCR Provenance MCP System
3
+ *
4
+ * Uses markdown structure (headings, paragraphs, tables), JSON block data
5
+ * (for atomic region detection), and page offsets (for page tracking) to
6
+ * produce semantically coherent chunks with provenance records (chain_depth=2).
7
+ *
8
+ * @module services/chunking/chunker
9
+ */
10
+ import { DEFAULT_CHUNKING_CONFIG, getOverlapCharacters, } from '../../models/chunk.js';
11
+ import { ProvenanceType, } from '../../models/provenance.js';
12
+ import { parseMarkdownBlocks, buildSectionHierarchy, getPageNumberForOffset, } from './markdown-parser.js';
13
+ import { findAtomicRegions, isOffsetInAtomicRegion, extractTableStructures, extractHeadersFromMarkdown, countTableDimensionsFromMarkdown, extractFirstDataRow, generateTableSummary, } from './json-block-analyzer.js';
14
+ import { normalizeHeadingLevels } from './heading-normalizer.js';
15
+ import { mergeHeadingOnlyChunks } from './chunk-merger.js';
16
/**
 * Remove HTML markup from a piece of text so it can be indexed by FTS5.
 *
 * Every `<...>` tag is deleted outright (nothing is inserted in its place),
 * then each run of two or more whitespace characters is collapsed into a
 * single space. Leading and trailing whitespace is not trimmed.
 *
 * @param text - Raw chunk text that may contain HTML tags
 * @returns The text without tags and with whitespace runs collapsed
 */
function stripHtmlForFTS(text) {
    const withoutTags = text.replace(/<[^>]+>/g, '');
    const collapsed = withoutTags.replace(/\s{2,}/g, ' ');
    return collapsed;
}
23
/**
 * Derive table metadata from the chunk text alone by treating it as a
 * pipe-delimited markdown table.
 *
 * This is the fallback used when no offset-based table structure matches a
 * chunk (the original note cites DOCX tables that could not be located in
 * the markdown as the common cause).
 *
 * @param chunkText - Text of the chunk suspected to contain a table
 * @returns `{ columnHeaders, rowCount, columnCount, summary }`, or `null`
 *          when fewer than two lines of the text contain a `|` character
 */
function createTableMetadataFromText(chunkText) {
    // A pipe table needs at least two rows containing '|' to be worth parsing.
    let pipeLineCount = 0;
    for (const line of chunkText.split('\n')) {
        if (line.includes('|')) {
            pipeLineCount += 1;
        }
    }
    if (pipeLineCount < 2) {
        return null;
    }
    const headers = extractHeadersFromMarkdown(chunkText);
    const dimensions = countTableDimensionsFromMarkdown(chunkText);
    const firstDataRow = extractFirstDataRow(chunkText);
    const tableSummary = generateTableSummary(headers, dimensions.rowCount, firstDataRow);
    // Fall back to the header count when the dimension scan reports no columns.
    const resolvedColumnCount = dimensions.columnCount > 0 ? dimensions.columnCount : headers.length;
    return {
        columnHeaders: headers,
        rowCount: dimensions.rowCount,
        columnCount: resolvedColumnCount,
        summary: tableSummary,
    };
}
45
/**
 * Build a fresh accumulator with no text, no blocks and no content types.
 *
 * @param startOffset - Offset recorded as the start of whatever gets accumulated
 * @returns A new accumulator object (new array and Set instances on every call)
 */
function createEmptyAccumulator(startOffset) {
    const accumulator = {
        text: '',
        blocks: [],
        startOffset: startOffset,
        contentTypes: new Set(),
    };
    return accumulator;
}
53
/**
 * Report whether an accumulator holds anything other than whitespace.
 *
 * @param acc - Accumulator whose `text` is inspected
 * @returns `true` when the trimmed text is non-empty
 */
function accumulatorHasContent(acc) {
    const visibleText = acc.text.trim();
    return visibleText.length !== 0;
}
56
/**
 * Append a block to an accumulator in place.
 *
 * The block text is joined to any existing text with a blank line
 * (`\n\n`), the block is recorded in `acc.blocks`, and the content type
 * mapped from the block's type is added to `acc.contentTypes`.
 *
 * @param acc - Accumulator to mutate
 * @param block - Block providing `text` and `type`
 */
function addBlockToAccumulator(acc, block) {
    const separator = acc.text.length > 0 ? '\n\n' : '';
    acc.text = acc.text + separator + block.text;
    acc.blocks.push(block);
    const contentType = mapBlockTypeToContentType(block.type);
    acc.contentTypes.add(contentType);
}
64
/**
 * Translate a markdown block type into a chunk content type.
 *
 * `heading`, `table`, `code` and `list` map to themselves; `paragraph`
 * and every other value map to `text`.
 *
 * @param blockType - Block type name
 * @returns The content type label for that block type
 */
function mapBlockTypeToContentType(blockType) {
    const passthroughTypes = ['heading', 'table', 'code', 'list'];
    if (passthroughTypes.includes(blockType)) {
        return blockType;
    }
    return 'text';
}
74
+ // ---------------------------------------------------------------------------
75
+ // Sentence boundary detection
76
+ // ---------------------------------------------------------------------------
77
/**
 * Pick a position at which to split `text`, preferring natural boundaries.
 *
 * The window from `maxPos` back to `maxPos - 500` (clamped at 0) is scanned
 * backward once per boundary kind, in priority order:
 *   1. `.`, `?` or `!` immediately followed by a space or newline
 *   2. the second newline of a `\n\n` paragraph break
 *   3. any newline
 *   4. any space
 * The first hit wins and the returned position is the index just after the
 * matched character, so it can be `maxPos + 1` when the match sits exactly
 * at `maxPos`. If nothing matches, `maxPos` itself is returned.
 *
 * @param text - The text to scan
 * @param maxPos - Index at which the backward scan starts (typically chunkSize)
 * @returns Position to split at
 */
function findSentenceBoundary(text, maxPos) {
    const lowerBound = Math.max(0, maxPos - 500);
    const isSentenceEnd = (idx) => {
        const ch = text[idx];
        if (ch !== '.' && ch !== '?' && ch !== '!') {
            return false;
        }
        if (idx + 1 >= text.length) {
            return false;
        }
        const following = text[idx + 1];
        return following === ' ' || following === '\n';
    };
    // The paragraph check looks one character back, so it never fires on the
    // very first index of the window.
    const isParagraphBreak = (idx) => idx >= lowerBound + 1 && text[idx] === '\n' && text[idx - 1] === '\n';
    const isLineBreak = (idx) => text[idx] === '\n';
    const isSpace = (idx) => text[idx] === ' ';
    const prioritizedChecks = [isSentenceEnd, isParagraphBreak, isLineBreak, isSpace];
    for (const matches of prioritizedChecks) {
        let idx = maxPos;
        while (idx >= lowerBound) {
            if (matches(idx)) {
                return idx + 1;
            }
            idx -= 1;
        }
    }
    // Last resort: force split at maxPos
    return maxPos;
}
121
+ // ---------------------------------------------------------------------------
122
+ // Page info determination
123
+ // ---------------------------------------------------------------------------
124
/**
 * Work out which page a character span starts on and, when it ends on a
 * different page, the `"start-end"` page range it covers.
 *
 * The end page is looked up at `endOffset - 1` (never before `startOffset`),
 * i.e. `endOffset` is treated as exclusive.
 *
 * @param startOffset - First character offset of the span
 * @param endOffset - Offset just past the span
 * @param pageOffsets - Page offset table passed to getPageNumberForOffset
 * @returns `{ pageNumber, pageRange }`; both `null` when there are no page
 *          offsets or the start page cannot be resolved, `pageRange` `null`
 *          when the span stays on one page or the end page is unknown
 */
function determinePageInfoForSpan(startOffset, endOffset, pageOffsets) {
    const unknownPage = { pageNumber: null, pageRange: null };
    if (pageOffsets.length === 0) {
        return unknownPage;
    }
    const firstPage = getPageNumberForOffset(startOffset, pageOffsets);
    const lastCharOffset = Math.max(startOffset, endOffset - 1);
    const lastPage = getPageNumberForOffset(lastCharOffset, pageOffsets);
    if (firstPage === null) {
        return unknownPage;
    }
    const spansMultiplePages = lastPage !== null && firstPage !== lastPage;
    return {
        pageNumber: firstPage,
        pageRange: spansMultiplePages ? `${firstPage}-${lastPage}` : null,
    };
}
144
+ // ---------------------------------------------------------------------------
145
+ // Main hybrid chunking function
146
+ // ---------------------------------------------------------------------------
147
/**
 * Hybrid section-aware chunking.
 *
 * Uses markdown structure (headings, paragraphs, tables), JSON block data
 * (for atomic region detection), and page offsets (for page tracking) to
 * produce semantically coherent chunks.
 *
 * Processing outline:
 *   1. Parse the markdown into blocks (optionally normalizing heading levels).
 *   2. Walk the blocks: a heading flushes the pending text and starts a new
 *      accumulator; tables/code blocks of at least `chunkSize / 4` characters
 *      and blocks inside JSON-detected atomic regions are emitted on their
 *      own; everything else is accumulated and split near `chunkSize` at a
 *      sentence/paragraph/line/space boundary.
 *   3. Merge heading-only tiny chunks via mergeHeadingOnlyChunks.
 *   4. Fill in `overlapWithPrevious` / `overlapWithNext` for neighbouring
 *      non-atomic chunks (only the numeric fields are set here; chunk text is
 *      not modified by this step).
 *
 * @param text - Full markdown text from OCR output
 * @param pageOffsets - Page offset information for page number assignment
 * @param jsonBlocks - JSON block hierarchy from Datalab OCR (may be null)
 * @param config - Chunking configuration (defaults to DEFAULT_CHUNKING_CONFIG)
 * @returns Array of ChunkResult with section context, content types, and page info
 * @throws Error when the markdown parser yields no blocks for non-empty text
 */
export function chunkHybridSectionAware(text, pageOffsets, jsonBlocks, config = DEFAULT_CHUNKING_CONFIG) {
    // 1. Empty text returns empty array
    if (text.length === 0) {
        return [];
    }
    // 2. Parse markdown blocks
    const blocks = parseMarkdownBlocks(text, pageOffsets);
    // 3. If blocks is empty but text is not, something is wrong
    if (blocks.length === 0) {
        throw new Error(`Markdown parser returned no blocks for non-empty text (${text.length} chars)`);
    }
    // 3.5. Normalize heading levels if configured
    if (config.headingNormalization) {
        normalizeHeadingLevels(blocks, config.headingNormalization);
    }
    // 4. Build section hierarchy
    const sections = buildSectionHierarchy(blocks);
    // 5. Find atomic regions from JSON blocks
    const atomicRegions = findAtomicRegions(jsonBlocks, text, pageOffsets);
    // 5.5. Extract table structures for column header context (Task 7.2)
    const tableStructures = extractTableStructures(jsonBlocks, text, pageOffsets);
    /**
     * Find a table structure whose offset range overlaps a given block.
     * Ranges are treated as half-open: touching ranges do not overlap.
     */
    function findTableStructureForBlock(block) {
        for (const ts of tableStructures) {
            if (block.startOffset < ts.endOffset && block.endOffset > ts.startOffset) {
                return ts;
            }
        }
        return null;
    }
    /**
     * Find table metadata for a chunk based on its offset range.
     * Returns metadata if the chunk overlaps a known table structure.
     * Uses the same strict half-open overlap test as findTableStructureForBlock
     * so that a chunk which merely touches a table's boundary is not given
     * that table's metadata.
     */
    function findTableMetadata(offset, length) {
        const end = offset + length;
        for (const ts of tableStructures) {
            if (offset < ts.endOffset && end > ts.startOffset) {
                return {
                    columnHeaders: ts.columnHeaders,
                    rowCount: ts.rowCount,
                    columnCount: ts.columnCount,
                    summary: ts.summary,
                    caption: ts.caption,
                    continuationOf: ts.continuationOf,
                };
            }
        }
        return null;
    }
    /**
     * Resolve table metadata for a span: offset-based match first, then the
     * text-parsing fallback.
     */
    function resolveTableMetadata(startOff, endOff, chunkText) {
        return findTableMetadata(startOff, endOff - startOff) ?? createTableMetadataFromText(chunkText);
    }
    /**
     * Build a column header prefix string for a table chunk.
     * Format: "[Table: col1 | col2 | col3] "
     */
    function buildTableHeaderPrefix(ts) {
        if (ts.columnHeaders.length === 0) {
            return '';
        }
        return `[Table: ${ts.columnHeaders.join(' | ')}] `;
    }
    // 6. Walk blocks, accumulating into chunks
    const chunks = [];
    let accumulator = createEmptyAccumulator(0);
    let currentSectionPath = null;
    let currentHeadingText = null;
    let currentHeadingLevel = null;
    let chunkIndex = 0;
    const overlapSize = getOverlapCharacters(config);
    /**
     * Append one chunk record, stamping it with the next index, page info for
     * its span, and the heading/section context current at the time of the call.
     */
    function pushChunk(chunkText, startOff, endOff, contentTypes, isAtomic, tableMetadata) {
        const pageInfo = determinePageInfoForSpan(startOff, endOff, pageOffsets);
        chunks.push({
            index: chunkIndex++,
            text: chunkText,
            startOffset: startOff,
            endOffset: endOff,
            overlapWithPrevious: 0, // Set in post-processing
            overlapWithNext: 0, // Set in post-processing
            pageNumber: pageInfo.pageNumber,
            pageRange: pageInfo.pageRange,
            headingContext: currentHeadingText,
            headingLevel: currentHeadingLevel,
            sectionPath: currentSectionPath,
            contentTypes,
            isAtomic,
            tableMetadata,
        });
    }
    /**
     * Emit the accumulator's text as a chunk if it has non-whitespace content.
     * The accumulator itself is NOT reset here; callers replace it afterwards.
     */
    function flushAccumulator(isAtomic) {
        if (!accumulatorHasContent(accumulator)) {
            return;
        }
        const chunkText = accumulator.text;
        const startOff = accumulator.startOffset;
        // End offset is derived from the accumulated text length.
        const endOff = startOff + chunkText.length;
        const tableMeta = accumulator.contentTypes.has('table')
            ? resolveTableMetadata(startOff, endOff, chunkText)
            : null;
        pushChunk(chunkText, startOff, endOff, Array.from(accumulator.contentTypes), isAtomic, tableMeta);
    }
    /**
     * Split the accumulator until its text no longer exceeds config.chunkSize.
     * Each leading piece is flushed as a non-atomic chunk; the remainder stays
     * in a new accumulator that inherits the content types seen so far.
     */
    function splitAccumulatorToFit() {
        while (accumulator.text.length > config.chunkSize) {
            const fullText = accumulator.text;
            const splitPos = findSentenceBoundary(fullText, config.chunkSize);
            if (splitPos <= 0) {
                // A non-advancing split (e.g. chunkSize <= 0) would loop forever.
                break;
            }
            const savedStartOffset = accumulator.startOffset;
            const savedContentTypes = new Set(accumulator.contentTypes);
            // Truncate accumulator text to split point and flush
            accumulator.text = fullText.slice(0, splitPos);
            flushAccumulator(false);
            // Keep the remainder in a new accumulator
            accumulator = createEmptyAccumulator(savedStartOffset + splitPos);
            accumulator.text = fullText.slice(splitPos);
            accumulator.contentTypes = savedContentTypes;
        }
    }
    /**
     * Emit a single block as an atomic chunk (table, code, or JSON-detected region).
     * For table blocks, strips HTML and prepends column header context if
     * available (Task 7.2).
     */
    function emitAtomicChunk(block) {
        // Guard: reject empty blocks (same as flushAccumulator's accumulatorHasContent)
        if (block.text.trim().length === 0) {
            console.error(`[chunker] Skipping empty atomic block at offset ${block.startOffset}-${block.endOffset} ` +
                `(type=${block.type}, raw length=${block.text.length})`);
            return;
        }
        const startOff = block.startOffset;
        const endOff = block.endOffset;
        const isTable = block.type === 'table';
        let chunkText = block.text;
        if (isTable) {
            // Strip HTML tags from table text for clean FTS indexing
            chunkText = stripHtmlForFTS(chunkText);
            const ts = findTableStructureForBlock(block);
            if (ts) {
                const prefix = buildTableHeaderPrefix(ts);
                if (prefix.length > 0) {
                    chunkText = prefix + chunkText;
                }
            }
        }
        // Post-processing guard: HTML stripping may leave empty text
        if (chunkText.trim().length === 0) {
            console.error(`[chunker] Atomic chunk became empty after processing at offset ${startOff}-${endOff} ` +
                `(type=${block.type}, original length=${block.text.length})`);
            return;
        }
        const tableMeta = isTable ? resolveTableMetadata(startOff, endOff, chunkText) : null;
        pushChunk(chunkText, startOff, endOff, [mapBlockTypeToContentType(block.type)], true, tableMeta);
    }
    /**
     * Emit an atomic block with size awareness: if the block exceeds
     * maxChunkSize, split it at line boundaries (row breaks for tables,
     * line breaks for code). Each sub-chunk inherits atomic status.
     * For table blocks, prepends column header context to each sub-chunk (Task 7.2).
     */
    function emitSizedAtomicChunk(block) {
        if (block.text.length <= config.maxChunkSize) {
            emitAtomicChunk(block);
            return;
        }
        const isTable = block.type === 'table';
        // Task 7.2: Get table header prefix for table blocks
        let tablePrefix = '';
        if (isTable) {
            const ts = findTableStructureForBlock(block);
            if (ts) {
                tablePrefix = buildTableHeaderPrefix(ts);
            }
        }
        // Strip HTML tags from table text for clean FTS indexing.
        // NOTE(review): for tables, `pos`/`endPos` below index the stripped text
        // but are added to block.startOffset (an offset in the original text),
        // so sub-chunk offsets can drift when tags were removed — confirm
        // whether downstream consumers rely on exact offsets here.
        const blockText = isTable ? stripHtmlForFTS(block.text) : block.text;
        let pos = 0;
        while (pos < blockText.length) {
            let endPos;
            if (blockText.length - pos <= config.maxChunkSize) {
                // Remaining text fits in one chunk
                endPos = blockText.length;
            }
            else {
                // Find last newline at or before the maxChunkSize boundary
                endPos = blockText.lastIndexOf('\n', pos + config.maxChunkSize);
                if (endPos <= pos) {
                    // No usable newline within range, force split at maxChunkSize
                    endPos = pos + config.maxChunkSize;
                }
            }
            let chunkText = blockText.slice(pos, endPos);
            if (chunkText.trim().length > 0) {
                // Task 7.2: Prepend column header context to each table sub-chunk
                if (tablePrefix.length > 0) {
                    chunkText = tablePrefix + chunkText;
                }
                const startOff = block.startOffset + pos;
                const endOff = block.startOffset + endPos;
                const tableMeta = isTable ? resolveTableMetadata(startOff, endOff, chunkText) : null;
                pushChunk(chunkText, startOff, endOff, [mapBlockTypeToContentType(block.type)], true, tableMeta);
            }
            // Advance past the split point (skip newline if present)
            pos = endPos < blockText.length && blockText[endPos] === '\n' ? endPos + 1 : endPos;
        }
    }
    for (let blockIdx = 0; blockIdx < blocks.length; blockIdx++) {
        const block = blocks[blockIdx];
        // Skip empty and page_marker blocks
        if (block.type === 'empty' || block.type === 'page_marker') {
            continue;
        }
        const isHeading = block.type === 'heading';
        if (isHeading) {
            // Flush the previous section's pending text BEFORE the section path
            // is updated, so that chunk keeps the path it was collected under.
            flushAccumulator(false);
        }
        // Get section info for this block
        const sectionNode = sections.get(blockIdx);
        if (sectionNode) {
            currentSectionPath = sectionNode.path;
        }
        if (isHeading) {
            // Update heading context
            currentHeadingText = block.headingText;
            currentHeadingLevel = block.headingLevel;
            // Start new accumulator with the heading
            accumulator = createEmptyAccumulator(block.startOffset);
            addBlockToAccumulator(accumulator, block);
        }
        else if (block.type === 'table' || block.type === 'code') {
            // Size-aware atomic treatment: only emit as atomic if block is large enough
            // to produce meaningful standalone embeddings. Small tables/code blocks are
            // merged into surrounding content for better embedding quality.
            const minAtomicSize = Math.floor(config.chunkSize / 4);
            if (block.text.length >= minAtomicSize) {
                // Large table/code → atomic chunk (with oversized splitting)
                flushAccumulator(false);
                emitSizedAtomicChunk(block);
                accumulator = createEmptyAccumulator(block.endOffset);
            }
            else {
                // Small table/code → treat as regular content, merge into accumulator
                addBlockToAccumulator(accumulator, block);
                splitAccumulatorToFit();
            }
        }
        else {
            // Regular content (paragraph, list)
            // Check if this block starts inside an atomic region from JSON blocks
            const atomicRegion = isOffsetInAtomicRegion(block.startOffset, atomicRegions);
            if (atomicRegion) {
                // This was detected as part of an atomic region by JSON analysis
                flushAccumulator(false);
                emitAtomicChunk(block);
                accumulator = createEmptyAccumulator(block.endOffset);
            }
            else {
                addBlockToAccumulator(accumulator, block);
                splitAccumulatorToFit();
            }
        }
    }
    // Flush any remaining content
    flushAccumulator(false);
    // 8.5. Merge heading-only tiny chunks with neighbors. The merged array is
    // used directly rather than spread back into `chunks`, which avoids the
    // engine's argument-count limit on very large documents.
    const finalChunks = mergeHeadingOnlyChunks(chunks, config.minChunkSize ?? 100);
    // 9. Set overlap values for non-atomic chunks
    for (let i = 0; i < finalChunks.length; i++) {
        const chunk = finalChunks[i];
        if (chunk.isAtomic) {
            // Atomic chunks never participate in overlap
            chunk.overlapWithPrevious = 0;
            chunk.overlapWithNext = 0;
            continue;
        }
        // Set overlapWithPrevious for non-first chunks whose predecessor is non-atomic
        if (i > 0 && !finalChunks[i - 1].isAtomic) {
            chunk.overlapWithPrevious = overlapSize;
        }
        // Set overlapWithNext for non-last chunks whose successor is non-atomic
        if (i < finalChunks.length - 1 && !finalChunks[i + 1].isAtomic) {
            chunk.overlapWithNext = overlapSize;
        }
    }
    return finalChunks;
}
510
+ // ---------------------------------------------------------------------------
511
+ // Provenance creation
512
+ // ---------------------------------------------------------------------------
513
/**
 * Assemble the parameters for a CHUNK provenance record.
 *
 * The record links the chunk to its OCR provenance (`source_id`) and root
 * document, carries the content/input/file hashes, and captures the chunking
 * settings and the chunk's position in `processing_params`. Page number and
 * page range are added to `location` only when they are not `null`.
 *
 * @param params - Chunk provenance parameters: `chunk`, `chunkTextHash`,
 *        `ocrProvenanceId`, `documentProvenanceId`, `ocrContentHash`,
 *        `fileHash`, `totalChunks`, optional `processingDurationMs`, and
 *        optional `config` (defaults to DEFAULT_CHUNKING_CONFIG)
 * @returns CreateProvenanceParams ready for insertProvenance
 */
export function createChunkProvenance(params) {
    const chunk = params.chunk;
    const chunkingConfig = params.config === undefined ? DEFAULT_CHUNKING_CONFIG : params.config;
    // Location always has the index and character span; page fields are optional.
    const location = {
        chunk_index: chunk.index,
        character_start: chunk.startOffset,
        character_end: chunk.endOffset,
        ...(chunk.pageNumber !== null ? { page_number: chunk.pageNumber } : {}),
        ...(chunk.pageRange !== null ? { page_range: chunk.pageRange } : {}),
    };
    const processingParams = {
        chunk_size: chunkingConfig.chunkSize,
        overlap_percent: chunkingConfig.overlapPercent,
        max_chunk_size: chunkingConfig.maxChunkSize,
        strategy: 'hybrid_section',
        chunk_index: chunk.index,
        total_chunks: params.totalChunks,
        character_start: chunk.startOffset,
        character_end: chunk.endOffset,
        heading_context: chunk.headingContext ?? null,
        section_path: chunk.sectionPath ?? null,
        is_atomic: chunk.isAtomic,
        content_types: chunk.contentTypes,
    };
    return {
        type: ProvenanceType.CHUNK,
        source_type: 'CHUNKING',
        source_id: params.ocrProvenanceId,
        root_document_id: params.documentProvenanceId,
        content_hash: params.chunkTextHash,
        input_hash: params.ocrContentHash,
        file_hash: params.fileHash,
        processor: 'chunker',
        processor_version: '2.0.0',
        processing_params: processingParams,
        processing_duration_ms: params.processingDurationMs ?? null,
        location,
    };
}
565
+ export { DEFAULT_CHUNKING_CONFIG } from '../../models/chunk.js';
566
+ //# sourceMappingURL=chunker.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"chunker.js","sourceRoot":"","sources":["../../../src/services/chunking/chunker.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,EAGL,uBAAuB,EACvB,oBAAoB,GACrB,MAAM,uBAAuB,CAAC;AAE/B,OAAO,EACL,cAAc,GAIf,MAAM,4BAA4B,CAAC;AACpC,OAAO,EACL,mBAAmB,EACnB,qBAAqB,EACrB,sBAAsB,GAEvB,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EACL,iBAAiB,EACjB,sBAAsB,EACtB,sBAAsB,EACtB,0BAA0B,EAC1B,gCAAgC,EAChC,mBAAmB,EACnB,oBAAoB,GAErB,MAAM,0BAA0B,CAAC;AAClC,OAAO,EAAE,sBAAsB,EAAE,MAAM,yBAAyB,CAAC;AACjE,OAAO,EAAE,sBAAsB,EAAE,MAAM,mBAAmB,CAAC;AAE3D;;;GAGG;AACH,SAAS,eAAe,CAAC,IAAY;IACnC,OAAO,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC;AAC9D,CAAC;AAED;;;;GAIG;AACH,SAAS,2BAA2B,CAAC,SAAiB;IACpD,8DAA8D;IAC9D,MAAM,SAAS,GAAG,SAAS,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC;IACrE,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACzB,OAAO,IAAI,CAAC;IACd,CAAC;IAED,MAAM,aAAa,GAAG,0BAA0B,CAAC,SAAS,CAAC,CAAC;IAC5D,MAAM,EAAE,QAAQ,EAAE,WAAW,EAAE,GAAG,gCAAgC,CAAC,SAAS,CAAC,CAAC;IAC9E,MAAM,cAAc,GAAG,mBAAmB,CAAC,SAAS,CAAC,CAAC;IACtD,MAAM,OAAO,GAAG,oBAAoB,CAAC,aAAa,EAAE,QAAQ,EAAE,cAAc,CAAC,CAAC;IAE9E,OAAO;QACL,aAAa;QACb,QAAQ;QACR,WAAW,EAAE,WAAW,GAAG,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,aAAa,CAAC,MAAM;QACjE,OAAO;KACR,CAAC;AACJ,CAAC;AAqCD,SAAS,sBAAsB,CAAC,WAAmB;IACjD,OAAO;QACL,IAAI,EAAE,EAAE;QACR,MAAM,EAAE,EAAE;QACV,WAAW;QACX,YAAY,EAAE,IAAI,GAAG,EAAE;KACxB,CAAC;AACJ,CAAC;AAED,SAAS,qBAAqB,CAAC,GAAgB;IAC7C,OAAO,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC;AACpC,CAAC;AAED,SAAS,qBAAqB,CAAC,GAAgB,EAAE,KAAoB;IACnE,IAAI,GAAG,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACxB,GAAG,CAAC,IAAI,IAAI,MAAM,CAAC;IACrB,CAAC;IACD,GAAG,CAAC,IAAI,IAAI,KAAK,CAAC,IAAI,CAAC;IACvB,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACvB,GAAG,CAAC,YAAY,CAAC,GAAG,CAAC,yBAAyB,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC;AAC9D,CAAC;AAED,SAAS,yBAAyB,CAAC,SAAiB;IAClD,QAAQ,SAAS,EAAE,CAAC;QAClB,KAAK,SAAS,CAAC,CAAC,OAAO,SAAS,CAAC;QACjC,KAAK,OAAO,CAAC,CAAC,OA
AO,OAAO,CAAC;QAC7B,KAAK,MAAM,CAAC,CAAC,OAAO,MAAM,CAAC;QAC3B,KAAK,MAAM,CAAC,CAAC,OAAO,MAAM,CAAC;QAC3B,KAAK,WAAW,CAAC,CAAC,OAAO,MAAM,CAAC;QAChC,OAAO,CAAC,CAAC,OAAO,MAAM,CAAC;IACzB,CAAC;AACH,CAAC;AAED,8EAA8E;AAC9E,8BAA8B;AAC9B,8EAA8E;AAE9E;;;;;;;;;;GAUG;AACH,SAAS,oBAAoB,CAAC,IAAY,EAAE,MAAc;IACxD,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,MAAM,GAAG,GAAG,CAAC,CAAC;IAE9C,8DAA8D;IAC9D,KAAK,IAAI,CAAC,GAAG,MAAM,EAAE,CAAC,IAAI,WAAW,EAAE,CAAC,EAAE,EAAE,CAAC;QAC3C,MAAM,EAAE,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QACnB,IAAI,CAAC,EAAE,KAAK,GAAG,IAAI,EAAE,KAAK,GAAG,IAAI,EAAE,KAAK,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;YACpE,MAAM,IAAI,GAAG,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;YACzB,IAAI,IAAI,KAAK,GAAG,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;gBAClC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,8BAA8B;YAC9C,CAAC;QACH,CAAC;IACH,CAAC;IAED,qCAAqC;IACrC,KAAK,IAAI,CAAC,GAAG,MAAM,EAAE,CAAC,IAAI,WAAW,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC/C,IAAI,IAAI,CAAC,CAAC,CAAC,KAAK,IAAI,IAAI,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YAC7C,OAAO,CAAC,GAAG,CAAC,CAAC;QACf,CAAC;IACH,CAAC;IAED,8BAA8B;IAC9B,KAAK,IAAI,CAAC,GAAG,MAAM,EAAE,CAAC,IAAI,WAAW,EAAE,CAAC,EAAE,EAAE,CAAC;QAC3C,IAAI,IAAI,CAAC,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YACrB,OAAO,CAAC,GAAG,CAAC,CAAC;QACf,CAAC;IACH,CAAC;IAED,wBAAwB;IACxB,KAAK,IAAI,CAAC,GAAG,MAAM,EAAE,CAAC,IAAI,WAAW,EAAE,CAAC,EAAE,EAAE,CAAC;QAC3C,IAAI,IAAI,CAAC,CAAC,CAAC,KAAK,GAAG,EAAE,CAAC;YACpB,OAAO,CAAC,GAAG,CAAC,CAAC;QACf,CAAC;IACH,CAAC;IAED,qCAAqC;IACrC,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,8EAA8E;AAC9E,0BAA0B;AAC1B,8EAA8E;AAE9E;;GAEG;AACH,SAAS,wBAAwB,CAC/B,WAAmB,EACnB,SAAiB,EACjB,WAAyB;IAEzB,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC7B,OAAO,EAAE,UAAU,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC;IAC/C,CAAC;IAED,MAAM,SAAS,GAAG,sBAAsB,CAAC,WAAW,EAAE,WAAW,CAAC,CAAC;IACnE,MAAM,OAAO,GAAG,sBAAsB,CACpC,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,SAAS,GAAG,CAAC,CAAC,EACpC,WAAW,CACZ,CAAC;IAEF,IAAI,SAAS,KAAK,IAAI,EAAE,CAAC;QACvB,OAAO,EAAE,UAAU,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC;IAC/C,CAAC;IAED,IAAI,OAAO,KAAK,IAAI,IAAI
,SAAS,KAAK,OAAO,EAAE,CAAC;QAC9C,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC;IACpD,CAAC;IAED,OAAO;QACL,UAAU,EAAE,SAAS;QACrB,SAAS,EAAE,GAAG,SAAS,IAAI,OAAO,EAAE;KACrC,CAAC;AACJ,CAAC;AAED,8EAA8E;AAC9E,gCAAgC;AAChC,8EAA8E;AAE9E;;;;;;;;;;;;GAYG;AACH,MAAM,UAAU,uBAAuB,CACrC,IAAY,EACZ,WAAyB,EACzB,UAA0C,EAC1C,SAAyB,uBAAuB;IAEhD,oCAAoC;IACpC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,2BAA2B;IAC3B,MAAM,MAAM,GAAG,mBAAmB,CAAC,IAAI,EAAE,WAAW,CAAC,CAAC;IAEtD,4DAA4D;IAC5D,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,MAAM,IAAI,KAAK,CACb,0DAA0D,IAAI,CAAC,MAAM,SAAS,CAC/E,CAAC;IACJ,CAAC;IAED,8CAA8C;IAC9C,IAAI,MAAM,CAAC,oBAAoB,EAAE,CAAC;QAChC,sBAAsB,CAAC,MAAM,EAAE,MAAM,CAAC,oBAAoB,CAAC,CAAC;IAC9D,CAAC;IAED,6BAA6B;IAC7B,MAAM,QAAQ,GAAG,qBAAqB,CAAC,MAAM,CAAC,CAAC;IAE/C,0CAA0C;IAC1C,MAAM,aAAa,GAAG,iBAAiB,CAAC,UAAU,EAAE,IAAI,EAAE,WAAW,CAAC,CAAC;IAEvE,qEAAqE;IACrE,MAAM,eAAe,GAAG,sBAAsB,CAAC,UAAU,EAAE,IAAI,EAAE,WAAW,CAAC,CAAC;IAE9E;;OAEG;IACH,SAAS,0BAA0B,CAAC,KAAoB;QACtD,KAAK,MAAM,EAAE,IAAI,eAAe,EAAE,CAAC;YACjC,+CAA+C;YAC/C,IAAI,KAAK,CAAC,WAAW,GAAG,EAAE,CAAC,SAAS,IAAI,KAAK,CAAC,SAAS,GAAG,EAAE,CAAC,WAAW,EAAE,CAAC;gBACzE,OAAO,EAAE,CAAC;YACZ,CAAC;QACH,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IAED;;;OAGG;IACH,SAAS,iBAAiB,CACxB,MAAc,EACd,MAAc;QAEd,MAAM,GAAG,GAAG,MAAM,GAAG,MAAM,CAAC;QAC5B,KAAK,MAAM,EAAE,IAAI,eAAe,EAAE,CAAC;YACjC,+CAA+C;YAC/C,IAAI,MAAM,IAAI,EAAE,CAAC,SAAS,IAAI,GAAG,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;gBACpD,OAAO;oBACL,aAAa,EAAE,EAAE,CAAC,aAAa;oBAC/B,QAAQ,EAAE,EAAE,CAAC,QAAQ;oBACrB,WAAW,EAAE,EAAE,CAAC,WAAW;oBAC3B,OAAO,EAAE,EAAE,CAAC,OAAO;oBACnB,OAAO,EAAE,EAAE,CAAC,OAAO;oBACnB,cAAc,EAAE,EAAE,CAAC,cAAc;iBAClC,CAAC;YACJ,CAAC;QACH,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IAED;;;OAGG;IACH,SAAS,sBAAsB,CAAC,EAAkB;QAChD,IAAI,EAAE,CAAC,aAAa,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,EAAE,CAAC;QAC7C,OAAO,WAAW,EAAE,CAAC,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC;IACrD,CAAC;IAED,2CAA2C;IAC3C,MAAM,MAAM,GAAkB,EAAE,CAAC;IACjC,IAAI,WAAW,GAAG,sBAAsB,CAAC,CAAC,CAAC,CAAC;IAC5C,IAAI,kBAAkB,GAAk
B,IAAI,CAAC;IAC7C,IAAI,kBAAkB,GAAkB,IAAI,CAAC;IAC7C,IAAI,mBAAmB,GAAkB,IAAI,CAAC;IAC9C,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,MAAM,WAAW,GAAG,oBAAoB,CAAC,MAAM,CAAC,CAAC;IAEjD;;OAEG;IACH,SAAS,gBAAgB,CAAC,QAAiB;QACzC,IAAI,CAAC,qBAAqB,CAAC,WAAW,CAAC,EAAE,CAAC;YACxC,OAAO;QACT,CAAC;QAED,MAAM,SAAS,GAAG,WAAW,CAAC,IAAI,CAAC;QACnC,MAAM,QAAQ,GAAG,WAAW,CAAC,WAAW,CAAC;QACzC,MAAM,MAAM,GAAG,QAAQ,GAAG,SAAS,CAAC,MAAM,CAAC;QAE3C,MAAM,QAAQ,GAAG,wBAAwB,CAAC,QAAQ,EAAE,MAAM,EAAE,WAAW,CAAC,CAAC;QAEzE,8DAA8D;QAC9D,MAAM,eAAe,GAAG,WAAW,CAAC,YAAY,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;QAC9D,MAAM,mBAAmB,GAAG,eAAe;YACzC,CAAC,CAAC,CAAC,iBAAiB,CAAC,QAAQ,EAAE,MAAM,GAAG,QAAQ,CAAC,IAAI,2BAA2B,CAAC,SAAS,CAAC,CAAC;YAC5F,CAAC,CAAC,IAAI,CAAC;QAET,MAAM,CAAC,IAAI,CAAC;YACV,KAAK,EAAE,UAAU,EAAE;YACnB,IAAI,EAAE,SAAS;YACf,WAAW,EAAE,QAAQ;YACrB,SAAS,EAAE,MAAM;YACjB,mBAAmB,EAAE,CAAC,EAAE,yBAAyB;YACjD,eAAe,EAAE,CAAC,EAAM,yBAAyB;YACjD,UAAU,EAAE,QAAQ,CAAC,UAAU;YAC/B,SAAS,EAAE,QAAQ,CAAC,SAAS;YAC7B,cAAc,EAAE,kBAAkB;YAClC,YAAY,EAAE,mBAAmB;YACjC,WAAW,EAAE,kBAAkB;YAC/B,YAAY,EAAE,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,YAAY,CAAC;YAClD,QAAQ;YACR,aAAa,EAAE,mBAAmB;SACnC,CAAC,CAAC;IACL,CAAC;IAED;;;OAGG;IACH,SAAS,eAAe,CAAC,KAAoB;QAC3C,gFAAgF;QAChF,IAAI,KAAK,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACnC,OAAO,CAAC,KAAK,CACX,mDAAmD,KAAK,CAAC,WAAW,IAAI,KAAK,CAAC,SAAS,GAAG;gBAC1F,SAAS,KAAK,CAAC,IAAI,gBAAgB,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,CACxD,CAAC;YACF,OAAO;QACT,CAAC;QAED,MAAM,QAAQ,GAAG,KAAK,CAAC,WAAW,CAAC;QACnC,MAAM,MAAM,GAAG,KAAK,CAAC,SAAS,CAAC;QAC/B,MAAM,QAAQ,GAAG,wBAAwB,CAAC,QAAQ,EAAE,MAAM,EAAE,WAAW,CAAC,CAAC;QAEzE,2DAA2D;QAC3D,IAAI,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC;QAC3B,IAAI,KAAK,CAAC,IAAI,KAAK,OAAO,EAAE,CAAC;YAC3B,yDAAyD;YACzD,SAAS,GAAG,eAAe,CAAC,SAAS,CAAC,CAAC;YACvC,MAAM,EAAE,GAAG,0BAA0B,CAAC,KAAK,CAAC,CAAC;YAC7C,IAAI,EAAE,EAAE,CAAC;gBACP,MAAM,MAAM,GAAG,sBAAsB,CAAC,EAAE,CAAC,CAAC;gBAC1C,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBACtB,SAAS,GAAG,MAAM,GAAG,SAAS,CAAC;gBACjC,CAAC;YACH,CAAC;QACH,CAAC;QAED,uEAAuE;QACvE,IAAI,SAAS,CAAC,IAAI,EAAE,CAAC
,MAAM,KAAK,CAAC,EAAE,CAAC;YAClC,OAAO,CAAC,KAAK,CACX,kEAAkE,QAAQ,IAAI,MAAM,GAAG;gBACvF,SAAS,KAAK,CAAC,IAAI,qBAAqB,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,CAC7D,CAAC;YACF,OAAO;QACT,CAAC;QAED,MAAM,uBAAuB,GAAG,KAAK,CAAC,IAAI,KAAK,OAAO;YACpD,CAAC,CAAC,CAAC,iBAAiB,CAAC,QAAQ,EAAE,MAAM,GAAG,QAAQ,CAAC,IAAI,2BAA2B,CAAC,SAAS,CAAC,CAAC;YAC5F,CAAC,CAAC,IAAI,CAAC;QAET,MAAM,CAAC,IAAI,CAAC;YACV,KAAK,EAAE,UAAU,EAAE;YACnB,IAAI,EAAE,SAAS;YACf,WAAW,EAAE,QAAQ;YACrB,SAAS,EAAE,MAAM;YACjB,mBAAmB,EAAE,CAAC;YACtB,eAAe,EAAE,CAAC;YAClB,UAAU,EAAE,QAAQ,CAAC,UAAU;YAC/B,SAAS,EAAE,QAAQ,CAAC,SAAS;YAC7B,cAAc,EAAE,kBAAkB;YAClC,YAAY,EAAE,mBAAmB;YACjC,WAAW,EAAE,kBAAkB;YAC/B,YAAY,EAAE,CAAC,yBAAyB,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YACrD,QAAQ,EAAE,IAAI;YACd,aAAa,EAAE,uBAAuB;SACvC,CAAC,CAAC;IACL,CAAC;IAED;;;;;OAKG;IACH,SAAS,oBAAoB,CAAC,KAAoB;QAChD,IAAI,KAAK,CAAC,IAAI,CAAC,MAAM,IAAI,MAAM,CAAC,YAAY,EAAE,CAAC;YAC7C,eAAe,CAAC,KAAK,CAAC,CAAC;YACvB,OAAO;QACT,CAAC;QAED,qDAAqD;QACrD,IAAI,WAAW,GAAG,EAAE,CAAC;QACrB,IAAI,KAAK,CAAC,IAAI,KAAK,OAAO,EAAE,CAAC;YAC3B,MAAM,EAAE,GAAG,0BAA0B,CAAC,KAAK,CAAC,CAAC;YAC7C,IAAI,EAAE,EAAE,CAAC;gBACP,WAAW,GAAG,sBAAsB,CAAC,EAAE,CAAC,CAAC;YAC3C,CAAC;QACH,CAAC;QAED,kDAAkD;QAClD,yDAAyD;QACzD,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,KAAK,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC;QACpF,IAAI,GAAG,GAAG,CAAC,CAAC;QAEZ,OAAO,GAAG,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC;YAC9B,IAAI,MAAc,CAAC;YAEnB,IAAI,SAAS,CAAC,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,YAAY,EAAE,CAAC;gBAClD,mCAAmC;gBACnC,MAAM,GAAG,SAAS,CAAC,MAAM,CAAC;YAC5B,CAAC;iBAAM,CAAC;gBACN,iDAAiD;gBACjD,MAAM,GAAG,SAAS,CAAC,WAAW,CAAC,IAAI,EAAE,GAAG,GAAG,MAAM,CAAC,YAAY,CAAC,CAAC;gBAChE,IAAI,MAAM,IAAI,GAAG,EAAE,CAAC;oBAClB,6DAA6D;oBAC7D,MAAM,GAAG,GAAG,GAAG,MAAM,CAAC,YAAY,CAAC;gBACrC,CAAC;YACH,CAAC;YAED,IAAI,SAAS,GAAG,SAAS,CAAC,KAAK,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC;YAC7C,IAAI,SAAS,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAChC,kEAAkE;gBAClE,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBAC3B,SAAS,GAAG,WAAW,GAAG,SAAS,CAAC;gBACtC,CAAC;gBAED,MAAM,QAAQ,GAA
G,KAAK,CAAC,WAAW,GAAG,GAAG,CAAC;gBACzC,MAAM,MAAM,GAAG,KAAK,CAAC,WAAW,GAAG,MAAM,CAAC;gBAC1C,MAAM,QAAQ,GAAG,wBAAwB,CAAC,QAAQ,EAAE,MAAM,EAAE,WAAW,CAAC,CAAC;gBAEzE,MAAM,oBAAoB,GAAG,KAAK,CAAC,IAAI,KAAK,OAAO;oBACjD,CAAC,CAAC,CAAC,iBAAiB,CAAC,QAAQ,EAAE,MAAM,GAAG,QAAQ,CAAC,IAAI,2BAA2B,CAAC,SAAS,CAAC,CAAC;oBAC5F,CAAC,CAAC,IAAI,CAAC;gBAET,MAAM,CAAC,IAAI,CAAC;oBACV,KAAK,EAAE,UAAU,EAAE;oBACnB,IAAI,EAAE,SAAS;oBACf,WAAW,EAAE,QAAQ;oBACrB,SAAS,EAAE,MAAM;oBACjB,mBAAmB,EAAE,CAAC;oBACtB,eAAe,EAAE,CAAC;oBAClB,UAAU,EAAE,QAAQ,CAAC,UAAU;oBAC/B,SAAS,EAAE,QAAQ,CAAC,SAAS;oBAC7B,cAAc,EAAE,kBAAkB;oBAClC,YAAY,EAAE,mBAAmB;oBACjC,WAAW,EAAE,kBAAkB;oBAC/B,YAAY,EAAE,CAAC,yBAAyB,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;oBACrD,QAAQ,EAAE,IAAI;oBACd,aAAa,EAAE,oBAAoB;iBACpC,CAAC,CAAC;YACL,CAAC;YAED,yDAAyD;YACzD,GAAG,GAAG,MAAM,GAAG,SAAS,CAAC,MAAM,IAAI,SAAS,CAAC,MAAM,CAAC,KAAK,IAAI,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;QACtF,CAAC;IACH,CAAC;IAED,KAAK,IAAI,QAAQ,GAAG,CAAC,EAAE,QAAQ,GAAG,MAAM,CAAC,MAAM,EAAE,QAAQ,EAAE,EAAE,CAAC;QAC5D,MAAM,KAAK,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC;QAE/B,oCAAoC;QACpC,IAAI,KAAK,CAAC,IAAI,KAAK,OAAO,IAAI,KAAK,CAAC,IAAI,KAAK,aAAa,EAAE,CAAC;YAC3D,SAAS;QACX,CAAC;QAED,kCAAkC;QAClC,MAAM,WAAW,GAAG,QAAQ,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QAC3C,IAAI,WAAW,EAAE,CAAC;YAChB,kBAAkB,GAAG,WAAW,CAAC,IAAI,CAAC;QACxC,CAAC;QAED,IAAI,KAAK,CAAC,IAAI,KAAK,SAAS,EAAE,CAAC;YAC7B,gDAAgD;YAChD,gBAAgB,CAAC,KAAK,CAAC,CAAC;YAExB,yBAAyB;YACzB,kBAAkB,GAAG,KAAK,CAAC,WAAW,CAAC;YACvC,mBAAmB,GAAG,KAAK,CAAC,YAAY,CAAC;YAEzC,yCAAyC;YACzC,WAAW,GAAG,sBAAsB,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC;YACxD,qBAAqB,CAAC,WAAW,EAAE,KAAK,CAAC,CAAC;QAE5C,CAAC;aAAM,IAAI,KAAK,CAAC,IAAI,KAAK,OAAO,IAAI,KAAK,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;YAC3D,4EAA4E;YAC5E,4EAA4E;YAC5E,gEAAgE;YAChE,MAAM,aAAa,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC;YAEvD,IAAI,KAAK,CAAC,IAAI,CAAC,MAAM,IAAI,aAAa,EAAE,CAAC;gBACvC,6DAA6D;gBAC7D,gBAAgB,CAAC,KAAK,CAAC,CAAC;gBACxB,oBAAoB,CAAC,KAAK,CAAC,CAAC;gBAC5B,WAAW,GAAG,sBAAsB,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;YACxD,CAAC;iBAA
M,CAAC;gBACN,sEAAsE;gBACtE,qBAAqB,CAAC,WAAW,EAAE,KAAK,CAAC,CAAC;gBAE1C,2EAA2E;gBAC3E,IAAI,WAAW,CAAC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,SAAS,EAAE,CAAC;oBAC/C,MAAM,QAAQ,GAAG,oBAAoB,CAAC,WAAW,CAAC,IAAI,EAAE,MAAM,CAAC,SAAS,CAAC,CAAC;oBAC1E,MAAM,QAAQ,GAAG,WAAW,CAAC,IAAI,CAAC;oBAClC,MAAM,gBAAgB,GAAG,WAAW,CAAC,WAAW,CAAC;oBACjD,MAAM,iBAAiB,GAAG,IAAI,GAAG,CAAC,WAAW,CAAC,YAAY,CAAC,CAAC;oBAE5D,WAAW,CAAC,IAAI,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC;oBAC/C,gBAAgB,CAAC,KAAK,CAAC,CAAC;oBAExB,MAAM,SAAS,GAAG,QAAQ,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;oBAC3C,WAAW,GAAG,sBAAsB,CAAC,gBAAgB,GAAG,QAAQ,CAAC,CAAC;oBAClE,WAAW,CAAC,IAAI,GAAG,SAAS,CAAC;oBAC7B,WAAW,CAAC,YAAY,GAAG,iBAAiB,CAAC;gBAC/C,CAAC;YACH,CAAC;QAEH,CAAC;aAAM,CAAC;YACN,oCAAoC;YACpC,iEAAiE;YACjE,MAAM,YAAY,GAAG,sBAAsB,CAAC,KAAK,CAAC,WAAW,EAAE,aAAa,CAAC,CAAC;YAC9E,IAAI,YAAY,EAAE,CAAC;gBACjB,iEAAiE;gBACjE,gBAAgB,CAAC,KAAK,CAAC,CAAC;gBACxB,eAAe,CAAC,KAAK,CAAC,CAAC;gBACvB,WAAW,GAAG,sBAAsB,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;YACxD,CAAC;iBAAM,CAAC;gBACN,qBAAqB;gBACrB,qBAAqB,CAAC,WAAW,EAAE,KAAK,CAAC,CAAC;gBAE1C,0CAA0C;gBAC1C,IAAI,WAAW,CAAC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,SAAS,EAAE,CAAC;oBAC/C,yCAAyC;oBACzC,MAAM,QAAQ,GAAG,oBAAoB,CAAC,WAAW,CAAC,IAAI,EAAE,MAAM,CAAC,SAAS,CAAC,CAAC;oBAE1E,0BAA0B;oBAC1B,MAAM,QAAQ,GAAG,WAAW,CAAC,IAAI,CAAC;oBAClC,MAAM,gBAAgB,GAAG,WAAW,CAAC,WAAW,CAAC;oBACjD,MAAM,iBAAiB,GAAG,IAAI,GAAG,CAAC,WAAW,CAAC,YAAY,CAAC,CAAC;oBAE5D,qDAAqD;oBACrD,WAAW,CAAC,IAAI,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC;oBAC/C,gBAAgB,CAAC,KAAK,CAAC,CAAC;oBAExB,0CAA0C;oBAC1C,MAAM,SAAS,GAAG,QAAQ,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;oBAC3C,WAAW,GAAG,sBAAsB,CAAC,gBAAgB,GAAG,QAAQ,CAAC,CAAC;oBAClE,WAAW,CAAC,IAAI,GAAG,SAAS,CAAC;oBAC7B,WAAW,CAAC,YAAY,GAAG,iBAAiB,CAAC;oBAE7C,0DAA0D;oBAC1D,OAAO,WAAW,CAAC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,SAAS,EAAE,CAAC;wBAClD,MAAM,aAAa,GAAG,oBAAoB,CACxC,WAAW,CAAC,IAAI,EAChB,MAAM,CAAC,SAAS,CACjB,CAAC;wBACF,MAAM,aAAa,GAAG,WAAW,CAAC,IAAI,CAAC;wBACvC,MAAM,gBAAgB,GAAG,WAAW,CAAC,WAAW,CAAC;wBACjD,MAAM,iBAAiB,GAAG,IAAI,GAAG,CAAC,WAAW,CAAC
,YAAY,CAAC,CAAC;wBAE5D,WAAW,CAAC,IAAI,GAAG,aAAa,CAAC,KAAK,CAAC,CAAC,EAAE,aAAa,CAAC,CAAC;wBACzD,gBAAgB,CAAC,KAAK,CAAC,CAAC;wBAExB,MAAM,cAAc,GAAG,aAAa,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;wBAC1D,WAAW,GAAG,sBAAsB,CAAC,gBAAgB,GAAG,aAAa,CAAC,CAAC;wBACvE,WAAW,CAAC,IAAI,GAAG,cAAc,CAAC;wBAClC,WAAW,CAAC,YAAY,GAAG,iBAAiB,CAAC;oBAC/C,CAAC;gBACH,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,8BAA8B;IAC9B,gBAAgB,CAAC,KAAK,CAAC,CAAC;IAExB,qDAAqD;IACrD,MAAM,YAAY,GAAG,sBAAsB,CAAC,MAAM,EAAE,MAAM,CAAC,YAAY,IAAI,GAAG,CAAC,CAAC;IAChF,oDAAoD;IACpD,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC;IAClB,MAAM,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC,CAAC;IAE7B,8CAA8C;IAC9C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACvC,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;QAExB,IAAI,KAAK,CAAC,QAAQ,EAAE,CAAC;YACnB,6CAA6C;YAC7C,KAAK,CAAC,mBAAmB,GAAG,CAAC,CAAC;YAC9B,KAAK,CAAC,eAAe,GAAG,CAAC,CAAC;YAC1B,SAAS;QACX,CAAC;QAED,2DAA2D;QAC3D,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,QAAQ,EAAE,CAAC;YACrC,KAAK,CAAC,mBAAmB,GAAG,WAAW,CAAC;QAC1C,CAAC;QAED,sDAAsD;QACtD,IAAI,CAAC,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,QAAQ,EAAE,CAAC;YACrD,KAAK,CAAC,eAAe,GAAG,WAAW,CAAC;QACtC,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,8EAA8E;AAC9E,sBAAsB;AACtB,8EAA8E;AAE9E;;;;;;;;GAQG;AACH,MAAM,UAAU,qBAAqB,CAAC,MAA6B;IACjE,MAAM,EACJ,KAAK,EACL,aAAa,EACb,eAAe,EACf,oBAAoB,EACpB,cAAc,EACd,QAAQ,EACR,WAAW,EACX,oBAAoB,EACpB,MAAM,GAAG,uBAAuB,GACjC,GAAG,MAAM,CAAC;IAEX,6BAA6B;IAC7B,MAAM,QAAQ,GAAuB;QACnC,WAAW,EAAE,KAAK,CAAC,KAAK;QACxB,eAAe,EAAE,KAAK,CAAC,WAAW;QAClC,aAAa,EAAE,KAAK,CAAC,SAAS;KAC/B,CAAC;IAEF,kCAAkC;IAClC,IAAI,KAAK,CAAC,UAAU,KAAK,IAAI,EAAE,CAAC;QAC9B,QAAQ,CAAC,WAAW,GAAG,KAAK,CAAC,UAAU,CAAC;IAC1C,CAAC;IACD,IAAI,KAAK,CAAC,SAAS,KAAK,IAAI,EAAE,CAAC;QAC7B,QAAQ,CAAC,UAAU,GAAG,KAAK,CAAC,SAAS,CAAC;IACxC,CAAC;IAED,OAAO;QACL,IAAI,EAAE,cAAc,CAAC,KAAK;QAC1B,WAAW,EAAE,UAAwB;QACrC,SAAS,EAAE,eAAe;QAC1B,gBAAgB,EAAE,oBAAoB;QACtC,YAAY,EAAE,aAAa;QAC3B,UAAU,EAAE,cAAc;QAC1B,SAAS,EAAE,QAAQ;QACnB,SA
AS,EAAE,SAAS;QACpB,iBAAiB,EAAE,OAAO;QAC1B,iBAAiB,EAAE;YACjB,UAAU,EAAE,MAAM,CAAC,SAAS;YAC5B,eAAe,EAAE,MAAM,CAAC,cAAc;YACtC,cAAc,EAAE,MAAM,CAAC,YAAY;YACnC,QAAQ,EAAE,gBAAgB;YAC1B,WAAW,EAAE,KAAK,CAAC,KAAK;YACxB,YAAY,EAAE,WAAW;YACzB,eAAe,EAAE,KAAK,CAAC,WAAW;YAClC,aAAa,EAAE,KAAK,CAAC,SAAS;YAC9B,eAAe,EAAE,KAAK,CAAC,cAAc,IAAI,IAAI;YAC7C,YAAY,EAAE,KAAK,CAAC,WAAW,IAAI,IAAI;YACvC,SAAS,EAAE,KAAK,CAAC,QAAQ;YACzB,aAAa,EAAE,KAAK,CAAC,YAAY;SAClC;QACD,sBAAsB,EAAE,oBAAoB,IAAI,IAAI;QACpD,QAAQ;KACT,CAAC;AACJ,CAAC;AAID,OAAO,EAAE,uBAAuB,EAAE,MAAM,uBAAuB,CAAC"}
@@ -0,0 +1,33 @@
1
+ /**
2
+ * Heading Level Normalizer for Section-Aware Chunking
3
+ *
4
+ * Fixes inconsistent heading levels from Datalab OCR by detecting
5
+ * repeating heading patterns (e.g., "ARTICLE N") and normalizing
6
+ * their heading levels to the mode (most common) level within each group.
7
+ *
8
+ * @module services/chunking/heading-normalizer
9
+ */
10
+ import { MarkdownBlock } from './markdown-parser.js';
11
+ /** Configuration for heading normalization */
12
+ export interface HeadingNormalizationConfig {
13
+ /** Enable heading normalization (default: false) */
14
+ enabled: boolean;
15
+ /** Minimum pattern group size to trigger normalization (default: 3) */
16
+ minPatternCount?: number;
17
+ }
18
+ /**
19
+ * Normalize heading levels in-place for consistent section hierarchy.
20
+ *
21
+ * Groups headings by structural patterns (ARTICLE N, Section N.N, etc.),
22
+ * then normalizes each group to use the mode heading level. This fixes
23
+ * Datalab OCR inconsistencies where identical structural headings get
24
+ * assigned different levels (e.g., ARTICLE 1 as H1 but ARTICLE 5 as H3).
25
+ *
26
+ * Only mutates `block.headingLevel` - never modifies `block.text`.
27
+ *
28
+ * @param blocks - Parsed markdown blocks (mutated in-place)
29
+ * @param config - Normalization configuration
30
+ * @returns The same blocks array (for chaining convenience)
31
+ */
32
+ export declare function normalizeHeadingLevels(blocks: MarkdownBlock[], config: HeadingNormalizationConfig): MarkdownBlock[];
33
+ //# sourceMappingURL=heading-normalizer.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"heading-normalizer.d.ts","sourceRoot":"","sources":["../../../src/services/chunking/heading-normalizer.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AAErD,8CAA8C;AAC9C,MAAM,WAAW,0BAA0B;IACzC,oDAAoD;IACpD,OAAO,EAAE,OAAO,CAAC;IACjB,uEAAuE;IACvE,eAAe,CAAC,EAAE,MAAM,CAAC;CAC1B;AAwDD;;;;;;;;;;;;;GAaG;AACH,wBAAgB,sBAAsB,CACpC,MAAM,EAAE,aAAa,EAAE,EACvB,MAAM,EAAE,0BAA0B,GACjC,aAAa,EAAE,CA6CjB"}