@usejunior/docx-core 0.9.1 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (335)
  1. package/LICENSE +202 -21
  2. package/NOTICE +2 -0
  3. package/README.md +2 -2
  4. package/dist/.tsbuildinfo +1 -1
  5. package/dist/atomizer.d.ts +28 -8
  6. package/dist/atomizer.d.ts.map +1 -1
  7. package/dist/atomizer.js +96 -25
  8. package/dist/atomizer.js.map +1 -1
  9. package/dist/baselines/atomizer/auxiliaryIdCollision.d.ts +99 -0
  10. package/dist/baselines/atomizer/auxiliaryIdCollision.d.ts.map +1 -0
  11. package/dist/baselines/atomizer/auxiliaryIdCollision.js +415 -0
  12. package/dist/baselines/atomizer/auxiliaryIdCollision.js.map +1 -0
  13. package/dist/baselines/atomizer/documentReconstructor.d.ts.map +1 -1
  14. package/dist/baselines/atomizer/documentReconstructor.js +333 -112
  15. package/dist/baselines/atomizer/documentReconstructor.js.map +1 -1
  16. package/dist/baselines/atomizer/formattingFidelity.d.ts +99 -0
  17. package/dist/baselines/atomizer/formattingFidelity.d.ts.map +1 -0
  18. package/dist/baselines/atomizer/formattingFidelity.js +449 -0
  19. package/dist/baselines/atomizer/formattingFidelity.js.map +1 -0
  20. package/dist/baselines/atomizer/inPlaceModifier-bookmarks.d.ts +37 -0
  21. package/dist/baselines/atomizer/inPlaceModifier-bookmarks.d.ts.map +1 -0
  22. package/dist/baselines/atomizer/inPlaceModifier-bookmarks.js +189 -0
  23. package/dist/baselines/atomizer/inPlaceModifier-bookmarks.js.map +1 -0
  24. package/dist/baselines/atomizer/inPlaceModifier-containers.d.ts +74 -0
  25. package/dist/baselines/atomizer/inPlaceModifier-containers.d.ts.map +1 -0
  26. package/dist/baselines/atomizer/inPlaceModifier-containers.js +171 -0
  27. package/dist/baselines/atomizer/inPlaceModifier-containers.js.map +1 -0
  28. package/dist/baselines/atomizer/inPlaceModifier-deletion.d.ts +88 -0
  29. package/dist/baselines/atomizer/inPlaceModifier-deletion.d.ts.map +1 -0
  30. package/dist/baselines/atomizer/inPlaceModifier-deletion.js +326 -0
  31. package/dist/baselines/atomizer/inPlaceModifier-deletion.js.map +1 -0
  32. package/dist/baselines/atomizer/inPlaceModifier-postprocess.d.ts +85 -0
  33. package/dist/baselines/atomizer/inPlaceModifier-postprocess.d.ts.map +1 -0
  34. package/dist/baselines/atomizer/inPlaceModifier-postprocess.js +402 -0
  35. package/dist/baselines/atomizer/inPlaceModifier-postprocess.js.map +1 -0
  36. package/dist/baselines/atomizer/inPlaceModifier-presplit.d.ts +39 -0
  37. package/dist/baselines/atomizer/inPlaceModifier-presplit.d.ts.map +1 -0
  38. package/dist/baselines/atomizer/inPlaceModifier-presplit.js +265 -0
  39. package/dist/baselines/atomizer/inPlaceModifier-presplit.js.map +1 -0
  40. package/dist/baselines/atomizer/inPlaceModifier-shared.d.ts +62 -0
  41. package/dist/baselines/atomizer/inPlaceModifier-shared.d.ts.map +1 -0
  42. package/dist/baselines/atomizer/inPlaceModifier-shared.js +139 -0
  43. package/dist/baselines/atomizer/inPlaceModifier-shared.js.map +1 -0
  44. package/dist/baselines/atomizer/inPlaceModifier-wrappers.d.ts +198 -0
  45. package/dist/baselines/atomizer/inPlaceModifier-wrappers.d.ts.map +1 -0
  46. package/dist/baselines/atomizer/inPlaceModifier-wrappers.js +475 -0
  47. package/dist/baselines/atomizer/inPlaceModifier-wrappers.js.map +1 -0
  48. package/dist/baselines/atomizer/inPlaceModifier.d.ts +6 -290
  49. package/dist/baselines/atomizer/inPlaceModifier.d.ts.map +1 -1
  50. package/dist/baselines/atomizer/inPlaceModifier.js +23 -1828
  51. package/dist/baselines/atomizer/inPlaceModifier.js.map +1 -1
  52. package/dist/baselines/atomizer/pipeline.d.ts +36 -2
  53. package/dist/baselines/atomizer/pipeline.d.ts.map +1 -1
  54. package/dist/baselines/atomizer/pipeline.js +216 -144
  55. package/dist/baselines/atomizer/pipeline.js.map +1 -1
  56. package/dist/baselines/atomizer/trackChangesAcceptorAst.d.ts.map +1 -1
  57. package/dist/baselines/atomizer/trackChangesAcceptorAst.js +199 -173
  58. package/dist/baselines/atomizer/trackChangesAcceptorAst.js.map +1 -1
  59. package/dist/baselines/wmlcomparer/DotnetCli.d.ts.map +1 -1
  60. package/dist/baselines/wmlcomparer/DotnetCli.js +7 -0
  61. package/dist/baselines/wmlcomparer/DotnetCli.js.map +1 -1
  62. package/dist/cli/compare-two.d.ts.map +1 -1
  63. package/dist/cli/compare-two.js +3 -1
  64. package/dist/cli/compare-two.js.map +1 -1
  65. package/dist/cli/conformance-adapter.d.ts +3 -0
  66. package/dist/cli/conformance-adapter.d.ts.map +1 -0
  67. package/dist/cli/conformance-adapter.js +93 -0
  68. package/dist/cli/conformance-adapter.js.map +1 -0
  69. package/dist/cli/index.d.ts.map +1 -1
  70. package/dist/cli/index.js +5 -1
  71. package/dist/cli/index.js.map +1 -1
  72. package/dist/compare-types.d.ts +197 -0
  73. package/dist/compare-types.d.ts.map +1 -0
  74. package/dist/compare-types.js +2 -0
  75. package/dist/compare-types.js.map +1 -0
  76. package/dist/core-types.d.ts +5 -1
  77. package/dist/core-types.d.ts.map +1 -1
  78. package/dist/core-types.js +5 -1
  79. package/dist/core-types.js.map +1 -1
  80. package/dist/footnotes.d.ts +8 -3
  81. package/dist/footnotes.d.ts.map +1 -1
  82. package/dist/footnotes.js +8 -3
  83. package/dist/footnotes.js.map +1 -1
  84. package/dist/generation/compile.d.ts +21 -0
  85. package/dist/generation/compile.d.ts.map +1 -0
  86. package/dist/generation/compile.js +46 -0
  87. package/dist/generation/compile.js.map +1 -0
  88. package/dist/generation/context.d.ts +42 -0
  89. package/dist/generation/context.d.ts.map +1 -0
  90. package/dist/generation/context.js +65 -0
  91. package/dist/generation/context.js.map +1 -0
  92. package/dist/generation/emit/comments-part.d.ts +36 -0
  93. package/dist/generation/emit/comments-part.d.ts.map +1 -0
  94. package/dist/generation/emit/comments-part.js +116 -0
  95. package/dist/generation/emit/comments-part.js.map +1 -0
  96. package/dist/generation/emit/document-part.d.ts +24 -0
  97. package/dist/generation/emit/document-part.d.ts.map +1 -0
  98. package/dist/generation/emit/document-part.js +60 -0
  99. package/dist/generation/emit/document-part.js.map +1 -0
  100. package/dist/generation/emit/emit-context.d.ts +26 -0
  101. package/dist/generation/emit/emit-context.d.ts.map +1 -0
  102. package/dist/generation/emit/emit-context.js +19 -0
  103. package/dist/generation/emit/emit-context.js.map +1 -0
  104. package/dist/generation/emit/header-footer-part.d.ts +23 -0
  105. package/dist/generation/emit/header-footer-part.d.ts.map +1 -0
  106. package/dist/generation/emit/header-footer-part.js +57 -0
  107. package/dist/generation/emit/header-footer-part.js.map +1 -0
  108. package/dist/generation/emit/numbering-part.d.ts +29 -0
  109. package/dist/generation/emit/numbering-part.d.ts.map +1 -0
  110. package/dist/generation/emit/numbering-part.js +100 -0
  111. package/dist/generation/emit/numbering-part.js.map +1 -0
  112. package/dist/generation/emit/package-parts.d.ts +24 -0
  113. package/dist/generation/emit/package-parts.d.ts.map +1 -0
  114. package/dist/generation/emit/package-parts.js +121 -0
  115. package/dist/generation/emit/package-parts.js.map +1 -0
  116. package/dist/generation/emit/paragraph.d.ts +24 -0
  117. package/dist/generation/emit/paragraph.d.ts.map +1 -0
  118. package/dist/generation/emit/paragraph.js +63 -0
  119. package/dist/generation/emit/paragraph.js.map +1 -0
  120. package/dist/generation/emit/properties.d.ts +34 -0
  121. package/dist/generation/emit/properties.d.ts.map +1 -0
  122. package/dist/generation/emit/properties.js +138 -0
  123. package/dist/generation/emit/properties.js.map +1 -0
  124. package/dist/generation/emit/run.d.ts +15 -0
  125. package/dist/generation/emit/run.d.ts.map +1 -0
  126. package/dist/generation/emit/run.js +71 -0
  127. package/dist/generation/emit/run.js.map +1 -0
  128. package/dist/generation/emit/section.d.ts +29 -0
  129. package/dist/generation/emit/section.d.ts.map +1 -0
  130. package/dist/generation/emit/section.js +117 -0
  131. package/dist/generation/emit/section.js.map +1 -0
  132. package/dist/generation/emit/settings-part.d.ts +13 -0
  133. package/dist/generation/emit/settings-part.d.ts.map +1 -0
  134. package/dist/generation/emit/settings-part.js +24 -0
  135. package/dist/generation/emit/settings-part.js.map +1 -0
  136. package/dist/generation/emit/styles-part.d.ts +16 -0
  137. package/dist/generation/emit/styles-part.d.ts.map +1 -0
  138. package/dist/generation/emit/styles-part.js +80 -0
  139. package/dist/generation/emit/styles-part.js.map +1 -0
  140. package/dist/generation/emit/table.d.ts +26 -0
  141. package/dist/generation/emit/table.d.ts.map +1 -0
  142. package/dist/generation/emit/table.js +196 -0
  143. package/dist/generation/emit/table.js.map +1 -0
  144. package/dist/generation/errors.d.ts +22 -0
  145. package/dist/generation/errors.d.ts.map +1 -0
  146. package/dist/generation/errors.js +29 -0
  147. package/dist/generation/errors.js.map +1 -0
  148. package/dist/generation/index.d.ts +13 -0
  149. package/dist/generation/index.d.ts.map +1 -0
  150. package/dist/generation/index.js +12 -0
  151. package/dist/generation/index.js.map +1 -0
  152. package/dist/generation/ordering.d.ts +46 -0
  153. package/dist/generation/ordering.d.ts.map +1 -0
  154. package/dist/generation/ordering.js +119 -0
  155. package/dist/generation/ordering.js.map +1 -0
  156. package/dist/generation/recipes.d.ts +47 -0
  157. package/dist/generation/recipes.d.ts.map +1 -0
  158. package/dist/generation/recipes.js +84 -0
  159. package/dist/generation/recipes.js.map +1 -0
  160. package/dist/generation/structural-checks.d.ts +24 -0
  161. package/dist/generation/structural-checks.d.ts.map +1 -0
  162. package/dist/generation/structural-checks.js +318 -0
  163. package/dist/generation/structural-checks.js.map +1 -0
  164. package/dist/generation/types.d.ts +217 -0
  165. package/dist/generation/types.d.ts.map +1 -0
  166. package/dist/generation/types.js +16 -0
  167. package/dist/generation/types.js.map +1 -0
  168. package/dist/generation/validate-spec.d.ts +27 -0
  169. package/dist/generation/validate-spec.d.ts.map +1 -0
  170. package/dist/generation/validate-spec.js +307 -0
  171. package/dist/generation/validate-spec.js.map +1 -0
  172. package/dist/index.d.ts +9 -150
  173. package/dist/index.d.ts.map +1 -1
  174. package/dist/index.js +14 -0
  175. package/dist/index.js.map +1 -1
  176. package/dist/integration/generation-probes.d.ts +15 -0
  177. package/dist/integration/generation-probes.d.ts.map +1 -0
  178. package/dist/integration/generation-probes.js +84 -0
  179. package/dist/integration/generation-probes.js.map +1 -0
  180. package/dist/integration/libreoffice-oracle.d.ts +49 -0
  181. package/dist/integration/libreoffice-oracle.d.ts.map +1 -0
  182. package/dist/integration/libreoffice-oracle.js +290 -0
  183. package/dist/integration/libreoffice-oracle.js.map +1 -0
  184. package/dist/integration/synthetic-docx-fixture.d.ts +72 -0
  185. package/dist/integration/synthetic-docx-fixture.d.ts.map +1 -1
  186. package/dist/integration/synthetic-docx-fixture.js +131 -4
  187. package/dist/integration/synthetic-docx-fixture.js.map +1 -1
  188. package/dist/primitives/accept_changes.d.ts +4 -3
  189. package/dist/primitives/accept_changes.d.ts.map +1 -1
  190. package/dist/primitives/accept_changes.js +163 -77
  191. package/dist/primitives/accept_changes.js.map +1 -1
  192. package/dist/primitives/comments.d.ts +12 -3
  193. package/dist/primitives/comments.d.ts.map +1 -1
  194. package/dist/primitives/comments.js +374 -97
  195. package/dist/primitives/comments.js.map +1 -1
  196. package/dist/primitives/content_fingerprint.d.ts +29 -0
  197. package/dist/primitives/content_fingerprint.d.ts.map +1 -0
  198. package/dist/primitives/content_fingerprint.js +63 -0
  199. package/dist/primitives/content_fingerprint.js.map +1 -0
  200. package/dist/primitives/document.d.ts +94 -15
  201. package/dist/primitives/document.d.ts.map +1 -1
  202. package/dist/primitives/document.js +373 -36
  203. package/dist/primitives/document.js.map +1 -1
  204. package/dist/primitives/document_view-comments.d.ts +18 -0
  205. package/dist/primitives/document_view-comments.d.ts.map +1 -0
  206. package/dist/primitives/document_view-comments.js +160 -0
  207. package/dist/primitives/document_view-comments.js.map +1 -0
  208. package/dist/primitives/document_view-headings.d.ts +45 -0
  209. package/dist/primitives/document_view-headings.d.ts.map +1 -0
  210. package/dist/primitives/document_view-headings.js +247 -0
  211. package/dist/primitives/document_view-headings.js.map +1 -0
  212. package/dist/primitives/document_view-styles.d.ts +11 -0
  213. package/dist/primitives/document_view-styles.d.ts.map +1 -0
  214. package/dist/primitives/document_view-styles.js +104 -0
  215. package/dist/primitives/document_view-styles.js.map +1 -0
  216. package/dist/primitives/document_view-toon.d.ts +37 -0
  217. package/dist/primitives/document_view-toon.d.ts.map +1 -0
  218. package/dist/primitives/document_view-toon.js +199 -0
  219. package/dist/primitives/document_view-toon.js.map +1 -0
  220. package/dist/primitives/document_view-types.d.ts +152 -0
  221. package/dist/primitives/document_view-types.d.ts.map +1 -0
  222. package/dist/primitives/document_view-types.js +2 -0
  223. package/dist/primitives/document_view-types.js.map +1 -0
  224. package/dist/primitives/document_view.d.ts +8 -106
  225. package/dist/primitives/document_view.d.ts.map +1 -1
  226. package/dist/primitives/document_view.js +153 -312
  227. package/dist/primitives/document_view.js.map +1 -1
  228. package/dist/primitives/dom-helpers.d.ts +9 -0
  229. package/dist/primitives/dom-helpers.d.ts.map +1 -1
  230. package/dist/primitives/dom-helpers.js +10 -1
  231. package/dist/primitives/dom-helpers.js.map +1 -1
  232. package/dist/primitives/footnotes.d.ts +4 -3
  233. package/dist/primitives/footnotes.d.ts.map +1 -1
  234. package/dist/primitives/footnotes.js +232 -44
  235. package/dist/primitives/footnotes.js.map +1 -1
  236. package/dist/primitives/formatting_tags.d.ts +7 -0
  237. package/dist/primitives/formatting_tags.d.ts.map +1 -1
  238. package/dist/primitives/formatting_tags.js +22 -11
  239. package/dist/primitives/formatting_tags.js.map +1 -1
  240. package/dist/primitives/index.d.ts +10 -0
  241. package/dist/primitives/index.d.ts.map +1 -1
  242. package/dist/primitives/index.js +9 -0
  243. package/dist/primitives/index.js.map +1 -1
  244. package/dist/primitives/layout.d.ts +4 -3
  245. package/dist/primitives/layout.d.ts.map +1 -1
  246. package/dist/primitives/layout.js +45 -3
  247. package/dist/primitives/layout.js.map +1 -1
  248. package/dist/primitives/merge_runs.d.ts +21 -3
  249. package/dist/primitives/merge_runs.d.ts.map +1 -1
  250. package/dist/primitives/merge_runs.js +32 -10
  251. package/dist/primitives/merge_runs.js.map +1 -1
  252. package/dist/primitives/minimal_save.d.ts +38 -0
  253. package/dist/primitives/minimal_save.d.ts.map +1 -0
  254. package/dist/primitives/minimal_save.js +323 -0
  255. package/dist/primitives/minimal_save.js.map +1 -0
  256. package/dist/primitives/namespaces.d.ts +47 -0
  257. package/dist/primitives/namespaces.d.ts.map +1 -1
  258. package/dist/primitives/namespaces.js +52 -0
  259. package/dist/primitives/namespaces.js.map +1 -1
  260. package/dist/primitives/reject_changes.d.ts +6 -4
  261. package/dist/primitives/reject_changes.d.ts.map +1 -1
  262. package/dist/primitives/reject_changes.js +187 -91
  263. package/dist/primitives/reject_changes.js.map +1 -1
  264. package/dist/primitives/revision-parts.d.ts +7 -0
  265. package/dist/primitives/revision-parts.d.ts.map +1 -0
  266. package/dist/primitives/revision-parts.js +27 -0
  267. package/dist/primitives/revision-parts.js.map +1 -0
  268. package/dist/primitives/revision-vocabulary.d.ts +7 -0
  269. package/dist/primitives/revision-vocabulary.d.ts.map +1 -0
  270. package/dist/primitives/revision-vocabulary.js +39 -0
  271. package/dist/primitives/revision-vocabulary.js.map +1 -0
  272. package/dist/primitives/schema-corpus-capture.d.ts +19 -0
  273. package/dist/primitives/schema-corpus-capture.d.ts.map +1 -0
  274. package/dist/primitives/schema-corpus-capture.js +29 -0
  275. package/dist/primitives/schema-corpus-capture.js.map +1 -0
  276. package/dist/primitives/sectPrAudit.d.ts +19 -0
  277. package/dist/primitives/sectPrAudit.d.ts.map +1 -0
  278. package/dist/primitives/sectPrAudit.js +165 -0
  279. package/dist/primitives/sectPrAudit.js.map +1 -0
  280. package/dist/primitives/semantic_tags.d.ts +7 -0
  281. package/dist/primitives/semantic_tags.d.ts.map +1 -1
  282. package/dist/primitives/semantic_tags.js +23 -4
  283. package/dist/primitives/semantic_tags.js.map +1 -1
  284. package/dist/primitives/serialize_html.d.ts +37 -0
  285. package/dist/primitives/serialize_html.d.ts.map +1 -0
  286. package/dist/primitives/serialize_html.js +395 -0
  287. package/dist/primitives/serialize_html.js.map +1 -0
  288. package/dist/primitives/serialize_markdown.d.ts +16 -0
  289. package/dist/primitives/serialize_markdown.d.ts.map +1 -0
  290. package/dist/primitives/serialize_markdown.js +300 -0
  291. package/dist/primitives/serialize_markdown.js.map +1 -0
  292. package/dist/primitives/serialize_plaintext.d.ts +15 -0
  293. package/dist/primitives/serialize_plaintext.d.ts.map +1 -0
  294. package/dist/primitives/serialize_plaintext.js +154 -0
  295. package/dist/primitives/serialize_plaintext.js.map +1 -0
  296. package/dist/primitives/styles.d.ts +15 -0
  297. package/dist/primitives/styles.d.ts.map +1 -1
  298. package/dist/primitives/styles.js +33 -22
  299. package/dist/primitives/styles.js.map +1 -1
  300. package/dist/primitives/tables.d.ts.map +1 -1
  301. package/dist/primitives/tables.js +13 -3
  302. package/dist/primitives/tables.js.map +1 -1
  303. package/dist/primitives/text.d.ts +2 -1
  304. package/dist/primitives/text.d.ts.map +1 -1
  305. package/dist/primitives/text.js +116 -12
  306. package/dist/primitives/text.js.map +1 -1
  307. package/dist/primitives/track-changes-emitter.d.ts +148 -0
  308. package/dist/primitives/track-changes-emitter.d.ts.map +1 -0
  309. package/dist/primitives/track-changes-emitter.js +291 -0
  310. package/dist/primitives/track-changes-emitter.js.map +1 -0
  311. package/dist/primitives/validate_ai_revisions.d.ts +35 -0
  312. package/dist/primitives/validate_ai_revisions.d.ts.map +1 -0
  313. package/dist/primitives/validate_ai_revisions.js +323 -0
  314. package/dist/primitives/validate_ai_revisions.js.map +1 -0
  315. package/dist/primitives/xml-helpers.d.ts +29 -0
  316. package/dist/primitives/xml-helpers.d.ts.map +1 -0
  317. package/dist/primitives/xml-helpers.js +35 -0
  318. package/dist/primitives/xml-helpers.js.map +1 -0
  319. package/dist/primitives/xml.d.ts +5 -0
  320. package/dist/primitives/xml.d.ts.map +1 -1
  321. package/dist/primitives/xml.js +5 -0
  322. package/dist/primitives/xml.js.map +1 -1
  323. package/dist/primitives/zip.d.ts +1 -0
  324. package/dist/primitives/zip.d.ts.map +1 -1
  325. package/dist/primitives/zip.js +21 -3
  326. package/dist/primitives/zip.js.map +1 -1
  327. package/dist/shared/field-structure.d.ts +14 -0
  328. package/dist/shared/field-structure.d.ts.map +1 -0
  329. package/dist/shared/field-structure.js +166 -0
  330. package/dist/shared/field-structure.js.map +1 -0
  331. package/dist/shared/ooxml/namespaces.d.ts +4 -1
  332. package/dist/shared/ooxml/namespaces.d.ts.map +1 -1
  333. package/dist/shared/ooxml/namespaces.js +4 -1
  334. package/dist/shared/ooxml/namespaces.js.map +1 -1
  335. package/package.json +13 -9
@@ -1,22 +1,26 @@
1
1
  import { OOXML, W } from './namespaces.js';
2
+ import { getAttributeSafe, getFirstChild } from './xml-helpers.js';
2
3
  import { getParagraphText, getParagraphRuns } from './text.js';
3
- import { extractListLabel, stripListLabel, LabelType } from './list_labels.js';
4
+ import { extractListLabel, stripListLabel } from './list_labels.js';
4
5
  import { parseNumberingXml, computeListLabelForParagraph } from './numbering.js';
5
6
  import { parseStylesXml, extractParagraphFormatting, extractEffectiveRunFormatting } from './styles.js';
6
7
  import { HIGHLIGHT_TAG } from './semantic_tags.js';
7
8
  import { computeModalBaseline, computeParagraphFontBaseline, emitFormattingTags, mergeAdjacentTags } from './formatting_tags.js';
8
9
  import { isReservedFootnote } from './footnotes.js';
9
- const SHORT_HEADER_MAX_LENGTH = 50;
10
- const MAX_HEADER_TEXT_LENGTH = 60;
11
- const STYLE_EXAMPLE_TEXT_PREVIEW_LENGTH = 50;
10
+ import { deriveHeading, detectRunInHeader, detectTitleCapsCentered, extractHeaderInfo, suppressSignatureClusters, } from './document_view-headings.js';
11
+ import { discoverStyles, fingerprintKey } from './document_view-styles.js';
12
+ import { findTaggedTextInsertionIndex } from './document_view-comments.js';
13
+ export { discoverStyles } from './document_view-styles.js';
14
+ export { INLINE_COMMENT_MARKER_RUNTIME, TOON_INLINE_TAG_RE, collectInlineCommentMarkers, tokenizeToonInline } from './document_view-comments.js';
15
+ export { collectTableMarkerInfo, formatTableMarker, formatToonCommentEndnoteLines, formatToonCommentLines, formatToonCommentsEndnotesBlock, formatToonDataLine, renderToon, renderToonWithCommentEndnotes, } from './document_view-toon.js';
12
16
  function getWAttr(el, localName) {
13
- return el.getAttributeNS(OOXML.W_NS, localName) ?? el.getAttribute(`w:${localName}`) ?? el.getAttribute(localName);
17
+ return getAttributeSafe(el, OOXML.W_NS, localName, 'w');
14
18
  }
15
19
  function runHighlightVal(run) {
16
- const rPr = run.getElementsByTagNameNS(OOXML.W_NS, W.rPr).item(0);
20
+ const rPr = getFirstChild(run, OOXML.W_NS, W.rPr);
17
21
  if (!rPr)
18
22
  return null;
19
- const h = rPr.getElementsByTagNameNS(OOXML.W_NS, W.highlight).item(0);
23
+ const h = getFirstChild(rPr, OOXML.W_NS, W.highlight);
20
24
  if (!h)
21
25
  return null;
22
26
  const v = getWAttr(h, 'val');
@@ -46,284 +50,6 @@ function emitHighlightTagsFromParagraph(p) {
46
50
  out.push(`</${HIGHLIGHT_TAG}>`);
47
51
  return out.join('');
48
52
  }
49
- function fingerprintKey(fp) {
50
- // Stable JSON-ish key used for Map lookups.
51
- return `${fp.list_level}|${fp.left_indent_pt.toFixed(1)}|${fp.first_line_indent_pt.toFixed(1)}|${fp.style_name}|${fp.alignment}`;
52
- }
53
- /**
54
- * v0.3: Compact style fingerprint token.
55
- * Concatenates style name, list level, alignment, and indentation for token-efficient LLM context.
56
- * Example: "Normal:L-1:LEFT:I0:H0"
57
- */
58
- function computeFingerprintToken(fp, styleId) {
59
- const name = styleId || fp.style_name || 'body';
60
- const level = `L${fp.list_level}`;
61
- const align = fp.alignment;
62
- const indent = `I${Math.round(fp.left_indent_pt)}`;
63
- const hanging = `H${Math.round(fp.first_line_indent_pt)}`;
64
- return `${name}:${level}:${align}:${indent}:${hanging}`;
65
- }
66
- // Pattern-based header detection fallback (ported from Python ingestor._extract_header_info).
67
- const HEADER_PATTERN = /^([A-Z][^.!?:]*(?:\s+[A-Z][^.!?:]*)*)([.:]?)(?:\s|$)/;
68
- function extractHeaderInfo(cleanText) {
69
- if (!cleanText || cleanText.length < 2)
70
- return { header_text: null, header_style: null };
71
- if (!/^[A-Z]/.test(cleanText))
72
- return { header_text: null, header_style: null };
73
- const stripped = cleanText.trim();
74
- if (stripped.length <= SHORT_HEADER_MAX_LENGTH) {
75
- if (stripped.endsWith('.'))
76
- return { header_text: stripped.slice(0, -1), header_style: 'title_with_period' };
77
- if (stripped.endsWith(':'))
78
- return { header_text: stripped.slice(0, -1), header_style: 'title_with_colon' };
79
- const words = stripped.split(/\s+/);
80
- if (words.length <= 5)
81
- return { header_text: stripped, header_style: 'title_bare' };
82
- return { header_text: null, header_style: null };
83
- }
84
- const m = HEADER_PATTERN.exec(stripped);
85
- if (!m)
86
- return { header_text: null, header_style: null };
87
- const headerText = (m[1] ?? '').trim();
88
- const terminator = m[2] ?? '';
89
- const remaining = stripped.slice(m[0].length);
90
- if (!remaining || headerText.length > MAX_HEADER_TEXT_LENGTH)
91
- return { header_text: null, header_style: null };
92
- if (terminator === '.')
93
- return { header_text: headerText, header_style: 'title_with_period' };
94
- if (terminator === ':')
95
- return { header_text: headerText, header_style: 'title_with_colon' };
96
- return { header_text: headerText, header_style: 'title_bare' };
97
- }
98
- function detectRunInHeader(params) {
99
- const { paragraph, paragraphPPr, paragraphStyleId, styles } = params;
100
- const punct = new Set(['.', ':', '-']);
101
- // Use visible runs only (field code text stripped in getParagraphRuns()).
102
- const runs = getParagraphRuns(paragraph);
103
- if (runs.length === 0)
104
- return null;
105
- // Group by run element, preserving order.
106
- const orderedUniqueRuns = [];
107
- const seen = new Set();
108
- for (const tr of runs) {
109
- if (!seen.has(tr.r)) {
110
- seen.add(tr.r);
111
- orderedUniqueRuns.push(tr.r);
112
- }
113
- }
114
- let headerText = '';
115
- let formatting = null;
116
- let headerCharCount = 0;
117
- for (const r of orderedUniqueRuns) {
118
- const fmt = extractEffectiveRunFormatting({ run: r, paragraphPPr, paragraphStyleId, styles });
119
- const isHeaderStyle = fmt.bold || fmt.underline;
120
- if (!isHeaderStyle)
121
- break;
122
- // Accumulate run text.
123
- const ts = Array.from(r.getElementsByTagNameNS(OOXML.W_NS, W.t));
124
- for (const t of ts) {
125
- const tc = t.textContent ?? '';
126
- headerText += tc;
127
- headerCharCount += tc.length;
128
- }
129
- if (!formatting)
130
- formatting = { bold: fmt.bold, italic: fmt.italic, underline: fmt.underline };
131
- }
132
- const trimmed = headerText.trim();
133
- if (!trimmed)
134
- return null;
135
- if (!punct.has(trimmed[trimmed.length - 1]))
136
- return null;
137
- if (!formatting)
138
- return null;
139
- return { raw_text: trimmed, formatting, headerCharCount };
140
- }
141
- function inferSemanticName(params) {
142
- const { fp, nodes } = params;
143
- // Find first label_type if present.
144
- let labelType = null;
145
- for (const n of nodes) {
146
- if (n.list_metadata.label_type) {
147
- labelType = n.list_metadata.label_type;
148
- break;
149
- }
150
- }
151
- const listLevel = fp.list_level;
152
- if (listLevel >= 0) {
153
- if (listLevel === 0) {
154
- if (labelType === LabelType.ARTICLE)
155
- return { base_id: 'article', display_name: 'Article Heading' };
156
- if (labelType === LabelType.SECTION)
157
- return { base_id: 'section', display_name: 'Section Heading' };
158
- if (labelType === LabelType.ROMAN)
159
- return { base_id: 'roman_section', display_name: 'Roman Numeral Section' };
160
- return { base_id: 'top_level', display_name: 'Top-Level List Item' };
161
- }
162
- if (listLevel === 1) {
163
- if (labelType === LabelType.LETTER)
164
- return { base_id: 'subsection', display_name: 'Subsection (a)/(A)' };
165
- if (labelType === LabelType.NUMBER)
166
- return { base_id: 'subsection_number', display_name: 'Numbered Subsection' };
167
- if (labelType === LabelType.ROMAN)
168
- return { base_id: 'subsection_roman', display_name: 'Roman Subsection' };
169
- return { base_id: 'level_1', display_name: `Level ${listLevel} List Item` };
170
- }
171
- if (labelType === LabelType.ROMAN)
172
- return { base_id: `level_${listLevel}_roman`, display_name: `Level ${listLevel} Roman` };
173
- if (labelType === LabelType.LETTER)
174
- return { base_id: `level_${listLevel}_letter`, display_name: `Level ${listLevel} Letter` };
175
- return { base_id: `level_${listLevel}`, display_name: `Level ${listLevel} List Item` };
176
- }
177
- // Non-list.
178
- const styleName = fp.style_name.toLowerCase().replace(/\s+/g, '_');
179
- if (fp.left_indent_pt > 0)
180
- return { base_id: 'indent_block', display_name: 'Indented Block' };
181
- if (styleName.includes('heading') || styleName.includes('title'))
182
- return { base_id: 'heading', display_name: 'Heading' };
183
- if (styleName.includes('quote') || styleName.includes('block'))
184
- return { base_id: 'block_quote', display_name: 'Block Quote' };
185
- return { base_id: 'body', display_name: 'Body Text' };
186
- }
187
- export function discoverStyles(nodes) {
188
- const groups = new Map();
189
- for (const n of nodes) {
190
- const key = fingerprintKey(n.style_fingerprint);
191
- const g = groups.get(key);
192
- if (g)
193
- g.nodes.push(n);
194
- else
195
- groups.set(key, { fp: n.style_fingerprint, nodes: [n] });
196
- }
197
- const used = {};
198
- const styles = new Map();
199
- const fpToStyle = new Map();
200
- for (const [fpKey, g] of groups.entries()) {
201
- const { base_id, display_name } = inferSemanticName({ fp: g.fp, nodes: g.nodes });
202
- let styleId = base_id;
203
- if (used[base_id] !== undefined) {
204
- used[base_id] += 1;
205
- styleId = `${base_id}_${used[base_id]}`;
206
- }
207
- else {
208
- used[base_id] = 0;
209
- }
210
- const median = g.nodes[Math.floor(g.nodes.length / 2)];
211
- const info = {
212
- style_id: styleId,
213
- display_name,
214
- fingerprint: g.fp,
215
- example_node_id: median.id,
216
- example_text: median.clean_text.slice(0, STYLE_EXAMPLE_TEXT_PREVIEW_LENGTH),
217
- count: g.nodes.length,
218
- dominant_alignment: g.fp.alignment,
219
- };
220
- styles.set(styleId, info);
221
- fpToStyle.set(fpKey, styleId);
222
- }
223
- return { styles, fingerprint_to_style: fpToStyle };
224
- }
225
- function headerStripFromText(params) {
226
- // Mirrors Python TOONRenderer header stripping.
227
- const { header } = params;
228
- let { text } = params;
229
- if (!header)
230
- return text;
231
- const headerNorm = header.trim().toLowerCase();
232
- const textLower = text.toLowerCase();
233
- for (const punct of [':', '.', '-', ';', '']) {
234
- const testPrefix = `${headerNorm}${punct}`;
235
- if (textLower.startsWith(testPrefix)) {
236
- text = text.slice(testPrefix.length).trimStart();
237
- return text;
238
- }
239
- }
240
- if (text.startsWith(header)) {
241
- text = text.slice(header.length).replace(/^[.:\-;]+/, '').trimStart();
242
- }
243
- return text;
244
- }
245
- /**
246
- * Format a single toon data line for one DocumentViewNode.
247
- * Handles table-context-aware style (th/td) and header stripping.
248
- */
249
- export function formatToonDataLine(n, options) {
250
- let text = n.tagged_text;
251
- if (n.header)
252
- text = headerStripFromText({ header: n.header, text });
253
- let header = n.header;
254
- if (header && !text) {
255
- text = header;
256
- header = '';
257
- }
258
- const tc = n.table_context;
259
- let style;
260
- if (tc) {
261
- style = tc.is_header_row
262
- ? `th(${tc.row_index},${tc.col_index})`
263
- : `td(${tc.row_index},${tc.col_index})`;
264
- }
265
- else {
266
- style = options?.compact
267
- ? computeFingerprintToken(n.style_fingerprint, n.style)
268
- : n.style;
269
- }
270
- return `${n.id} | ${n.list_label} | ${header} | ${style} | ${text}`;
271
- }
272
- /**
273
- * Collect table marker info (dimensions) from nodes for #TABLE markers.
274
- * Column headers are NOT included in the marker — they appear once in the th() rows.
275
- */
276
- export function collectTableMarkerInfo(nodes) {
277
- const info = new Map();
278
- for (const n of nodes) {
279
- const tc = n.table_context;
280
- if (!tc)
281
- continue;
282
- if (!info.has(tc.table_index)) {
283
- info.set(tc.table_index, {
284
- id: tc.table_id,
285
- totalRows: tc.total_rows,
286
- totalCols: tc.total_cols,
287
- });
288
- }
289
- }
290
- return info;
291
- }
292
- /**
293
- * Format a #TABLE marker line from collected table info.
294
- * Headers are omitted — they appear exactly once in the th(0,N) data rows.
295
- */
296
- export function formatTableMarker(info) {
297
- return `#TABLE ${info.id} | ${info.totalRows} rows × ${info.totalCols} cols`;
298
- }
299
- export function renderToon(nodes, options = {}) {
300
- const lines = ['#SCHEMA id | list_label | header | style | text'];
301
- // Pre-scan: collect table marker info for #TABLE lines
302
- const tableInfo = collectTableMarkerInfo(nodes);
303
- let currentTableIndex = null;
304
- for (const n of nodes) {
305
- const tc = n.table_context;
306
- const nodeTableIndex = tc ? tc.table_index : null;
307
- // Close previous table if we left it or moved to a different table
308
- if (currentTableIndex !== null && nodeTableIndex !== currentTableIndex) {
309
- lines.push('#END_TABLE');
310
- currentTableIndex = null;
311
- }
312
- // Open new table if entering one
313
- if (nodeTableIndex !== null && currentTableIndex === null) {
314
- const info = tableInfo.get(nodeTableIndex);
315
- if (info)
316
- lines.push(formatTableMarker(info));
317
- currentTableIndex = nodeTableIndex;
318
- }
319
- lines.push(formatToonDataLine(n, options));
320
- }
321
- // Close any open table at end
322
- if (currentTableIndex !== null) {
323
- lines.push('#END_TABLE');
324
- }
325
- return lines.join('\n');
326
- }
327
53
  export function buildDocumentView(params) {
328
54
  const { documentXml, stylesXml, numberingXml, opts } = params;
329
55
  const includeSemantic = opts?.include_semantic_tags ?? true;
@@ -334,7 +60,7 @@ export function buildDocumentView(params) {
334
60
  void numberingModel;
335
61
  const counters = new Map();
336
62
  void counters;
337
- const body = documentXml.getElementsByTagNameNS(OOXML.W_NS, W.body).item(0);
63
+ const body = getFirstChild(documentXml, OOXML.W_NS, W.body);
338
64
  if (!body)
339
65
  return { nodes: [], styles: { styles: new Map(), fingerprint_to_style: new Map() } };
340
66
  const paragraphs = Array.from(body.getElementsByTagNameNS(OOXML.W_NS, W.p));
@@ -357,9 +83,7 @@ function resolveRunHyperlinkUrl(runEl, relsMap) {
357
83
  if (!parent || parent.localName !== W.hyperlink)
358
84
  return null;
359
85
  // r:id attribute can be namespaced or prefixed.
360
- const rId = parent.getAttributeNS(OOXML.R_NS, 'id') ??
361
- parent.getAttribute('r:id') ??
362
- null;
86
+ const rId = getAttributeSafe(parent, OOXML.R_NS, 'id', 'r', { bareFallback: false });
363
87
  if (!rId)
364
88
  return null;
365
89
  return relsMap.get(rId) ?? null;
@@ -439,9 +163,11 @@ function buildFootnoteDisplayMap(documentXml, footnotesXml) {
439
163
  return map;
440
164
  }
441
165
  /**
442
- * Compute footnote marker insertion points for a paragraph.
443
- * Returns an array of { offset, marker } sorted by offset descending
444
- * for safe right-to-left insertion into the text string.
166
+ * Compute the footnote references a paragraph visibly anchors, in document
167
+ * order. This is the single derivation of "which footnotes does this paragraph
168
+ * reference, and with what display number" — the view injects [^N] markers
169
+ * from it AND exposes it as DocumentViewNode.footnote_refs so consumers
170
+ * (read_file's clean_text suffix) never re-walk the DOM. @see #393
445
171
  *
446
172
  * Self-contained: only inspects the paragraph DOM for w:footnoteReference
447
173
  * elements. Does NOT modify getParagraphRuns or getParagraphText.
@@ -500,28 +226,85 @@ function getFootnoteMarkersForParagraph(p, displayMap) {
500
226
  if (displayNum != null) {
501
227
  markers.push({
502
228
  offset: visibleOffset + runVisibleLen,
503
- marker: `[^${displayNum}]`,
229
+ id: footnoteId,
230
+ display: displayNum,
504
231
  });
505
232
  }
506
233
  }
507
234
  visibleOffset += runVisibleLen;
508
235
  }
509
- // Sort descending by offset for safe right-to-left insertion
510
- markers.sort((a, b) => b.offset - a.offset);
511
236
  return markers;
512
237
  }
513
238
  /**
514
- * Inject footnote markers into a text string at the given offsets.
515
- * Markers must be sorted descending by offset.
239
+ * Paragraph content that makes a text-empty paragraph meaningful on its own:
240
+ * an endnote or comment anchored to the paragraph (the comment range markers
241
+ * are what `getComments` resolves `anchored_paragraph_id`/`end_paragraph_id`
242
+ * from, so dropping their paragraph leaves a dangling anchor ID no node_ids
243
+ * probe can resolve), or embedded visual content (DrawingML drawing, VML
244
+ * picture, embedded object). Dropping such a paragraph from the document view
245
+ * severs the anchored note/comment from every read surface and silently
246
+ * hides images.
247
+ *
248
+ * Footnote references are handled separately via the display map so their
249
+ * [^N] markers render; the shapes here only need the node to exist.
250
+ * @see #383
251
+ */
252
+ const ANCHORING_CONTENT = [
253
+ W.endnoteReference,
254
+ W.commentReference,
255
+ W.commentRangeStart,
256
+ W.commentRangeEnd,
257
+ W.drawing,
258
+ W.pict,
259
+ W.object,
260
+ ];
261
+ /**
262
+ * True when `el` sits inside a `w:del` or `w:moveFrom` revision wrapper below
263
+ * the paragraph. Deleted/moved-from content is invisible to the view's text
264
+ * extraction (`getParagraphText` reads `w:t`, never `w:delText`), so an
265
+ * anchor that only survives inside a tracked deletion — e.g. the
266
+ * `w:commentReference` a tracked comment-delete leaves under `w:del` — must
267
+ * not resurrect its paragraph as a blank visible node.
268
+ */
269
+ function isInsideRemovedRevisionWrapper(el, paragraph) {
270
+ let cur = el.parentNode;
271
+ while (cur && cur !== paragraph) {
272
+ if (cur.namespaceURI === OOXML.W_NS && (cur.localName === W.del || cur.localName === W.moveFrom)) {
273
+ return true;
274
+ }
275
+ cur = cur.parentNode;
276
+ }
277
+ return false;
278
+ }
279
+ function paragraphHasAnchoringContent(p) {
280
+ return ANCHORING_CONTENT.some((localName) => {
281
+ const els = p.getElementsByTagNameNS(OOXML.W_NS, localName);
282
+ for (let i = 0; i < els.length; i++) {
283
+ if (!isInsideRemovedRevisionWrapper(els.item(i), p))
284
+ return true;
285
+ }
286
+ return false;
287
+ });
288
+ }
289
+ /**
290
+ * Inject [^N] footnote markers into a text string at the given offsets.
291
+ * Markers arrive in document order; insertion happens right-to-left (offset
292
+ * descending) so earlier offsets stay valid as text grows.
293
+ *
294
+ * Offsets are *visible*-character offsets (they count document text, not the inline
295
+ * formatting tags emitted by `emitFormattingTags`). When `text` carries formatting tags
296
+ * we therefore map each visible offset to a tag-aware insertion index, exactly as the
297
+ * comment-marker path does (`findTaggedTextInsertionIndex`). A naive `slice(offset)` would
298
+ * land the `[^n]` marker inside a tag or mid-word once formatting is present.
516
299
  */
517
300
  function injectFootnoteMarkers(text, markers) {
518
301
  if (markers.length === 0)
519
302
  return text;
303
+ const descending = [...markers].sort((a, b) => b.offset - a.offset);
520
304
  let result = text;
521
- for (const { offset, marker } of markers) {
522
- // Clamp offset to text length
523
- const pos = Math.min(offset, result.length);
524
- result = result.slice(0, pos) + marker + result.slice(pos);
305
+ for (const { offset, display } of descending) {
306
+ const insertionIndex = findTaggedTextInsertionIndex(result, offset);
307
+ result = result.slice(0, insertionIndex) + `[^${display}]` + result.slice(insertionIndex);
525
308
  }
526
309
  return result;
527
310
  }
@@ -544,7 +327,7 @@ export function buildNodesForDocumentView(params) {
544
327
  const allBodyRuns = [];
545
328
  if (showFormatting) {
546
329
  for (const { p } of paragraphs) {
547
- const paraPPr = p.getElementsByTagNameNS(OOXML.W_NS, W.pPr).item(0);
330
+ const paraPPr = getFirstChild(p, OOXML.W_NS, W.pPr);
548
331
  const paraFmt = extractParagraphFormatting(paraPPr ?? null, stylesModel);
549
332
  const runs = buildAnnotatedRuns({
550
333
  p,
@@ -593,20 +376,34 @@ export function buildNodesForDocumentView(params) {
593
376
  const nodes = [];
594
377
  for (let idx = 0; idx < paragraphs.length; idx++) {
595
378
  const { id, p, tableContext } = paragraphs[idx];
596
- const paraPPr = p.getElementsByTagNameNS(OOXML.W_NS, W.pPr).item(0);
379
+ const paraPPr = getFirstChild(p, OOXML.W_NS, W.pPr);
597
380
  const paraFmt = extractParagraphFormatting(paraPPr ?? null, stylesModel);
598
381
  // Visible clean text (field codes stripped).
599
382
  const fullText = getParagraphText(p).replace(/\r/g, '').replace(/\n/g, '').trim();
600
- // Preserve empty table cell paragraphs for structural completeness.
601
- if (!fullText && !tableContext)
383
+ // Computed once per paragraph: gates the empty-paragraph skip below, drives
384
+ // the [^N] marker injection, and is exposed as node.footnote_refs.
385
+ const fnMarkers = getFootnoteMarkersForParagraph(p, footnoteDisplayMap);
386
+ // Preserve empty table cell paragraphs for structural completeness, and
387
+ // text-empty paragraphs that carry anchoring content — a visible footnote
388
+ // reference (its [^N] marker renders via the injection pass below), an
389
+ // endnote reference, a comment reference or comment range marker, or an
390
+ // embedded drawing/picture/object. Dropping those loses the anchored
391
+ // note/comment/image from every rendering of the document view. Anchors
392
+ // that survive only inside a tracked deletion don't count, and paragraphs
393
+ // that are empty for spacing only are still skipped.
394
+ // @see #185, #383
395
+ if (!fullText &&
396
+ !tableContext &&
397
+ fnMarkers.length === 0 &&
398
+ !paragraphHasAnchoringContent(p))
602
399
  continue;
603
400
  // Numbering (auto-numbered) info from numPr.
604
401
  let numId = null;
605
402
  let ilvl = null;
606
- const numPr = paraPPr ? paraPPr.getElementsByTagNameNS(OOXML.W_NS, W.numPr).item(0) : null;
403
+ const numPr = paraPPr ? getFirstChild(paraPPr, OOXML.W_NS, W.numPr) : null;
607
404
  if (numPr) {
608
- const numIdEl = numPr.getElementsByTagNameNS(OOXML.W_NS, W.numId).item(0);
609
- const ilvlEl = numPr.getElementsByTagNameNS(OOXML.W_NS, W.ilvl).item(0);
405
+ const numIdEl = getFirstChild(numPr, OOXML.W_NS, W.numId);
406
+ const ilvlEl = getFirstChild(numPr, OOXML.W_NS, W.ilvl);
610
407
  const numIdVal = numIdEl ? getWAttr(numIdEl, 'val') : null;
611
408
  const ilvlVal = ilvlEl ? getWAttr(ilvlEl, 'val') : null;
612
409
  if (numIdVal)
@@ -649,7 +446,13 @@ export function buildNodesForDocumentView(params) {
649
446
  let headerFormatting = null;
650
447
  let headerCharCount = 0;
651
448
  try {
652
- const hdr = detectRunInHeader({ paragraph: p, paragraphPPr: paraPPr ?? null, paragraphStyleId: paraFmt.styleId, styles: stylesModel });
449
+ // Skip in-table run-in header detection: table cells are key/value
450
+ // layout and a bold prefix is a label, not a section heading.
451
+ // Mirrors the !tableContext gates on detectTitleCapsCentered and
452
+ // extractHeaderInfo below.
453
+ const hdr = tableContext
454
+ ? null
455
+ : detectRunInHeader({ paragraph: p, paragraphPPr: paraPPr ?? null, paragraphStyleId: paraFmt.styleId, styles: stylesModel });
653
456
  if (hdr) {
654
457
  headerText = hdr.raw_text.replace(/[.:\-]+$/g, '');
655
458
  headerStyle = 'run_in_header';
@@ -660,11 +463,39 @@ export function buildNodesForDocumentView(params) {
660
463
  catch {
661
464
  // ignore
662
465
  }
663
- if (!headerText) {
466
+ // Centered ALL-CAPS bold standalone titles (e.g. an NVCA SPA's
467
+ // `SERIES […] PREFERRED STOCK PURCHASE AGREEMENT`). Runs before
468
+ // extractHeaderInfo so the documented precedence (title_caps_centered
469
+ // outranks short standalone title_bare/title_with_period/title_with_colon)
470
+ // matches the implementation. Only fires when run_in_header did not match
471
+ // AND the paragraph has no list label AND is not in a table cell. The
472
+ // try/catch is defensive against malformed XML in user documents.
473
+ if (!headerText && !labelString && !tableContext) {
474
+ try {
475
+ const titleHdr = detectTitleCapsCentered({
476
+ paragraph: p,
477
+ paragraphPPr: paraPPr ?? null,
478
+ paragraphStyleId: paraFmt.styleId,
479
+ alignment: paraFmt.alignment,
480
+ cleanTextNoLabel,
481
+ styles: stylesModel,
482
+ });
483
+ if (titleHdr) {
484
+ headerText = titleHdr.raw_text;
485
+ headerStyle = 'title_caps_centered';
486
+ headerFormatting = titleHdr.formatting;
487
+ }
488
+ }
489
+ catch {
490
+ // ignore: malformed run/style data falls through to extractHeaderInfo.
491
+ }
492
+ }
493
+ if (!headerText && !tableContext) {
664
494
  const fallback = extractHeaderInfo(cleanTextNoLabel);
665
495
  headerText = fallback.header_text;
666
496
  headerStyle = fallback.header_style;
667
497
  }
498
+ const heading = deriveHeading(paraFmt.styleId, cleanTextNoLabel, headerText, headerStyle, tableContext != null);
668
499
  // ── Tag emission ──
669
500
  let tagged = cleanTextNoLabel;
670
501
  if (showFormatting) {
@@ -721,7 +552,7 @@ export function buildNodesForDocumentView(params) {
721
552
  }
722
553
  // Emit formatting tags from run-level metadata.
723
554
  const paraFontBaseline = computeParagraphFontBaseline(bodyRuns, { formattingMode });
724
- tagged = emitFormattingTags({ runs: bodyRuns, baseline: docBaseline, fontBaseline: paraFontBaseline });
555
+ tagged = emitFormattingTags({ runs: bodyRuns, baseline: docBaseline, fontBaseline: paraFontBaseline, formattingMode });
725
556
  tagged = mergeAdjacentTags(tagged);
726
557
  }
727
558
  else if (includeSemantic) {
@@ -774,10 +605,13 @@ export function buildNodesForDocumentView(params) {
774
605
  bodyFmt = null;
775
606
  }
776
607
  // Inject footnote [^N] markers into view text (view-only, not shared text primitives)
777
- const fnMarkers = getFootnoteMarkersForParagraph(p, footnoteDisplayMap);
778
608
  if (fnMarkers.length > 0) {
779
609
  tagged = injectFootnoteMarkers(tagged, fnMarkers);
780
610
  }
611
+ // Visible characters stripped from the raw paragraph head when extracting a manual
612
+ // label (label text + trailing whitespace). Auto-numbered paragraphs leave fullText
613
+ // intact, so this is 0 for them.
614
+ const visibleOffsetCorrection = isAutoNumbered ? 0 : Math.max(0, fullText.length - cleanTextNoLabel.length);
781
615
  const node = {
782
616
  id,
783
617
  list_label: labelString,
@@ -786,6 +620,7 @@ export function buildNodesForDocumentView(params) {
786
620
  text: tagged, // filled after header stripping at render time
787
621
  clean_text: cleanTextNoLabel,
788
622
  tagged_text: tagged,
623
+ visible_offset_correction: visibleOffsetCorrection > 0 ? visibleOffsetCorrection : undefined,
789
624
  list_metadata: {
790
625
  list_level: listLevel,
791
626
  label_type: labelType,
@@ -804,10 +639,16 @@ export function buildNodesForDocumentView(params) {
804
639
  header_formatting: headerFormatting,
805
640
  body_run_formatting: bodyFmt,
806
641
  };
642
+ if (heading)
643
+ node.heading = heading;
644
+ if (fnMarkers.length > 0) {
645
+ node.footnote_refs = fnMarkers.map(({ id: fnId, display }) => ({ id: fnId, display }));
646
+ }
807
647
  if (tableContext)
808
648
  node.table_context = tableContext;
809
649
  nodes.push(node);
810
650
  }
651
+ suppressSignatureClusters(nodes);
811
652
  const styles = discoverStyles(nodes);
812
653
  for (const n of nodes) {
813
654
  const sid = styles.fingerprint_to_style.get(fingerprintKey(n.style_fingerprint));