@usejunior/docx-core 0.9.1 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181) hide show
  1. package/dist/.tsbuildinfo +1 -1
  2. package/dist/atomizer.d.ts +15 -1
  3. package/dist/atomizer.d.ts.map +1 -1
  4. package/dist/atomizer.js +37 -1
  5. package/dist/atomizer.js.map +1 -1
  6. package/dist/baselines/atomizer/documentReconstructor.d.ts.map +1 -1
  7. package/dist/baselines/atomizer/documentReconstructor.js +218 -90
  8. package/dist/baselines/atomizer/documentReconstructor.js.map +1 -1
  9. package/dist/baselines/atomizer/formattingFidelity.d.ts +99 -0
  10. package/dist/baselines/atomizer/formattingFidelity.d.ts.map +1 -0
  11. package/dist/baselines/atomizer/formattingFidelity.js +449 -0
  12. package/dist/baselines/atomizer/formattingFidelity.js.map +1 -0
  13. package/dist/baselines/atomizer/inPlaceModifier-bookmarks.d.ts +37 -0
  14. package/dist/baselines/atomizer/inPlaceModifier-bookmarks.d.ts.map +1 -0
  15. package/dist/baselines/atomizer/inPlaceModifier-bookmarks.js +189 -0
  16. package/dist/baselines/atomizer/inPlaceModifier-bookmarks.js.map +1 -0
  17. package/dist/baselines/atomizer/inPlaceModifier-containers.d.ts +74 -0
  18. package/dist/baselines/atomizer/inPlaceModifier-containers.d.ts.map +1 -0
  19. package/dist/baselines/atomizer/inPlaceModifier-containers.js +171 -0
  20. package/dist/baselines/atomizer/inPlaceModifier-containers.js.map +1 -0
  21. package/dist/baselines/atomizer/inPlaceModifier-deletion.d.ts +88 -0
  22. package/dist/baselines/atomizer/inPlaceModifier-deletion.d.ts.map +1 -0
  23. package/dist/baselines/atomizer/inPlaceModifier-deletion.js +326 -0
  24. package/dist/baselines/atomizer/inPlaceModifier-deletion.js.map +1 -0
  25. package/dist/baselines/atomizer/inPlaceModifier-postprocess.d.ts +85 -0
  26. package/dist/baselines/atomizer/inPlaceModifier-postprocess.d.ts.map +1 -0
  27. package/dist/baselines/atomizer/inPlaceModifier-postprocess.js +402 -0
  28. package/dist/baselines/atomizer/inPlaceModifier-postprocess.js.map +1 -0
  29. package/dist/baselines/atomizer/inPlaceModifier-presplit.d.ts +39 -0
  30. package/dist/baselines/atomizer/inPlaceModifier-presplit.d.ts.map +1 -0
  31. package/dist/baselines/atomizer/inPlaceModifier-presplit.js +265 -0
  32. package/dist/baselines/atomizer/inPlaceModifier-presplit.js.map +1 -0
  33. package/dist/baselines/atomizer/inPlaceModifier-shared.d.ts +62 -0
  34. package/dist/baselines/atomizer/inPlaceModifier-shared.d.ts.map +1 -0
  35. package/dist/baselines/atomizer/inPlaceModifier-shared.js +139 -0
  36. package/dist/baselines/atomizer/inPlaceModifier-shared.js.map +1 -0
  37. package/dist/baselines/atomizer/inPlaceModifier-wrappers.d.ts +189 -0
  38. package/dist/baselines/atomizer/inPlaceModifier-wrappers.d.ts.map +1 -0
  39. package/dist/baselines/atomizer/inPlaceModifier-wrappers.js +427 -0
  40. package/dist/baselines/atomizer/inPlaceModifier-wrappers.js.map +1 -0
  41. package/dist/baselines/atomizer/inPlaceModifier.d.ts +6 -290
  42. package/dist/baselines/atomizer/inPlaceModifier.d.ts.map +1 -1
  43. package/dist/baselines/atomizer/inPlaceModifier.js +23 -1828
  44. package/dist/baselines/atomizer/inPlaceModifier.js.map +1 -1
  45. package/dist/baselines/atomizer/pipeline.d.ts +76 -1
  46. package/dist/baselines/atomizer/pipeline.d.ts.map +1 -1
  47. package/dist/baselines/atomizer/pipeline.js +204 -27
  48. package/dist/baselines/atomizer/pipeline.js.map +1 -1
  49. package/dist/baselines/atomizer/trackChangesAcceptorAst.d.ts.map +1 -1
  50. package/dist/baselines/atomizer/trackChangesAcceptorAst.js +56 -160
  51. package/dist/baselines/atomizer/trackChangesAcceptorAst.js.map +1 -1
  52. package/dist/compare-types.d.ts +151 -0
  53. package/dist/compare-types.d.ts.map +1 -0
  54. package/dist/compare-types.js +2 -0
  55. package/dist/compare-types.js.map +1 -0
  56. package/dist/core-types.d.ts +5 -1
  57. package/dist/core-types.d.ts.map +1 -1
  58. package/dist/core-types.js +5 -1
  59. package/dist/core-types.js.map +1 -1
  60. package/dist/footnotes.d.ts +8 -3
  61. package/dist/footnotes.d.ts.map +1 -1
  62. package/dist/footnotes.js +8 -3
  63. package/dist/footnotes.js.map +1 -1
  64. package/dist/index.d.ts +6 -150
  65. package/dist/index.d.ts.map +1 -1
  66. package/dist/index.js +6 -0
  67. package/dist/index.js.map +1 -1
  68. package/dist/integration/libreoffice-oracle.d.ts +41 -0
  69. package/dist/integration/libreoffice-oracle.d.ts.map +1 -0
  70. package/dist/integration/libreoffice-oracle.js +282 -0
  71. package/dist/integration/libreoffice-oracle.js.map +1 -0
  72. package/dist/primitives/accept_changes.d.ts +2 -2
  73. package/dist/primitives/accept_changes.d.ts.map +1 -1
  74. package/dist/primitives/accept_changes.js +24 -79
  75. package/dist/primitives/accept_changes.js.map +1 -1
  76. package/dist/primitives/comments.d.ts +12 -3
  77. package/dist/primitives/comments.d.ts.map +1 -1
  78. package/dist/primitives/comments.js +374 -97
  79. package/dist/primitives/comments.js.map +1 -1
  80. package/dist/primitives/content_fingerprint.d.ts +29 -0
  81. package/dist/primitives/content_fingerprint.d.ts.map +1 -0
  82. package/dist/primitives/content_fingerprint.js +63 -0
  83. package/dist/primitives/content_fingerprint.js.map +1 -0
  84. package/dist/primitives/document.d.ts +56 -15
  85. package/dist/primitives/document.d.ts.map +1 -1
  86. package/dist/primitives/document.js +303 -32
  87. package/dist/primitives/document.js.map +1 -1
  88. package/dist/primitives/document_view-comments.d.ts +18 -0
  89. package/dist/primitives/document_view-comments.d.ts.map +1 -0
  90. package/dist/primitives/document_view-comments.js +159 -0
  91. package/dist/primitives/document_view-comments.js.map +1 -0
  92. package/dist/primitives/document_view-headings.d.ts +45 -0
  93. package/dist/primitives/document_view-headings.d.ts.map +1 -0
  94. package/dist/primitives/document_view-headings.js +247 -0
  95. package/dist/primitives/document_view-headings.js.map +1 -0
  96. package/dist/primitives/document_view-styles.d.ts +11 -0
  97. package/dist/primitives/document_view-styles.d.ts.map +1 -0
  98. package/dist/primitives/document_view-styles.js +104 -0
  99. package/dist/primitives/document_view-styles.js.map +1 -0
  100. package/dist/primitives/document_view-toon.d.ts +37 -0
  101. package/dist/primitives/document_view-toon.d.ts.map +1 -0
  102. package/dist/primitives/document_view-toon.js +199 -0
  103. package/dist/primitives/document_view-toon.js.map +1 -0
  104. package/dist/primitives/document_view-types.d.ts +137 -0
  105. package/dist/primitives/document_view-types.d.ts.map +1 -0
  106. package/dist/primitives/document_view-types.js +2 -0
  107. package/dist/primitives/document_view-types.js.map +1 -0
  108. package/dist/primitives/document_view.d.ts +8 -106
  109. package/dist/primitives/document_view.d.ts.map +1 -1
  110. package/dist/primitives/document_view.js +134 -301
  111. package/dist/primitives/document_view.js.map +1 -1
  112. package/dist/primitives/dom-helpers.d.ts +9 -0
  113. package/dist/primitives/dom-helpers.d.ts.map +1 -1
  114. package/dist/primitives/dom-helpers.js +10 -1
  115. package/dist/primitives/dom-helpers.js.map +1 -1
  116. package/dist/primitives/footnotes.d.ts +4 -3
  117. package/dist/primitives/footnotes.d.ts.map +1 -1
  118. package/dist/primitives/footnotes.js +232 -44
  119. package/dist/primitives/footnotes.js.map +1 -1
  120. package/dist/primitives/formatting_tags.d.ts +6 -0
  121. package/dist/primitives/formatting_tags.d.ts.map +1 -1
  122. package/dist/primitives/formatting_tags.js +6 -1
  123. package/dist/primitives/formatting_tags.js.map +1 -1
  124. package/dist/primitives/index.d.ts +6 -0
  125. package/dist/primitives/index.d.ts.map +1 -1
  126. package/dist/primitives/index.js +5 -0
  127. package/dist/primitives/index.js.map +1 -1
  128. package/dist/primitives/layout.d.ts +4 -3
  129. package/dist/primitives/layout.d.ts.map +1 -1
  130. package/dist/primitives/layout.js +32 -3
  131. package/dist/primitives/layout.js.map +1 -1
  132. package/dist/primitives/merge_runs.d.ts +21 -3
  133. package/dist/primitives/merge_runs.d.ts.map +1 -1
  134. package/dist/primitives/merge_runs.js +32 -10
  135. package/dist/primitives/merge_runs.js.map +1 -1
  136. package/dist/primitives/namespaces.d.ts +6 -0
  137. package/dist/primitives/namespaces.d.ts.map +1 -1
  138. package/dist/primitives/namespaces.js +9 -0
  139. package/dist/primitives/namespaces.js.map +1 -1
  140. package/dist/primitives/reject_changes.d.ts +2 -2
  141. package/dist/primitives/reject_changes.d.ts.map +1 -1
  142. package/dist/primitives/reject_changes.js +24 -81
  143. package/dist/primitives/reject_changes.js.map +1 -1
  144. package/dist/primitives/semantic_tags.d.ts +7 -0
  145. package/dist/primitives/semantic_tags.d.ts.map +1 -1
  146. package/dist/primitives/semantic_tags.js +21 -3
  147. package/dist/primitives/semantic_tags.js.map +1 -1
  148. package/dist/primitives/serialize_html.d.ts +36 -0
  149. package/dist/primitives/serialize_html.d.ts.map +1 -0
  150. package/dist/primitives/serialize_html.js +393 -0
  151. package/dist/primitives/serialize_html.js.map +1 -0
  152. package/dist/primitives/serialize_markdown.d.ts +16 -0
  153. package/dist/primitives/serialize_markdown.d.ts.map +1 -0
  154. package/dist/primitives/serialize_markdown.js +300 -0
  155. package/dist/primitives/serialize_markdown.js.map +1 -0
  156. package/dist/primitives/serialize_plaintext.d.ts +15 -0
  157. package/dist/primitives/serialize_plaintext.d.ts.map +1 -0
  158. package/dist/primitives/serialize_plaintext.js +154 -0
  159. package/dist/primitives/serialize_plaintext.js.map +1 -0
  160. package/dist/primitives/styles.js +22 -22
  161. package/dist/primitives/styles.js.map +1 -1
  162. package/dist/primitives/tables.d.ts.map +1 -1
  163. package/dist/primitives/tables.js +13 -3
  164. package/dist/primitives/tables.js.map +1 -1
  165. package/dist/primitives/text.d.ts +2 -1
  166. package/dist/primitives/text.d.ts.map +1 -1
  167. package/dist/primitives/text.js +116 -12
  168. package/dist/primitives/text.js.map +1 -1
  169. package/dist/primitives/track-changes-emitter.d.ts +139 -0
  170. package/dist/primitives/track-changes-emitter.d.ts.map +1 -0
  171. package/dist/primitives/track-changes-emitter.js +241 -0
  172. package/dist/primitives/track-changes-emitter.js.map +1 -0
  173. package/dist/primitives/xml-helpers.d.ts +29 -0
  174. package/dist/primitives/xml-helpers.d.ts.map +1 -0
  175. package/dist/primitives/xml-helpers.js +35 -0
  176. package/dist/primitives/xml-helpers.js.map +1 -0
  177. package/dist/shared/ooxml/namespaces.d.ts +4 -1
  178. package/dist/shared/ooxml/namespaces.d.ts.map +1 -1
  179. package/dist/shared/ooxml/namespaces.js +4 -1
  180. package/dist/shared/ooxml/namespaces.js.map +1 -1
  181. package/package.json +7 -6
@@ -0,0 +1,159 @@
1
+ export const INLINE_COMMENT_MARKER_RUNTIME = Symbol('inline_comment_marker_runtime');
2
+ // Matches the exact set of TOON inline formatting tags that emitFormattingTags() can emit:
3
+ // <b>, </b>, <i>, </i>, <u>, </u>, <highlight>, </highlight>,
4
+ // <a href="...">, </a>, <font ATTR=...>, </font>
5
+ // Anything else in the form `<...>` is literal document text (e.g., `<Borrower>` placeholders
6
+ // in legal templates, or stylesheet samples like `<font>`) and must be counted as visible
7
+ // characters, not skipped as markup.
8
+ //
9
+ // Note the opening `a`/`font` alternative requires `\s[^>]*` (mandatory attributes), because
10
+ // the formatter only emits `<a href="...">` and `<font ATTR=...>` — never bare `<a>` or
11
+ // `<font>`. Allowing the bare forms would cause literal `<a>` / `<font>` in document text to
12
+ // be silently skipped, shifting marker positions.
13
+ export const TOON_INLINE_TAG_RE = /^(?:<\/?(?:b|i|u|highlight)>|<\/(?:a|font)>|<(?:a|font)\s[^>]*>)/;
14
/**
 * Break a TOON inline-tagged string into an ordered token list.
 *
 * Every position that matches the emitter's tag grammar (as recognised by
 * `toonTagLengthAt`, i.e. `TOON_INLINE_TAG_RE`) becomes a `{ kind: 'tag' }`
 * token holding the exact tag text. Everything else is literal document text;
 * adjacent literal characters are merged into a single `{ kind: 'text' }`
 * token. Serializers share this function so that none of them re-implements
 * the tag grammar on its own.
 *
 * @param text Tagged text to tokenize.
 * @returns Tokens in source order; empty when `text` is empty.
 */
export function tokenizeToonInline(text) {
    const result = [];
    let pendingText = '';
    // Emit whatever literal text has accumulated since the last tag.
    const flushText = () => {
        if (pendingText) {
            result.push({ kind: 'text', value: pendingText });
            pendingText = '';
        }
    };
    let pos = 0;
    while (pos < text.length) {
        const width = toonTagLengthAt(text, pos);
        if (width > 0) {
            flushText();
            result.push({ kind: 'tag', value: text.slice(pos, pos + width) });
            pos += width;
        }
        else {
            pendingText += text[pos];
            pos += 1;
        }
    }
    flushText();
    return result;
}
42
/**
 * Length of the TOON inline tag that begins exactly at index `i` of `text`.
 *
 * @param text Tagged text being scanned.
 * @param i Index to test.
 * @returns The matched tag's length, or 0 when no recognised tag starts at `i`.
 */
function toonTagLengthAt(text, i) {
    // Cheap pre-check: only a '<' can open a tag, so the regex is skipped otherwise.
    if (text[i] === '<') {
        const hit = TOON_INLINE_TAG_RE.exec(text.slice(i));
        if (hit) {
            return hit[0].length;
        }
    }
    return 0;
}
48
/**
 * Count the characters of `text` that do not belong to a recognised TOON
 * inline tag. Counting is per string index (UTF-16 code unit); `<...>`
 * sequences that are not recognised tags count as ordinary text.
 *
 * @param text Tagged text to measure.
 * @returns Number of non-tag characters.
 */
export function countVisibleTextCharacters(text) {
    let total = 0;
    let pos = 0;
    while (pos < text.length) {
        const width = toonTagLengthAt(text, pos);
        if (width > 0) {
            // Jump over the whole tag without counting it.
            pos += width;
        }
        else {
            total += 1;
            pos += 1;
        }
    }
    return total;
}
60
/**
 * Translate an offset measured in visible (non-tag) characters into a raw
 * string index of `text`.
 *
 * The scan stops as soon as `visibleOffset` visible characters have been
 * passed, so the returned index sits before any tag that starts at that
 * position.
 *
 * @param text Tagged text to scan.
 * @param visibleOffset Number of visible characters to skip.
 * @returns 0 for offsets <= 0; `text.length` when the offset is never
 *   reached inside the string; otherwise the matching raw index.
 */
export function findTaggedTextInsertionIndex(text, visibleOffset) {
    if (visibleOffset <= 0) {
        return 0;
    }
    let seen = 0;
    let pos = 0;
    while (pos < text.length) {
        if (seen === visibleOffset) {
            return pos;
        }
        const width = toonTagLengthAt(text, pos);
        if (width > 0) {
            pos += width;
        }
        else {
            seen += 1;
            pos += 1;
        }
    }
    return text.length;
}
76
/**
 * Insert comment marker strings into tagged text at visible-character offsets.
 *
 * Markers are applied one after another, and each insertion index is resolved
 * against the text as already modified by the previous insertions. Inserted
 * marker text is itself counted as visible characters by later lookups, so
 * callers should pass markers ordered by descending offset (the order
 * `collectInlineCommentMarkers` produces).
 *
 * @param text Tagged text to annotate.
 * @param markers Entries of the form `{ offset, marker }`.
 * @returns The annotated text, or `text` unchanged when `markers` is empty.
 */
export function injectToonCommentMarkers(text, markers) {
    if (markers.length === 0) {
        return text;
    }
    let output = text;
    for (const entry of markers) {
        const at = findTaggedTextInsertionIndex(output, entry.offset);
        const head = output.slice(0, at);
        const tail = output.slice(at);
        output = head + entry.marker + tail;
    }
    return output;
}
86
/**
 * Walk a comment list (and, recursively, every reply list) and append one
 * marker candidate per comment that has a range, carries runtime data under
 * `INLINE_COMMENT_MARKER_RUNTIME`, and is not flagged `suppressInlineMarkers`.
 *
 * @param comments Comments to inspect; each must expose a `replies` array.
 * @param paragraphIndexById Map from paragraph id to its position in the node list.
 * @param candidates Output array, mutated in place.
 */
function collectInlineCommentMarkerCandidates(comments, paragraphIndexById, candidates) {
    for (const entry of comments) {
        const markerState = entry[INLINE_COMMENT_MARKER_RUNTIME];
        const wantsMarkers = entry.range && markerState && !markerState.suppressInlineMarkers;
        if (wantsMarkers) {
            const { startParagraphId, endParagraphId } = entry.range;
            // Paragraphs missing from the index sort as if they were last in the document.
            const knownIndex = paragraphIndexById.get(startParagraphId);
            candidates.push({
                id: entry.id,
                startParagraphId,
                endParagraphId,
                startParagraphIndex: knownIndex ?? Number.MAX_SAFE_INTEGER,
                startOffset: markerState.startVisibleOffset,
                endOffset: markerState.endVisibleOffset,
            });
        }
        // Replies are visited whether or not the parent produced a candidate.
        if (entry.replies.length > 0) {
            collectInlineCommentMarkerCandidates(entry.replies, paragraphIndexById, candidates);
        }
    }
}
104
+ function compareInlineCommentCloseOrder(left, right) {
105
+ if (left.startParagraphIndex !== right.startParagraphIndex) {
106
+ return right.startParagraphIndex - left.startParagraphIndex;
107
+ }
108
+ if (left.startOffset !== right.startOffset) {
109
+ return right.startOffset - left.startOffset;
110
+ }
111
+ return right.id - left.id;
112
+ }
113
/**
 * Compute, per paragraph, the inline comment markers to inject into its text.
 *
 * Candidates are gathered from every node's comments (including replies). Each
 * candidate contributes a `[cm-start:ID]` marker at its start paragraph/offset
 * and a `[cm-end:ID]` marker at its end paragraph/offset. Markers sharing an
 * offset are merged into one string: close markers first (ordered by
 * `compareInlineCommentCloseOrder`), then open markers by ascending id.
 *
 * @param nodes Document view nodes; each node's `id` is used as the paragraph id.
 * @returns Map from paragraph id to `{ offset, marker }` entries sorted by
 *   descending offset. Paragraphs without markers are omitted.
 */
export function collectInlineCommentMarkers(nodes) {
    const paragraphIndexById = new Map();
    nodes.forEach((node, position) => {
        paragraphIndexById.set(node.id, position);
    });
    const candidates = [];
    for (const node of nodes) {
        if (node.comments && node.comments.length > 0) {
            collectInlineCommentMarkerCandidates(node.comments, paragraphIndexById, candidates);
        }
    }
    // paragraphId -> (offset -> { closes, opens })
    const groupedByParagraph = new Map();
    // Fetch the bucket for a paragraph/offset pair, creating it on first use.
    const bucketFor = (paragraphId, offset) => {
        let byOffset = groupedByParagraph.get(paragraphId);
        if (byOffset === undefined || byOffset === null) {
            byOffset = new Map();
            groupedByParagraph.set(paragraphId, byOffset);
        }
        let bucket = byOffset.get(offset);
        if (bucket === undefined || bucket === null) {
            bucket = { closes: [], opens: [] };
            byOffset.set(offset, bucket);
        }
        return bucket;
    };
    for (const candidate of candidates) {
        bucketFor(candidate.startParagraphId, candidate.startOffset).opens.push(candidate);
        bucketFor(candidate.endParagraphId, candidate.endOffset).closes.push(candidate);
    }
    const markersByParagraph = new Map();
    for (const [paragraphId, offsetGroups] of groupedByParagraph.entries()) {
        const markers = [];
        // Highest offset first, so injecting a marker never shifts one still to be injected.
        const sortedOffsets = [...offsetGroups.keys()].sort((left, right) => right - left);
        for (const offset of sortedOffsets) {
            const group = offsetGroups.get(offset);
            if (!group) {
                continue;
            }
            const closeText = [...group.closes]
                .sort(compareInlineCommentCloseOrder)
                .map((comment) => `[cm-end:${comment.id}]`)
                .join('');
            const openText = [...group.opens]
                .sort((left, right) => left.id - right.id)
                .map((comment) => `[cm-start:${comment.id}]`)
                .join('');
            const marker = closeText + openText;
            if (marker) {
                markers.push({ offset, marker });
            }
        }
        if (markers.length > 0) {
            markersByParagraph.set(paragraphId, markers);
        }
    }
    return markersByParagraph;
}
159
+ //# sourceMappingURL=document_view-comments.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"document_view-comments.js","sourceRoot":"","sources":["../../src/primitives/document_view-comments.ts"],"names":[],"mappings":"AAIA,MAAM,CAAC,MAAM,6BAA6B,GAAG,MAAM,CAAC,+BAA+B,CAAC,CAAC;AAYrF,2FAA2F;AAC3F,gEAAgE;AAChE,mDAAmD;AACnD,8FAA8F;AAC9F,0FAA0F;AAC1F,qCAAqC;AACrC,EAAE;AACF,6FAA6F;AAC7F,wFAAwF;AACxF,6FAA6F;AAC7F,kDAAkD;AAClD,MAAM,CAAC,MAAM,kBAAkB,GAAG,kEAAkE,CAAC;AAErG;;;;;;;GAOG;AACH,MAAM,UAAU,kBAAkB,CAAC,IAAY;IAC7C,MAAM,MAAM,GAAsB,EAAE,CAAC;IACrC,IAAI,MAAM,GAAG,EAAE,CAAC;IAChB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,MAAM,GAAG,eAAe,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;QACxC,IAAI,MAAM,GAAG,CAAC,EAAE,CAAC;YACf,IAAI,MAAM,EAAE,CAAC;gBACX,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC;gBAC7C,MAAM,GAAG,EAAE,CAAC;YACd,CAAC;YACD,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,EAAE,CAAC,CAAC;YAC/D,CAAC,IAAI,MAAM,GAAG,CAAC,CAAC;YAChB,SAAS;QACX,CAAC;QACD,MAAM,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC;IACpB,CAAC;IACD,IAAI,MAAM;QAAE,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC;IACzD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,eAAe,CAAC,IAAY,EAAE,CAAS;IAC9C,IAAI,IAAI,CAAC,CAAC,CAAC,KAAK,GAAG;QAAE,OAAO,CAAC,CAAC;IAC9B,MAAM,KAAK,GAAG,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IACrD,OAAO,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;AACrC,CAAC;AAED,MAAM,UAAU,0BAA0B,CAAC,IAAY;IACrD,IAAI,YAAY,GAAG,CAAC,CAAC;IACrB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,MAAM,GAAG,eAAe,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;QACxC,IAAI,MAAM,GAAG,CAAC,EAAE,CAAC;YACf,CAAC,IAAI,MAAM,GAAG,CAAC,CAAC;YAChB,SAAS;QACX,CAAC;QACD,YAAY,EAAE,CAAC;IACjB,CAAC;IACD,OAAO,YAAY,CAAC;AACtB,CAAC;AAED,MAAM,UAAU,4BAA4B,CAAC,IAAY,EAAE,aAAqB;IAC9E,IAAI,aAAa,IAAI,CAAC;QAAE,OAAO,CAAC,CAAC;IAEjC,IAAI,YAAY,GAAG,CAAC,CAAC;IACrB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MA
AM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,IAAI,YAAY,KAAK,aAAa;YAAE,OAAO,CAAC,CAAC;QAE7C,MAAM,MAAM,GAAG,eAAe,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;QACxC,IAAI,MAAM,GAAG,CAAC,EAAE,CAAC;YACf,CAAC,IAAI,MAAM,GAAG,CAAC,CAAC;YAChB,SAAS;QACX,CAAC;QAED,YAAY,EAAE,CAAC;IACjB,CAAC;IAED,OAAO,IAAI,CAAC,MAAM,CAAC;AACrB,CAAC;AAED,MAAM,UAAU,wBAAwB,CACtC,IAAY,EACZ,OAAqC;IAErC,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAEtC,IAAI,MAAM,GAAG,IAAI,CAAC;IAClB,KAAK,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,OAAO,EAAE,CAAC;QACzC,MAAM,cAAc,GAAG,4BAA4B,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;QACpE,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,cAAc,CAAC,GAAG,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC;IACnF,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAgBD,SAAS,oCAAoC,CAC3C,QAAwC,EACxC,kBAA+C,EAC/C,UAA0C;IAE1C,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAC/B,MAAM,OAAO,GAAI,OAA0C,CAAC,6BAA6B,CAAC,CAAC;QAC3F,IAAI,OAAO,CAAC,KAAK,IAAI,OAAO,IAAI,CAAC,OAAO,CAAC,qBAAqB,EAAE,CAAC;YAC/D,UAAU,CAAC,IAAI,CAAC;gBACd,EAAE,EAAE,OAAO,CAAC,EAAE;gBACd,gBAAgB,EAAE,OAAO,CAAC,KAAK,CAAC,gBAAgB;gBAChD,cAAc,EAAE,OAAO,CAAC,KAAK,CAAC,cAAc;gBAC5C,mBAAmB,EAAE,kBAAkB,CAAC,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,gBAAgB,CAAC,IAAI,MAAM,CAAC,gBAAgB;gBACtG,WAAW,EAAE,OAAO,CAAC,kBAAkB;gBACvC,SAAS,EAAE,OAAO,CAAC,gBAAgB;aACpC,CAAC,CAAC;QACL,CAAC;QAED,IAAI,OAAO,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC/B,oCAAoC,CAAC,OAAO,CAAC,OAAO,EAAE,kBAAkB,EAAE,UAAU,CAAC,CAAC;QACxF,CAAC;IACH,CAAC;AACH,CAAC;AAED,SAAS,8BAA8B,CACrC,IAAkC,EAClC,KAAmC;IAEnC,IAAI,IAAI,CAAC,mBAAmB,KAAK,KAAK,CAAC,mBAAmB,EAAE,CAAC;QAC3D,OAAO,KAAK,CAAC,mBAAmB,GAAG,IAAI,CAAC,mBAAmB,CAAC;IAC9D,CAAC;IACD,IAAI,IAAI,CAAC,WAAW,KAAK,KAAK,CAAC,WAAW,EAAE,CAAC;QAC3C,OAAO,KAAK,CAAC,WAAW,GAAG,IAAI,CAAC,WAAW,CAAC;IAC9C,CAAC;IACD,OAAO,KAAK,CAAC,EAAE,GAAG,IAAI,CAAC,EAAE,CAAC;AAC5B,CAAC;AAED,MAAM,UAAU,2BAA2B,CACzC,KAAkC;IAElC,MAAM,kBAAkB,GAAG,IAAI,GAAG,EAAkB,CAAC;IACrD,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,GAAG,KAAK,CAAC,MAAM,EAAE,KAAK,EAAE,EAAE,CAAC;QAClD,kBAAkB,CAAC,GAAG,CAAC,KAAK,CAAC,KAAK,CAAE,CAAC,EAAE,EAAE,KAAK,CAAC,CAAC;IAClD,CAAC;IAED,MAA
M,UAAU,GAAmC,EAAE,CAAC;IACtD,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC9C,oCAAoC,CAAC,IAAI,CAAC,QAAQ,EAAE,kBAAkB,EAAE,UAAU,CAAC,CAAC;QACtF,CAAC;IACH,CAAC;IAED,MAAM,kBAAkB,GAAG,IAAI,GAAG,EAAiD,CAAC;IACpF,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;QACnC,MAAM,YAAY,GAAG,kBAAkB,CAAC,GAAG,CAAC,SAAS,CAAC,gBAAgB,CAAC,IAAI,IAAI,GAAG,EAAoC,CAAC;QACvH,MAAM,UAAU,GAAG,YAAY,CAAC,GAAG,CAAC,SAAS,CAAC,WAAW,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC;QACxF,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QACjC,YAAY,CAAC,GAAG,CAAC,SAAS,CAAC,WAAW,EAAE,UAAU,CAAC,CAAC;QACpD,kBAAkB,CAAC,GAAG,CAAC,SAAS,CAAC,gBAAgB,EAAE,YAAY,CAAC,CAAC;QAEjE,MAAM,UAAU,GAAG,kBAAkB,CAAC,GAAG,CAAC,SAAS,CAAC,cAAc,CAAC,IAAI,IAAI,GAAG,EAAoC,CAAC;QACnH,MAAM,QAAQ,GAAG,UAAU,CAAC,GAAG,CAAC,SAAS,CAAC,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC;QAClF,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAChC,UAAU,CAAC,GAAG,CAAC,SAAS,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QAC9C,kBAAkB,CAAC,GAAG,CAAC,SAAS,CAAC,cAAc,EAAE,UAAU,CAAC,CAAC;IAC/D,CAAC;IAED,MAAM,kBAAkB,GAAG,IAAI,GAAG,EAA+B,CAAC;IAClE,KAAK,MAAM,CAAC,WAAW,EAAE,YAAY,CAAC,IAAI,kBAAkB,CAAC,OAAO,EAAE,EAAE,CAAC;QACvE,MAAM,OAAO,GAAwB,EAAE,CAAC;QACxC,MAAM,aAAa,GAAG,KAAK,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC,KAAK,GAAG,IAAI,CAAC,CAAC;QAC1F,KAAK,MAAM,MAAM,IAAI,aAAa,EAAE,CAAC;YACnC,MAAM,KAAK,GAAG,YAAY,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;YACvC,IAAI,CAAC,KAAK;gBAAE,SAAS;YAErB,MAAM,MAAM,GAAG,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,8BAA8B,CAAC,CAAC;YACtE,MAAM,KAAK,GAAG,CAAC,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC,IAAI,CAAC,EAAE,GAAG,KAAK,CAAC,EAAE,CAAC,CAAC;YACzE,MAAM,MAAM,GACV,MAAM,CAAC,GAAG,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,WAAW,OAAO,CAAC,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC1D,KAAK,CAAC,GAAG,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,aAAa,OAAO,CAAC,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAC9D,IAAI,CAAC,MAAM;gBAAE,SAA
S;YACtB,OAAO,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,MAAM,EAAE,CAAC,CAAC;QACnC,CAAC;QAED,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,kBAAkB,CAAC,GAAG,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC;QAC/C,CAAC;IACH,CAAC;IAED,OAAO,kBAAkB,CAAC;AAC5B,CAAC"}
@@ -0,0 +1,45 @@
1
+ import { type ParagraphAlignment, type StylesModel } from './styles.js';
2
+ import type { DocumentViewNode, HeaderFormatting, HeadingValue, HeuristicHeadingSource } from './document_view-types.js';
3
+ export type { HeaderFormatting, HeadingSource, HeadingValue, HeuristicHeadingSource } from './document_view-types.js';
4
+ export declare function extractHeaderInfo(cleanText: string): {
5
+ header_text: string | null;
6
+ header_style: HeuristicHeadingSource | null;
7
+ };
8
+ export declare function deriveHeading(paragraphStyleId: string | null, cleanText: string, headerText: string | null, headerStyle: HeuristicHeadingSource | null, isInTableCell: boolean): HeadingValue | undefined;
9
+ export declare function detectRunInHeader(params: {
10
+ paragraph: Element;
11
+ paragraphPPr: Element | null;
12
+ paragraphStyleId: string | null;
13
+ styles: StylesModel;
14
+ }): {
15
+ raw_text: string;
16
+ formatting: HeaderFormatting;
17
+ headerCharCount: number;
18
+ } | null;
19
+ /**
20
+ * Detect a centered, ALL-CAPS, bold standalone title (e.g. an NVCA SPA's
21
+ * `SERIES […] PREFERRED STOCK PURCHASE AGREEMENT` title).
22
+ *
23
+ * Strict gates only — fires only when the paragraph cannot be confused with
24
+ * body prose, a placeholder, or a signature line:
25
+ * - paragraph alignment is CENTER
26
+ * - clean text contains no lowercase letters
27
+ * - clean text contains ≥ 3 ASCII letters AND ≥ 2 whitespace-separated
28
+ * word-tokens (so single-token bracketed placeholders like `[COMPANY]`
29
+ * and underscore-only signature lines like `____________` are rejected)
30
+ * - clean text is non-empty and ≤ MAX_CENTERED_TITLE_LENGTH
31
+ * - all visible runs are bold (a single non-bold char disqualifies)
32
+ */
33
+ export declare function detectTitleCapsCentered(params: {
34
+ paragraph: Element;
35
+ paragraphPPr: Element | null;
36
+ paragraphStyleId: string | null;
37
+ alignment: ParagraphAlignment;
38
+ cleanTextNoLabel: string;
39
+ styles: StylesModel;
40
+ }): {
41
+ raw_text: string;
42
+ formatting: HeaderFormatting;
43
+ } | null;
44
+ export declare function suppressSignatureClusters(nodes: DocumentViewNode[]): void;
45
+ //# sourceMappingURL=document_view-headings.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"document_view-headings.d.ts","sourceRoot":"","sources":["../../src/primitives/document_view-headings.ts"],"names":[],"mappings":"AAEA,OAAO,EAAiC,KAAK,kBAAkB,EAAE,KAAK,WAAW,EAAE,MAAM,aAAa,CAAC;AACvG,OAAO,KAAK,EAAE,gBAAgB,EAAE,gBAAgB,EAAE,YAAY,EAAE,sBAAsB,EAAE,MAAM,0BAA0B,CAAC;AAEzH,YAAY,EAAE,gBAAgB,EAAE,aAAa,EAAE,YAAY,EAAE,sBAAsB,EAAE,MAAM,0BAA0B,CAAC;AActH,wBAAgB,iBAAiB,CAAC,SAAS,EAAE,MAAM,GAAG;IAAE,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAAC,YAAY,EAAE,sBAAsB,GAAG,IAAI,CAAA;CAAE,CA2BhI;AAED,wBAAgB,aAAa,CAC3B,gBAAgB,EAAE,MAAM,GAAG,IAAI,EAC/B,SAAS,EAAE,MAAM,EACjB,UAAU,EAAE,MAAM,GAAG,IAAI,EACzB,WAAW,EAAE,sBAAsB,GAAG,IAAI,EAC1C,aAAa,EAAE,OAAO,GACrB,YAAY,GAAG,SAAS,CA2B1B;AAED,wBAAgB,iBAAiB,CAAC,MAAM,EAAE;IACxC,SAAS,EAAE,OAAO,CAAC;IACnB,YAAY,EAAE,OAAO,GAAG,IAAI,CAAC;IAC7B,gBAAgB,EAAE,MAAM,GAAG,IAAI,CAAC;IAChC,MAAM,EAAE,WAAW,CAAC;CACrB,GAAG;IAAE,QAAQ,EAAE,MAAM,CAAC;IAAC,UAAU,EAAE,gBAAgB,CAAC;IAAC,eAAe,EAAE,MAAM,CAAA;CAAE,GAAG,IAAI,CAyDrF;AAED;;;;;;;;;;;;;GAaG;AACH,wBAAgB,uBAAuB,CAAC,MAAM,EAAE;IAC9C,SAAS,EAAE,OAAO,CAAC;IACnB,YAAY,EAAE,OAAO,GAAG,IAAI,CAAC;IAC7B,gBAAgB,EAAE,MAAM,GAAG,IAAI,CAAC;IAChC,SAAS,EAAE,kBAAkB,CAAC;IAC9B,gBAAgB,EAAE,MAAM,CAAC;IACzB,MAAM,EAAE,WAAW,CAAC;CACrB,GAAG;IAAE,QAAQ,EAAE,MAAM,CAAC;IAAC,UAAU,EAAE,gBAAgB,CAAA;CAAE,GAAG,IAAI,CA8C5D;AAWD,wBAAgB,yBAAyB,CAAC,KAAK,EAAE,gBAAgB,EAAE,GAAG,IAAI,CAqCzE"}
@@ -0,0 +1,247 @@
1
+ import { OOXML, W } from './namespaces.js';
2
+ import { getParagraphRuns } from './text.js';
3
+ import { extractEffectiveRunFormatting } from './styles.js';
4
+ const SHORT_HEADER_MAX_LENGTH = 50;
5
+ const MAX_HEADER_TEXT_LENGTH = 60;
6
+ // Centered ALL-CAPS titles (e.g. NVCA COI's `AMENDED AND RESTATED CERTIFICATE
7
+ // OF INCORPORATION OF FOO INC.`) routinely exceed 60 chars in real corporate
8
+ // documents. The 60-char cap on `extractHeaderInfo` exists to avoid emitting a
9
+ // "leading words = header" guess from long body prose, which doesn't apply to
10
+ // the standalone-title detector.
11
+ const MAX_CENTERED_TITLE_LENGTH = 120;
12
+ // Pattern-based header detection fallback (ported from Python ingestor._extract_header_info).
13
+ const HEADER_PATTERN = /^([A-Z][^.!?:]*(?:\s+[A-Z][^.!?:]*)*)([.:]?)(?:\s|$)/;
14
/**
 * Pattern-based guess at whether a paragraph's text starts with (or is) a header.
 *
 * Short paragraphs (trimmed length <= SHORT_HEADER_MAX_LENGTH) are treated as a
 * header in their entirety: a trailing `.` or `:` is stripped and recorded as
 * the style, and otherwise the text is a bare title if it has at most five
 * words. Longer paragraphs must match HEADER_PATTERN, have text left over after
 * the match, keep the lead within MAX_HEADER_TEXT_LENGTH, and end the lead with
 * an explicit `.` or `:`.
 *
 * @param cleanText Paragraph text to inspect.
 * @returns `{ header_text, header_style }`; both are `null` when no header is detected.
 */
export function extractHeaderInfo(cleanText) {
    const noHeader = () => ({ header_text: null, header_style: null });
    const styleFor = (terminator) => {
        switch (terminator) {
            case '.':
                return 'title_with_period';
            case ':':
                return 'title_with_colon';
            default:
                return null;
        }
    };
    const unusable = !cleanText || cleanText.length < 2 || !/^[A-Z]/.test(cleanText);
    if (unusable) {
        return noHeader();
    }
    const body = cleanText.trim();
    if (body.length <= SHORT_HEADER_MAX_LENGTH) {
        const closingStyle = styleFor(body.charAt(body.length - 1));
        if (closingStyle !== null) {
            return { header_text: body.slice(0, -1), header_style: closingStyle };
        }
        const wordCount = body.split(/\s+/).length;
        return wordCount <= 5 ? { header_text: body, header_style: 'title_bare' } : noHeader();
    }
    const found = HEADER_PATTERN.exec(body);
    if (!found) {
        return noHeader();
    }
    const lead = (found[1] ?? '').trim();
    const rest = body.slice(found[0].length);
    if (!rest || lead.length > MAX_HEADER_TEXT_LENGTH) {
        return noHeader();
    }
    // In a long paragraph, a match without an explicit terminator is body prose
    // (e.g. "Termination of Section 2.2(d)(i) shall not affect ..."), not a
    // header. Bare titles are only produced by the short-paragraph branch.
    const leadStyle = styleFor(found[2] ?? '');
    return leadStyle !== null ? { header_text: lead, header_style: leadStyle } : noHeader();
}
47
/**
 * Decide the canonical heading value for a paragraph.
 *
 * A paragraph style id of the form `Heading1`..`Heading6` always wins and
 * yields a `word_style` heading with the numeric level and the full clean
 * text. Otherwise a heuristic header (`headerText` + `headerStyle`) is
 * promoted to a heading with `level: null`, but never inside a table cell.
 *
 * @param paragraphStyleId Paragraph style id, or null.
 * @param cleanText Full clean paragraph text.
 * @param headerText Heuristically detected header text, or null.
 * @param headerStyle Heuristic detector that produced `headerText`, or null.
 * @param isInTableCell Whether the paragraph sits inside a table cell.
 * @returns The heading value, or `undefined` when the paragraph is not a heading.
 */
export function deriveHeading(paragraphStyleId, cleanText, headerText, headerStyle, isInTableCell) {
    const builtIn = paragraphStyleId ? /^Heading([1-6])$/.exec(paragraphStyleId) : null;
    if (builtIn) {
        const level = Number.parseInt(builtIn[1], 10);
        return { text: cleanText, source: 'word_style', level };
    }
    // Heuristic detectors routinely fire on ordinary label/value cell content
    // ("Name", "Purchase Price:", "Name: Acme"), which is not a structural
    // heading, so heuristic promotion is suppressed inside table cells. Word
    // built-in heading styles inside cells were already handled above.
    if (isInTableCell || !headerText || !headerStyle) {
        return undefined;
    }
    return { text: headerText, source: headerStyle, level: null };
}
73
/**
 * Detect a run-in header: a leading stretch of bold or underlined runs that
 * ends in `.`, `:` or `-` and is followed by non-whitespace body text.
 *
 * @param params `paragraph` to inspect, plus `paragraphPPr`, `paragraphStyleId`
 *   and `styles`, which are forwarded to `extractEffectiveRunFormatting`.
 * @returns `{ raw_text, formatting, headerCharCount }` for a run-in header,
 *   where `headerCharCount` is the untrimmed length of the header prefix;
 *   `null` when the paragraph does not qualify.
 */
export function detectRunInHeader(params) {
    const { paragraph, paragraphPPr, paragraphStyleId, styles } = params;
    const terminators = ['.', ':', '-'];
    // getParagraphRuns() yields visible runs only (field code text is stripped there).
    const textRuns = getParagraphRuns(paragraph);
    if (textRuns.length === 0) {
        return null;
    }
    // One entry per run element, in first-seen order (a Set keeps insertion order).
    const runElements = [...new Set(textRuns.map((tr) => tr.r))];
    // Single pass: runs go to the header prefix while they are bold/underlined;
    // the first run that is not ends the prefix and everything after is body.
    // That prefix -> body transition is what separates a run-in header from a
    // fully-bold signature label or defined-term lead-in.
    let prefix = '';
    let body = '';
    let formatting = null;
    let headerCharCount = 0;
    let stillInPrefix = true;
    for (const runEl of runElements) {
        const fmt = extractEffectiveRunFormatting({ run: runEl, paragraphPPr, paragraphStyleId, styles });
        const runText = Array.from(runEl.getElementsByTagNameNS(OOXML.W_NS, W.t))
            .map((t) => t.textContent ?? '')
            .join('');
        if (stillInPrefix && (fmt.bold || fmt.underline)) {
            prefix += runText;
            headerCharCount += runText.length;
            // The reported formatting comes from the first prefix run.
            formatting = formatting || { bold: fmt.bold, italic: fmt.italic, underline: fmt.underline };
        }
        else {
            stillInPrefix = false;
            body += runText;
        }
    }
    const trimmed = prefix.trim();
    if (!trimmed || !terminators.includes(trimmed[trimmed.length - 1]) || !formatting) {
        return null;
    }
    // Whitespace-only "body" (e.g. a bold run followed by a non-bold run holding
    // just " ") is not a real transition: such paragraphs are still
    // whole-paragraph bold blocks (signature labels, all-bold short titles).
    if (!body.trim()) {
        return null;
    }
    return { raw_text: trimmed, formatting, headerCharCount };
}
132
/**
 * Detect a centered, ALL-CAPS, bold standalone title (for example an NVCA
 * SPA's `SERIES […] PREFERRED STOCK PURCHASE AGREEMENT` title).
 *
 * Every gate below must pass; they exist so that body prose, placeholders and
 * signature lines are never classified as titles:
 *   - `alignment` is exactly `'CENTER'`
 *   - the trimmed text is non-empty and at most MAX_CENTERED_TITLE_LENGTH long
 *   - the trimmed text has no ASCII lowercase letter
 *   - the trimmed text has at least 3 ASCII capitals and at least 2
 *     whitespace-separated tokens containing a capital (rejects single-token
 *     placeholders like `[COMPANY]` and underscore-only lines like `____________`)
 *   - every visible run that carries text is bold
 *
 * @param params `{ paragraph, paragraphPPr, paragraphStyleId, alignment, cleanTextNoLabel, styles }`.
 * @returns `{ raw_text, formatting }` on a match, otherwise `null`. `formatting`
 *   is the bold/italic/underline triple of the first run that carries text.
 */
export function detectTitleCapsCentered(params) {
    const { paragraph, paragraphPPr, paragraphStyleId, alignment, cleanTextNoLabel, styles } = params;
    if (alignment !== 'CENTER') {
        return null;
    }
    const title = cleanTextNoLabel.trim();
    if (title.length === 0 || title.length > MAX_CENTERED_TITLE_LENGTH || /[a-z]/.test(title)) {
        return null;
    }
    // Content gate: real titles are multi-word ALL-CAPS phrases.
    const capitalCount = title.replace(/[^A-Z]/g, '').length;
    const capitalizedTokens = title.split(/\s+/).filter((token) => /[A-Z]/.test(token));
    if (capitalCount < 3 || capitalizedTokens.length < 2) {
        return null;
    }

    const textRuns = getParagraphRuns(paragraph);
    if (textRuns.length === 0) {
        return null;
    }
    // Unique run elements in first-seen order.
    const runElements = [...new Set(textRuns.map((entry) => entry.r))];

    let formatting = null;
    for (const run of runElements) {
        const carriesText = Array.from(run.getElementsByTagNameNS(OOXML.W_NS, W.t))
            .some((t) => (t.textContent ?? '').length > 0);
        if (!carriesText) {
            continue; // runs without text cannot disqualify the title
        }
        const fmt = extractEffectiveRunFormatting({ run, paragraphPPr, paragraphStyleId, styles });
        if (!fmt.bold) {
            return null; // a single non-bold text run disqualifies the paragraph
        }
        if (formatting === null) {
            formatting = { bold: fmt.bold, italic: fmt.italic, underline: fmt.underline };
        }
    }
    // `formatting` stays null exactly when no run carried any text.
    return formatting === null ? null : { raw_text: title, formatting };
}
201
// A whole line that is just a short label ending in a colon: one capital
// letter, up to 28 further letters/spaces, then ":" (e.g. "By:", "Name:").
const SIGNATURE_LABEL_LINE_RE = /^[A-Z][a-zA-Z ]{0,28}:\s*$/;
// A leading run of capitals that is either the entire text or is followed by
// ": " (e.g. "COMPANY", "BY: Jane Doe").
const SIGNATURE_LABEL_PREFIX_RE = /^[A-Z]+(?::\s|$)/;
/**
 * Report whether `text`, after trimming, looks like a signature-block label.
 *
 * @param text Paragraph text to classify.
 * @returns `true` when the trimmed text matches either label pattern above;
 *   `false` otherwise, including for empty or whitespace-only text.
 */
function isSignatureClusterLabel(text) {
    const candidate = text.trim();
    if (candidate.length === 0) {
        return false;
    }
    return [SIGNATURE_LABEL_LINE_RE, SIGNATURE_LABEL_PREFIX_RE].some((pattern) => pattern.test(candidate));
}
209
/**
 * Clear heading metadata from signature-block labels, in place.
 *
 * Every contiguous window of at least 4 nodes in which at least three quarters
 * of the nodes are signature labels (per `isSignatureClusterLabel` on
 * `clean_text`) counts as a signature cluster. Each label node covered by at
 * least one such window has `header`, `header_formatting` and the
 * `list_metadata.header_*` fields reset. Non-label nodes inside a window are
 * left alone, so an adjacent section heading or body line keeps its metadata.
 *
 * @param nodes Document view nodes; mutated in place. Lists shorter than 4 are ignored.
 * @returns Nothing.
 */
export function suppressSignatureClusters(nodes) {
    const MIN_WINDOW = 4;
    const total = nodes.length;
    if (total < MIN_WINDOW) {
        return;
    }
    const isLabel = nodes.map((node) => isSignatureClusterLabel(node.clean_text));

    // labelsBefore[i] = number of labels among nodes[0 .. i-1].
    const labelsBefore = [0];
    for (const flag of isLabel) {
        labelsBefore.push(labelsBefore[labelsBefore.length - 1] + (flag ? 1 : 0));
    }

    // Difference array: +1 where a qualifying window opens, -1 just past its end.
    const delta = new Array(total + 1).fill(0);
    for (let first = 0; first + MIN_WINDOW <= total; first++) {
        for (let last = first + MIN_WINDOW - 1; last < total; last++) {
            const span = last - first + 1;
            const labels = labelsBefore[last + 1] - labelsBefore[first];
            // Integer form of labels / span >= 3 / 4.
            if (labels * 4 >= span * 3) {
                delta[first] += 1;
                delta[last + 1] -= 1;
            }
        }
    }

    let openWindows = 0;
    for (const [idx, node] of nodes.entries()) {
        openWindows += delta[idx];
        // The density gate only authorizes clearing labels inside a window.
        if (openWindows <= 0 || !isLabel[idx]) {
            continue;
        }
        node.header = '';
        node.header_formatting = null;
        const meta = node.list_metadata;
        meta.header_text = null;
        meta.header_style = null;
        meta.header_formatting = null;
    }
}
247
+ //# sourceMappingURL=document_view-headings.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"document_view-headings.js","sourceRoot":"","sources":["../../src/primitives/document_view-headings.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,iBAAiB,CAAC;AAC3C,OAAO,EAAE,gBAAgB,EAAE,MAAM,WAAW,CAAC;AAC7C,OAAO,EAAE,6BAA6B,EAA6C,MAAM,aAAa,CAAC;AAKvG,MAAM,uBAAuB,GAAG,EAAE,CAAC;AACnC,MAAM,sBAAsB,GAAG,EAAE,CAAC;AAClC,8EAA8E;AAC9E,6EAA6E;AAC7E,+EAA+E;AAC/E,8EAA8E;AAC9E,iCAAiC;AACjC,MAAM,yBAAyB,GAAG,GAAG,CAAC;AAEtC,8FAA8F;AAC9F,MAAM,cAAc,GAAG,sDAAsD,CAAC;AAE9E,MAAM,UAAU,iBAAiB,CAAC,SAAiB;IACjD,IAAI,CAAC,SAAS,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC;IACzF,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,SAAS,CAAC;QAAE,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC;IAEhF,MAAM,QAAQ,GAAG,SAAS,CAAC,IAAI,EAAE,CAAC;IAClC,IAAI,QAAQ,CAAC,MAAM,IAAI,uBAAuB,EAAE,CAAC;QAC/C,IAAI,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC;YAAE,OAAO,EAAE,WAAW,EAAE,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,YAAY,EAAE,mBAAmB,EAAE,CAAC;QAC7G,IAAI,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC;YAAE,OAAO,EAAE,WAAW,EAAE,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,YAAY,EAAE,kBAAkB,EAAE,CAAC;QAE5G,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QACpC,IAAI,KAAK,CAAC,MAAM,IAAI,CAAC;YAAE,OAAO,EAAE,WAAW,EAAE,QAAQ,EAAE,YAAY,EAAE,YAAY,EAAE,CAAC;QACpF,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC;IACnD,CAAC;IAED,MAAM,CAAC,GAAG,cAAc,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACxC,IAAI,CAAC,CAAC;QAAE,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC;IACzD,MAAM,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;IACvC,MAAM,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IAC9B,MAAM,SAAS,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;IAC9C,IAAI,CAAC,SAAS,IAAI,UAAU,CAAC,MAAM,GAAG,sBAAsB;QAAE,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC;IAE/G,IAAI,UAAU,KAAK,GAAG;QAAE,OAAO,EAAE,WAAW,EAAE,UAAU,EAAE,YAAY,EAAE,mBAAmB,EAAE,CAAC;IAC9F,IAAI,UAAU,KAAK,GAAG;QAAE,OAAO,EAAE,WAAW,EAAE,UAAU,EAAE,YAAY,EAAE,kBAAkB,EAAE,CAAC;IAC
7F,6EAA6E;IAC7E,+EAA+E;IAC/E,+DAA+D;IAC/D,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC;AACnD,CAAC;AAED,MAAM,UAAU,aAAa,CAC3B,gBAA+B,EAC/B,SAAiB,EACjB,UAAyB,EACzB,WAA0C,EAC1C,aAAsB;IAEtB,MAAM,UAAU,GAAG,gBAAgB,CAAC,CAAC,CAAC,kBAAkB,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IACvF,IAAI,UAAU,EAAE,CAAC;QACf,OAAO;YACL,IAAI,EAAE,SAAS;YACf,MAAM,EAAE,YAAY;YACpB,KAAK,EAAE,MAAM,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAE,EAAE,EAAE,CAAC;SAC3C,CAAC;IACJ,CAAC;IAED,6EAA6E;IAC7E,+EAA+E;IAC/E,gFAAgF;IAChF,+EAA+E;IAC/E,6EAA6E;IAC7E,6EAA6E;IAC7E,IAAI,aAAa;QAAE,OAAO,SAAS,CAAC;IAEpC,IAAI,UAAU,IAAI,WAAW,EAAE,CAAC;QAC9B,OAAO;YACL,IAAI,EAAE,UAAU;YAChB,MAAM,EAAE,WAAW;YACnB,KAAK,EAAE,IAAI;SACZ,CAAC;IACJ,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,MAAM,UAAU,iBAAiB,CAAC,MAKjC;IACC,MAAM,EAAE,SAAS,EAAE,YAAY,EAAE,gBAAgB,EAAE,MAAM,EAAE,GAAG,MAAM,CAAC;IACrE,MAAM,KAAK,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC;IAEvC,0EAA0E;IAC1E,MAAM,IAAI,GAAG,gBAAgB,CAAC,SAAS,CAAC,CAAC;IACzC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAEnC,0CAA0C;IAC1C,MAAM,iBAAiB,GAAc,EAAE,CAAC;IACxC,MAAM,IAAI,GAAG,IAAI,GAAG,EAAW,CAAC;IAChC,KAAK,MAAM,EAAE,IAAI,IAAI,EAAE,CAAC;QACtB,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YACpB,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;YACf,iBAAiB,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QAC/B,CAAC;IACH,CAAC;IAED,uEAAuE;IACvE,mEAAmE;IACnE,uEAAuE;IACvE,2CAA2C;IAC3C,IAAI,UAAU,GAAG,EAAE,CAAC;IACpB,IAAI,QAAQ,GAAG,EAAE,CAAC;IAClB,IAAI,UAAU,GAA4B,IAAI,CAAC;IAC/C,IAAI,eAAe,GAAG,CAAC,CAAC;IACxB,IAAI,QAAQ,GAAG,IAAI,CAAC;IAEpB,KAAK,MAAM,CAAC,IAAI,iBAAiB,EAAE,CAAC;QAClC,MAAM,GAAG,GAAG,6BAA6B,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,YAAY,EAAE,gBAAgB,EAAE,MAAM,EAAE,CAAC,CAAC;QAC9F,MAAM,aAAa,GAAG,GAAG,CAAC,IAAI,IAAI,GAAG,CAAC,SAAS,CAAC;QAChD,MAAM,EAAE,GAAG,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,sBAAsB,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACjE,IAAI,OAAO,GAAG,EAAE,CAAC;QACjB,KAAK,MAAM,CAAC,IAAI,EAAE;YAAE,OAAO,IAAI,CAAC,CAAC,WAAW,IAAI,EAAE,CAAC;QAEnD,IAAI,QAAQ,
IAAI,aAAa,EAAE,CAAC;YAC9B,UAAU,IAAI,OAAO,CAAC;YACtB,eAAe,IAAI,OAAO,CAAC,MAAM,CAAC;YAClC,IAAI,CAAC,UAAU;gBAAE,UAAU,GAAG,EAAE,IAAI,EAAE,GAAG,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,CAAC,MAAM,EAAE,SAAS,EAAE,GAAG,CAAC,SAAS,EAAE,CAAC;QACjG,CAAC;aAAM,CAAC;YACN,QAAQ,GAAG,KAAK,CAAC;YACjB,QAAQ,IAAI,OAAO,CAAC;QACtB,CAAC;IACH,CAAC;IAED,MAAM,OAAO,GAAG,UAAU,CAAC,IAAI,EAAE,CAAC;IAClC,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAC;IAC1B,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,OAAO,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAE,CAAC;QAAE,OAAO,IAAI,CAAC;IAC1D,IAAI,CAAC,UAAU;QAAE,OAAO,IAAI,CAAC;IAC7B,+EAA+E;IAC/E,6EAA6E;IAC7E,4EAA4E;IAC5E,oEAAoE;IACpE,wEAAwE;IACxE,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE;QAAE,OAAO,IAAI,CAAC;IAElC,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,UAAU,EAAE,eAAe,EAAE,CAAC;AAC5D,CAAC;AAED;;;;;;;;;;;;;GAaG;AACH,MAAM,UAAU,uBAAuB,CAAC,MAOvC;IACC,MAAM,EAAE,SAAS,EAAE,YAAY,EAAE,gBAAgB,EAAE,SAAS,EAAE,gBAAgB,EAAE,MAAM,EAAE,GAAG,MAAM,CAAC;IAClG,IAAI,SAAS,KAAK,QAAQ;QAAE,OAAO,IAAI,CAAC;IACxC,MAAM,OAAO,GAAG,gBAAgB,CAAC,IAAI,EAAE,CAAC;IACxC,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAC;IAC1B,IAAI,OAAO,CAAC,MAAM,GAAG,yBAAyB;QAAE,OAAO,IAAI,CAAC;IAC5D,IAAI,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC;QAAE,OAAO,IAAI,CAAC;IACvC,0EAA0E;IAC1E,sEAAsE;IACtE,mEAAmE;IACnE,MAAM,WAAW,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;IAC3D,IAAI,WAAW,GAAG,CAAC;QAAE,OAAO,IAAI,CAAC;IACjC,MAAM,UAAU,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;IACvE,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,IAAI,CAAC;IAEvC,MAAM,IAAI,GAAG,gBAAgB,CAAC,SAAS,CAAC,CAAC;IACzC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IACnC,MAAM,iBAAiB,GAAc,EAAE,CAAC;IACxC,MAAM,IAAI,GAAG,IAAI,GAAG,EAAW,CAAC;IAChC,KAAK,MAAM,EAAE,IAAI,IAAI,EAAE,CAAC;QACtB,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YACpB,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;YACf,iBAAiB,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QAC/B,CAAC;IACH,CAAC;IAED,IAAI,UAAU,GAA4B,IAAI,CAAC;IAC/C,IAAI,UAAU,GAAG,KAAK,CAAC;IACvB,KAAK,MAAM,CAAC,IAAI,iBAAiB,EA
AE,CAAC;QAClC,MAAM,EAAE,GAAG,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,sBAAsB,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACjE,IAAI,UAAU,GAAG,KAAK,CAAC;QACvB,KAAK,MAAM,CAAC,IAAI,EAAE,EAAE,CAAC;YACnB,IAAI,CAAC,CAAC,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACrC,UAAU,GAAG,IAAI,CAAC;gBAClB,MAAM;YACR,CAAC;QACH,CAAC;QACD,IAAI,CAAC,UAAU;YAAE,SAAS;QAC1B,MAAM,GAAG,GAAG,6BAA6B,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,YAAY,EAAE,gBAAgB,EAAE,MAAM,EAAE,CAAC,CAAC;QAC9F,IAAI,CAAC,GAAG,CAAC,IAAI;YAAE,OAAO,IAAI,CAAC;QAC3B,UAAU,GAAG,IAAI,CAAC;QAClB,IAAI,CAAC,UAAU;YAAE,UAAU,GAAG,EAAE,IAAI,EAAE,GAAG,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,CAAC,MAAM,EAAE,SAAS,EAAE,GAAG,CAAC,SAAS,EAAE,CAAC;IACjG,CAAC;IACD,IAAI,CAAC,UAAU,IAAI,CAAC,UAAU;QAAE,OAAO,IAAI,CAAC;IAE5C,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,UAAU,EAAE,CAAC;AAC3C,CAAC;AAED,MAAM,uBAAuB,GAAG,4BAA4B,CAAC;AAC7D,MAAM,yBAAyB,GAAG,kBAAkB,CAAC;AAErD,SAAS,uBAAuB,CAAC,IAAY;IAC3C,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAC5B,IAAI,CAAC,OAAO;QAAE,OAAO,KAAK,CAAC;IAC3B,OAAO,uBAAuB,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,yBAAyB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;AAC1F,CAAC;AAED,MAAM,UAAU,yBAAyB,CAAC,KAAyB;IACjE,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO;IAE7B,MAAM,aAAa,GAAG,IAAI,KAAK,CAAS,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClE,KAAK,IAAI,GAAG,GAAG,CAAC,EAAE,GAAG,GAAG,KAAK,CAAC,MAAM,EAAE,GAAG,EAAE,EAAE,CAAC;QAC5C,aAAa,CAAC,GAAG,GAAG,CAAC,CAAC,GAAG,aAAa,CAAC,GAAG,CAAE,GAAG,CAAC,uBAAuB,CAAC,KAAK,CAAC,GAAG,CAAE,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC3G,CAAC;IAED,MAAM,QAAQ,GAAG,IAAI,KAAK,CAAS,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC7D,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,KAAK,EAAE,EAAE,CAAC;QACvD,KAAK,IAAI,GAAG,GAAG,KAAK,GAAG,CAAC,EAAE,GAAG,GAAG,KAAK,CAAC,MAAM,EAAE,GAAG,EAAE,EAAE,CAAC;YACpD,MAAM,SAAS,GAAG,GAAG,GAAG,KAAK,GAAG,CAAC,CAAC;YAClC,MAAM,UAAU,GAAG,aAAa,CAAC,GAAG,GAAG,CAAC,CAAE,GAAG,aAAa,CAAC,KAAK,CAAE,CAAC;YACnE,IAAI,CAAC,UAAU,GAAG,CAAC,CAAC,GAAG,CAAC,SAAS,GAAG,CAAC,C
AAC;gBAAE,SAAS;YACjD,QAAQ,CAAC,KAAK,CAAE,IAAI,CAAC,CAAC;YACtB,QAAQ,CAAC,GAAG,GAAG,CAAC,CAAE,IAAI,CAAC,CAAC;QAC1B,CAAC;IACH,CAAC;IAED,IAAI,cAAc,GAAG,CAAC,CAAC;IACvB,KAAK,IAAI,GAAG,GAAG,CAAC,EAAE,GAAG,GAAG,KAAK,CAAC,MAAM,EAAE,GAAG,EAAE,EAAE,CAAC;QAC5C,cAAc,IAAI,QAAQ,CAAC,GAAG,CAAE,CAAC;QACjC,IAAI,cAAc,IAAI,CAAC;YAAE,SAAS;QAElC,MAAM,IAAI,GAAG,KAAK,CAAC,GAAG,CAAE,CAAC;QACzB,sEAAsE;QACtE,qEAAqE;QACrE,sEAAsE;QACtE,uEAAuE;QACvE,0CAA0C;QAC1C,IAAI,CAAC,uBAAuB,CAAC,IAAI,CAAC,UAAU,CAAC;YAAE,SAAS;QACxD,IAAI,CAAC,MAAM,GAAG,EAAE,CAAC;QACjB,IAAI,CAAC,iBAAiB,GAAG,IAAI,CAAC;QAC9B,IAAI,CAAC,aAAa,CAAC,WAAW,GAAG,IAAI,CAAC;QACtC,IAAI,CAAC,aAAa,CAAC,YAAY,GAAG,IAAI,CAAC;QACvC,IAAI,CAAC,aAAa,CAAC,iBAAiB,GAAG,IAAI,CAAC;IAC9C,CAAC;AACH,CAAC"}
@@ -0,0 +1,11 @@
1
+ import type { DocumentStyles, DocumentViewNode, FormattingFingerprint } from './document_view-types.js';
2
+ export type { DocumentStyleInfo, DocumentStyles, FormattingFingerprint } from './document_view-types.js';
3
+ export declare function fingerprintKey(fp: FormattingFingerprint): string;
4
+ /**
5
+ * v0.3: Compact style fingerprint token.
6
+ * Concatenates style name, list level, alignment, and indentation for token-efficient LLM context.
7
+ * Example: "Normal:L-1:LEFT:I0:H0"
8
+ */
9
+ export declare function computeFingerprintToken(fp: FormattingFingerprint, styleId?: string): string;
10
+ export declare function discoverStyles(nodes: DocumentViewNode[]): DocumentStyles;
11
+ //# sourceMappingURL=document_view-styles.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"document_view-styles.d.ts","sourceRoot":"","sources":["../../src/primitives/document_view-styles.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAqB,cAAc,EAAE,gBAAgB,EAAE,qBAAqB,EAAE,MAAM,0BAA0B,CAAC;AAE3H,YAAY,EAAE,iBAAiB,EAAE,cAAc,EAAE,qBAAqB,EAAE,MAAM,0BAA0B,CAAC;AAIzG,wBAAgB,cAAc,CAAC,EAAE,EAAE,qBAAqB,GAAG,MAAM,CAGhE;AAED;;;;GAIG;AACH,wBAAgB,uBAAuB,CAAC,EAAE,EAAE,qBAAqB,EAAE,OAAO,CAAC,EAAE,MAAM,GAAG,MAAM,CAO3F;AA6CD,wBAAgB,cAAc,CAAC,KAAK,EAAE,gBAAgB,EAAE,GAAG,cAAc,CAsCxE"}
@@ -0,0 +1,104 @@
1
+ import { LabelType } from './list_labels.js';
2
+ const STYLE_EXAMPLE_TEXT_PREVIEW_LENGTH = 50;
3
/**
 * Build the string key used to group nodes by formatting fingerprint.
 *
 * The key is `list_level|left_indent|first_line_indent|style_name|alignment`,
 * with both indents fixed to one decimal place.
 *
 * @param fp Formatting fingerprint to serialize.
 * @returns The pipe-delimited key, suitable for Map lookups.
 */
export function fingerprintKey(fp) {
    const leftIndent = fp.left_indent_pt.toFixed(1);
    const firstLineIndent = fp.first_line_indent_pt.toFixed(1);
    const fields = [
        String(fp.list_level),
        leftIndent,
        firstLineIndent,
        String(fp.style_name),
        String(fp.alignment),
    ];
    return fields.join('|');
}
7
/**
 * v0.3: Compact style fingerprint token.
 *
 * Joins five colon-separated fields: a name, `L<list level>`, the alignment,
 * `I<left indent>` and `H<first-line indent>`, with both indents rounded to
 * the nearest integer. The name is `styleId` when truthy, else the
 * fingerprint's `style_name` when truthy, else `'body'`.
 * Example: "Normal:L-1:LEFT:I0:H0"
 *
 * @param fp Formatting fingerprint to summarize.
 * @param styleId Optional style id that takes precedence over `fp.style_name`.
 * @returns The compact token string.
 */
export function computeFingerprintToken(fp, styleId) {
    const label = styleId || fp.style_name || 'body';
    const segments = [
        String(label),
        `L${fp.list_level}`,
        String(fp.alignment),
        `I${Math.round(fp.left_indent_pt)}`,
        `H${Math.round(fp.first_line_indent_pt)}`,
    ];
    return segments.join(':');
}
20
/**
 * Choose a semantic base id and display name for a group of nodes that share
 * one formatting fingerprint.
 *
 * List groups (`fp.list_level >= 0`) are named from the list level plus the
 * first truthy `list_metadata.label_type` found in `nodes`. Non-list groups
 * are named from the left indent and the style name, in that order.
 *
 * @param params `{ fp, nodes }`: the shared fingerprint and the nodes carrying it.
 * @returns `{ base_id, display_name }`.
 */
function inferSemanticName(params) {
    const { fp, nodes } = params;
    const named = (base_id, display_name) => ({ base_id, display_name });

    // The first node with a label type decides for the whole group.
    const firstLabelled = nodes.find((n) => n.list_metadata.label_type);
    const labelType = firstLabelled ? firstLabelled.list_metadata.label_type : null;
    const listLevel = fp.list_level;

    if (listLevel === 0) {
        switch (labelType) {
            case LabelType.ARTICLE:
                return named('article', 'Article Heading');
            case LabelType.SECTION:
                return named('section', 'Section Heading');
            case LabelType.ROMAN:
                return named('roman_section', 'Roman Numeral Section');
            default:
                return named('top_level', 'Top-Level List Item');
        }
    }
    if (listLevel === 1) {
        switch (labelType) {
            case LabelType.LETTER:
                return named('subsection', 'Subsection (a)/(A)');
            case LabelType.NUMBER:
                return named('subsection_number', 'Numbered Subsection');
            case LabelType.ROMAN:
                return named('subsection_roman', 'Roman Subsection');
            default:
                return named('level_1', `Level ${listLevel} List Item`);
        }
    }
    if (listLevel >= 0) {
        // Deeper list levels: only roman and letter labels get a dedicated name.
        switch (labelType) {
            case LabelType.ROMAN:
                return named(`level_${listLevel}_roman`, `Level ${listLevel} Roman`);
            case LabelType.LETTER:
                return named(`level_${listLevel}_letter`, `Level ${listLevel} Letter`);
            default:
                return named(`level_${listLevel}`, `Level ${listLevel} List Item`);
        }
    }

    // Non-list paragraph. Any positive left indent wins over the style name.
    const normalizedStyle = fp.style_name.toLowerCase().replace(/\s+/g, '_');
    const mentions = (...words) => words.some((word) => normalizedStyle.includes(word));
    if (fp.left_indent_pt > 0) {
        return named('indent_block', 'Indented Block');
    }
    if (mentions('heading', 'title')) {
        return named('heading', 'Heading');
    }
    if (mentions('quote', 'block')) {
        return named('block_quote', 'Block Quote');
    }
    return named('body', 'Body Text');
}
66
/**
 * Group nodes by formatting fingerprint and describe each group as a style.
 *
 * Groups are formed in first-seen order using `fingerprintKey`. Each group is
 * named by `inferSemanticName`; the first group with a given base id keeps it
 * unchanged, and later groups with the same base id get `_1`, `_2`, ...
 * appended. The example for a group is the node at the middle index of the
 * group (in input order), with its text truncated to
 * STYLE_EXAMPLE_TEXT_PREVIEW_LENGTH characters.
 *
 * @param nodes Document view nodes to analyze.
 * @returns `{ styles, fingerprint_to_style }`: a Map from style id to style
 *   info, and a Map from fingerprint key to style id.
 */
export function discoverStyles(nodes) {
    const groups = new Map();
    for (const node of nodes) {
        const key = fingerprintKey(node.style_fingerprint);
        let group = groups.get(key);
        if (group === undefined) {
            group = { fp: node.style_fingerprint, nodes: [] };
            groups.set(key, group);
        }
        group.nodes.push(node);
    }

    // base id -> highest suffix handed out so far (0 means "used, unsuffixed").
    const suffixByBase = new Map();
    const styles = new Map();
    const fpToStyle = new Map();
    for (const [fpKey, group] of groups) {
        const { base_id, display_name } = inferSemanticName({ fp: group.fp, nodes: group.nodes });
        const previous = suffixByBase.get(base_id);
        const suffix = previous === undefined ? 0 : previous + 1;
        suffixByBase.set(base_id, suffix);
        const styleId = suffix === 0 ? base_id : `${base_id}_${suffix}`;

        // Middle node of the group, by position rather than by any sort order.
        const representative = group.nodes[Math.floor(group.nodes.length / 2)];
        styles.set(styleId, {
            style_id: styleId,
            display_name,
            fingerprint: group.fp,
            example_node_id: representative.id,
            example_text: representative.clean_text.slice(0, STYLE_EXAMPLE_TEXT_PREVIEW_LENGTH),
            count: group.nodes.length,
            dominant_alignment: group.fp.alignment,
        });
        fpToStyle.set(fpKey, styleId);
    }
    return { styles, fingerprint_to_style: fpToStyle };
}
104
+ //# sourceMappingURL=document_view-styles.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"document_view-styles.js","sourceRoot":"","sources":["../../src/primitives/document_view-styles.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAK7C,MAAM,iCAAiC,GAAG,EAAE,CAAC;AAE7C,MAAM,UAAU,cAAc,CAAC,EAAyB;IACtD,4CAA4C;IAC5C,OAAO,GAAG,EAAE,CAAC,UAAU,IAAI,EAAE,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,oBAAoB,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,UAAU,IAAI,EAAE,CAAC,SAAS,EAAE,CAAC;AACnI,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,uBAAuB,CAAC,EAAyB,EAAE,OAAgB;IACjF,MAAM,IAAI,GAAG,OAAO,IAAI,EAAE,CAAC,UAAU,IAAI,MAAM,CAAC;IAChD,MAAM,KAAK,GAAG,IAAI,EAAE,CAAC,UAAU,EAAE,CAAC;IAClC,MAAM,KAAK,GAAG,EAAE,CAAC,SAAS,CAAC;IAC3B,MAAM,MAAM,GAAG,IAAI,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,cAAc,CAAC,EAAE,CAAC;IACnD,MAAM,OAAO,GAAG,IAAI,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,oBAAoB,CAAC,EAAE,CAAC;IAC1D,OAAO,GAAG,IAAI,IAAI,KAAK,IAAI,KAAK,IAAI,MAAM,IAAI,OAAO,EAAE,CAAC;AAC1D,CAAC;AAED,SAAS,iBAAiB,CAAC,MAG1B;IACC,MAAM,EAAE,EAAE,EAAE,KAAK,EAAE,GAAG,MAAM,CAAC;IAE7B,oCAAoC;IACpC,IAAI,SAAS,GAAqB,IAAI,CAAC;IACvC,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,IAAI,CAAC,CAAC,aAAa,CAAC,UAAU,EAAE,CAAC;YAC/B,SAAS,GAAG,CAAC,CAAC,aAAa,CAAC,UAAU,CAAC;YACvC,MAAM;QACR,CAAC;IACH,CAAC;IAED,MAAM,SAAS,GAAG,EAAE,CAAC,UAAU,CAAC;IAEhC,IAAI,SAAS,IAAI,CAAC,EAAE,CAAC;QACnB,IAAI,SAAS,KAAK,CAAC,EAAE,CAAC;YACpB,IAAI,SAAS,KAAK,SAAS,CAAC,OAAO;gBAAE,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,YAAY,EAAE,iBAAiB,EAAE,CAAC;YACpG,IAAI,SAAS,KAAK,SAAS,CAAC,OAAO;gBAAE,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,YAAY,EAAE,iBAAiB,EAAE,CAAC;YACpG,IAAI,SAAS,KAAK,SAAS,CAAC,KAAK;gBAAE,OAAO,EAAE,OAAO,EAAE,eAAe,EAAE,YAAY,EAAE,uBAAuB,EAAE,CAAC;YAC9G,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,YAAY,EAAE,qBAAqB,EAAE,CAAC;QACvE,CAAC;QACD,IAAI,SAAS,KAAK,CAAC,EAAE,CAAC;YACpB,IAAI,SAAS,KAAK,SAAS,CAAC,MAAM;gBAAE,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,YAAY,EAAE,oBAAoB,EAAE,CAAC;YACzG,IAAI,SAAS,KAAK,SAAS,CAAC,MAAM;gBAAE,OAAO,EAAE,OAAO,EAAE,mBAAmB,EAAE,YAAY,EAAE,qBAAqB,EAAE,CAAC;YACjH,IAAI,SAAS,KAAK,SAAS,CAAC,KAAK;gBAAE,OAAO,EAAE,OAAO,EAAE,kBAAkB,EAAE,YAAY,EAAE,kBAAkB,EAAE,CAA
C;YAC5G,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,YAAY,EAAE,SAAS,SAAS,YAAY,EAAE,CAAC;QAC9E,CAAC;QACD,IAAI,SAAS,KAAK,SAAS,CAAC,KAAK;YAAE,OAAO,EAAE,OAAO,EAAE,SAAS,SAAS,QAAQ,EAAE,YAAY,EAAE,SAAS,SAAS,QAAQ,EAAE,CAAC;QAC5H,IAAI,SAAS,KAAK,SAAS,CAAC,MAAM;YAAE,OAAO,EAAE,OAAO,EAAE,SAAS,SAAS,SAAS,EAAE,YAAY,EAAE,SAAS,SAAS,SAAS,EAAE,CAAC;QAC/H,OAAO,EAAE,OAAO,EAAE,SAAS,SAAS,EAAE,EAAE,YAAY,EAAE,SAAS,SAAS,YAAY,EAAE,CAAC;IACzF,CAAC;IAED,YAAY;IACZ,MAAM,SAAS,GAAG,EAAE,CAAC,UAAU,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IACnE,IAAI,EAAE,CAAC,cAAc,GAAG,CAAC;QAAE,OAAO,EAAE,OAAO,EAAE,cAAc,EAAE,YAAY,EAAE,gBAAgB,EAAE,CAAC;IAC9F,IAAI,SAAS,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,SAAS,CAAC,QAAQ,CAAC,OAAO,CAAC;QAAE,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,YAAY,EAAE,SAAS,EAAE,CAAC;IACzH,IAAI,SAAS,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,SAAS,CAAC,QAAQ,CAAC,OAAO,CAAC;QAAE,OAAO,EAAE,OAAO,EAAE,aAAa,EAAE,YAAY,EAAE,aAAa,EAAE,CAAC;IAC/H,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,YAAY,EAAE,WAAW,EAAE,CAAC;AACxD,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,KAAyB;IACtD,MAAM,MAAM,GAAG,IAAI,GAAG,EAAoE,CAAC;IAC3F,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,MAAM,GAAG,GAAG,cAAc,CAAC,CAAC,CAAC,iBAAiB,CAAC,CAAC;QAChD,MAAM,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QAC1B,IAAI,CAAC;YAAE,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;;YAClB,MAAM,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,iBAAiB,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;IAChE,CAAC;IAED,MAAM,IAAI,GAA2B,EAAE,CAAC;IACxC,MAAM,MAAM,GAAG,IAAI,GAAG,EAA6B,CAAC;IACpD,MAAM,SAAS,GAAG,IAAI,GAAG,EAAkB,CAAC;IAE5C,KAAK,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC,IAAI,MAAM,CAAC,OAAO,EAAE,EAAE,CAAC;QAC1C,MAAM,EAAE,OAAO,EAAE,YAAY,EAAE,GAAG,iBAAiB,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC;QAClF,IAAI,OAAO,GAAG,OAAO,CAAC;QACtB,IAAI,IAAI,CAAC,OAAO,CAAC,KAAK,SAAS,EAAE,CAAC;YAChC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;YACnB,OAAO,GAAG,GAAG,OAAO,IAAI,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;QAC1C,CAAC;aAAM,CAAC;YACN,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACpB,CAAC;QAED,MAAM,MAAM,GAAG,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CA
AC,CAAC,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAE,CAAC;QACxD,MAAM,IAAI,GAAsB;YAC9B,QAAQ,EAAE,OAAO;YACjB,YAAY;YACZ,WAAW,EAAE,CAAC,CAAC,EAAE;YACjB,eAAe,EAAE,MAAM,CAAC,EAAE;YAC1B,YAAY,EAAE,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,iCAAiC,CAAC;YAC3E,KAAK,EAAE,CAAC,CAAC,KAAK,CAAC,MAAM;YACrB,kBAAkB,EAAE,CAAC,CAAC,EAAE,CAAC,SAAS;SACnC,CAAC;QACF,MAAM,CAAC,GAAG,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;QAC1B,SAAS,CAAC,GAAG,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;IAChC,CAAC;IAED,OAAO,EAAE,MAAM,EAAE,oBAAoB,EAAE,SAAS,EAAE,CAAC;AACrD,CAAC"}
@@ -0,0 +1,37 @@
1
+ import type { DocumentViewNode, ToonCommentMarkerMap } from './document_view-types.js';
2
+ /**
3
+ * Format a single toon data line for one DocumentViewNode.
4
+ * Handles table-context-aware style (th/td) and header stripping.
5
+ */
6
+ export declare function formatToonDataLine(n: DocumentViewNode, options?: {
7
+ compact?: boolean;
8
+ commentMarkers?: ToonCommentMarkerMap;
9
+ }): string;
10
+ /**
11
+ * Collect table marker info (dimensions) from nodes for #TABLE markers.
12
+ * Column headers are NOT included in the marker — they appear once in the th() rows.
13
+ */
14
+ export declare function collectTableMarkerInfo(nodes: readonly Pick<DocumentViewNode, 'table_context'>[]): Map<number, {
15
+ id: string;
16
+ totalRows: number;
17
+ totalCols: number;
18
+ }>;
19
+ /**
20
+ * Format a #TABLE marker line from collected table info.
21
+ * Headers are omitted — they appear exactly once in the th(0,N) data rows.
22
+ */
23
+ export declare function formatTableMarker(info: {
24
+ id: string;
25
+ totalRows: number;
26
+ totalCols: number;
27
+ }): string;
28
+ export declare function formatToonCommentLines(node: Pick<DocumentViewNode, 'id' | 'comments'>): string[];
29
+ export declare function formatToonCommentEndnoteLines(node: Pick<DocumentViewNode, 'id' | 'comments'>): string[];
30
+ export declare function formatToonCommentsEndnotesBlock(nodes: readonly Pick<DocumentViewNode, 'id' | 'comments'>[]): string[];
31
+ export declare function renderToon(nodes: DocumentViewNode[], options?: {
32
+ compact?: boolean;
33
+ }): string;
34
+ export declare function renderToonWithCommentEndnotes(nodes: DocumentViewNode[], options?: {
35
+ compact?: boolean;
36
+ }): string;
37
+ //# sourceMappingURL=document_view-toon.d.ts.map