@usejunior/docx-core 0.9.1 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/.tsbuildinfo +1 -1
- package/dist/atomizer.d.ts +15 -1
- package/dist/atomizer.d.ts.map +1 -1
- package/dist/atomizer.js +37 -1
- package/dist/atomizer.js.map +1 -1
- package/dist/baselines/atomizer/documentReconstructor.d.ts.map +1 -1
- package/dist/baselines/atomizer/documentReconstructor.js +218 -90
- package/dist/baselines/atomizer/documentReconstructor.js.map +1 -1
- package/dist/baselines/atomizer/formattingFidelity.d.ts +99 -0
- package/dist/baselines/atomizer/formattingFidelity.d.ts.map +1 -0
- package/dist/baselines/atomizer/formattingFidelity.js +449 -0
- package/dist/baselines/atomizer/formattingFidelity.js.map +1 -0
- package/dist/baselines/atomizer/inPlaceModifier-bookmarks.d.ts +37 -0
- package/dist/baselines/atomizer/inPlaceModifier-bookmarks.d.ts.map +1 -0
- package/dist/baselines/atomizer/inPlaceModifier-bookmarks.js +189 -0
- package/dist/baselines/atomizer/inPlaceModifier-bookmarks.js.map +1 -0
- package/dist/baselines/atomizer/inPlaceModifier-containers.d.ts +74 -0
- package/dist/baselines/atomizer/inPlaceModifier-containers.d.ts.map +1 -0
- package/dist/baselines/atomizer/inPlaceModifier-containers.js +171 -0
- package/dist/baselines/atomizer/inPlaceModifier-containers.js.map +1 -0
- package/dist/baselines/atomizer/inPlaceModifier-deletion.d.ts +88 -0
- package/dist/baselines/atomizer/inPlaceModifier-deletion.d.ts.map +1 -0
- package/dist/baselines/atomizer/inPlaceModifier-deletion.js +326 -0
- package/dist/baselines/atomizer/inPlaceModifier-deletion.js.map +1 -0
- package/dist/baselines/atomizer/inPlaceModifier-postprocess.d.ts +85 -0
- package/dist/baselines/atomizer/inPlaceModifier-postprocess.d.ts.map +1 -0
- package/dist/baselines/atomizer/inPlaceModifier-postprocess.js +402 -0
- package/dist/baselines/atomizer/inPlaceModifier-postprocess.js.map +1 -0
- package/dist/baselines/atomizer/inPlaceModifier-presplit.d.ts +39 -0
- package/dist/baselines/atomizer/inPlaceModifier-presplit.d.ts.map +1 -0
- package/dist/baselines/atomizer/inPlaceModifier-presplit.js +265 -0
- package/dist/baselines/atomizer/inPlaceModifier-presplit.js.map +1 -0
- package/dist/baselines/atomizer/inPlaceModifier-shared.d.ts +62 -0
- package/dist/baselines/atomizer/inPlaceModifier-shared.d.ts.map +1 -0
- package/dist/baselines/atomizer/inPlaceModifier-shared.js +139 -0
- package/dist/baselines/atomizer/inPlaceModifier-shared.js.map +1 -0
- package/dist/baselines/atomizer/inPlaceModifier-wrappers.d.ts +189 -0
- package/dist/baselines/atomizer/inPlaceModifier-wrappers.d.ts.map +1 -0
- package/dist/baselines/atomizer/inPlaceModifier-wrappers.js +427 -0
- package/dist/baselines/atomizer/inPlaceModifier-wrappers.js.map +1 -0
- package/dist/baselines/atomizer/inPlaceModifier.d.ts +6 -290
- package/dist/baselines/atomizer/inPlaceModifier.d.ts.map +1 -1
- package/dist/baselines/atomizer/inPlaceModifier.js +23 -1828
- package/dist/baselines/atomizer/inPlaceModifier.js.map +1 -1
- package/dist/baselines/atomizer/pipeline.d.ts +76 -1
- package/dist/baselines/atomizer/pipeline.d.ts.map +1 -1
- package/dist/baselines/atomizer/pipeline.js +204 -27
- package/dist/baselines/atomizer/pipeline.js.map +1 -1
- package/dist/baselines/atomizer/trackChangesAcceptorAst.d.ts.map +1 -1
- package/dist/baselines/atomizer/trackChangesAcceptorAst.js +56 -160
- package/dist/baselines/atomizer/trackChangesAcceptorAst.js.map +1 -1
- package/dist/compare-types.d.ts +151 -0
- package/dist/compare-types.d.ts.map +1 -0
- package/dist/compare-types.js +2 -0
- package/dist/compare-types.js.map +1 -0
- package/dist/core-types.d.ts +5 -1
- package/dist/core-types.d.ts.map +1 -1
- package/dist/core-types.js +5 -1
- package/dist/core-types.js.map +1 -1
- package/dist/footnotes.d.ts +8 -3
- package/dist/footnotes.d.ts.map +1 -1
- package/dist/footnotes.js +8 -3
- package/dist/footnotes.js.map +1 -1
- package/dist/index.d.ts +6 -150
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +6 -0
- package/dist/index.js.map +1 -1
- package/dist/integration/libreoffice-oracle.d.ts +41 -0
- package/dist/integration/libreoffice-oracle.d.ts.map +1 -0
- package/dist/integration/libreoffice-oracle.js +282 -0
- package/dist/integration/libreoffice-oracle.js.map +1 -0
- package/dist/primitives/accept_changes.d.ts +2 -2
- package/dist/primitives/accept_changes.d.ts.map +1 -1
- package/dist/primitives/accept_changes.js +24 -79
- package/dist/primitives/accept_changes.js.map +1 -1
- package/dist/primitives/comments.d.ts +12 -3
- package/dist/primitives/comments.d.ts.map +1 -1
- package/dist/primitives/comments.js +374 -97
- package/dist/primitives/comments.js.map +1 -1
- package/dist/primitives/content_fingerprint.d.ts +29 -0
- package/dist/primitives/content_fingerprint.d.ts.map +1 -0
- package/dist/primitives/content_fingerprint.js +63 -0
- package/dist/primitives/content_fingerprint.js.map +1 -0
- package/dist/primitives/document.d.ts +56 -15
- package/dist/primitives/document.d.ts.map +1 -1
- package/dist/primitives/document.js +303 -32
- package/dist/primitives/document.js.map +1 -1
- package/dist/primitives/document_view-comments.d.ts +18 -0
- package/dist/primitives/document_view-comments.d.ts.map +1 -0
- package/dist/primitives/document_view-comments.js +159 -0
- package/dist/primitives/document_view-comments.js.map +1 -0
- package/dist/primitives/document_view-headings.d.ts +45 -0
- package/dist/primitives/document_view-headings.d.ts.map +1 -0
- package/dist/primitives/document_view-headings.js +247 -0
- package/dist/primitives/document_view-headings.js.map +1 -0
- package/dist/primitives/document_view-styles.d.ts +11 -0
- package/dist/primitives/document_view-styles.d.ts.map +1 -0
- package/dist/primitives/document_view-styles.js +104 -0
- package/dist/primitives/document_view-styles.js.map +1 -0
- package/dist/primitives/document_view-toon.d.ts +37 -0
- package/dist/primitives/document_view-toon.d.ts.map +1 -0
- package/dist/primitives/document_view-toon.js +199 -0
- package/dist/primitives/document_view-toon.js.map +1 -0
- package/dist/primitives/document_view-types.d.ts +137 -0
- package/dist/primitives/document_view-types.d.ts.map +1 -0
- package/dist/primitives/document_view-types.js +2 -0
- package/dist/primitives/document_view-types.js.map +1 -0
- package/dist/primitives/document_view.d.ts +8 -106
- package/dist/primitives/document_view.d.ts.map +1 -1
- package/dist/primitives/document_view.js +134 -301
- package/dist/primitives/document_view.js.map +1 -1
- package/dist/primitives/dom-helpers.d.ts +9 -0
- package/dist/primitives/dom-helpers.d.ts.map +1 -1
- package/dist/primitives/dom-helpers.js +10 -1
- package/dist/primitives/dom-helpers.js.map +1 -1
- package/dist/primitives/footnotes.d.ts +4 -3
- package/dist/primitives/footnotes.d.ts.map +1 -1
- package/dist/primitives/footnotes.js +232 -44
- package/dist/primitives/footnotes.js.map +1 -1
- package/dist/primitives/formatting_tags.d.ts +6 -0
- package/dist/primitives/formatting_tags.d.ts.map +1 -1
- package/dist/primitives/formatting_tags.js +6 -1
- package/dist/primitives/formatting_tags.js.map +1 -1
- package/dist/primitives/index.d.ts +6 -0
- package/dist/primitives/index.d.ts.map +1 -1
- package/dist/primitives/index.js +5 -0
- package/dist/primitives/index.js.map +1 -1
- package/dist/primitives/layout.d.ts +4 -3
- package/dist/primitives/layout.d.ts.map +1 -1
- package/dist/primitives/layout.js +32 -3
- package/dist/primitives/layout.js.map +1 -1
- package/dist/primitives/merge_runs.d.ts +21 -3
- package/dist/primitives/merge_runs.d.ts.map +1 -1
- package/dist/primitives/merge_runs.js +32 -10
- package/dist/primitives/merge_runs.js.map +1 -1
- package/dist/primitives/namespaces.d.ts +6 -0
- package/dist/primitives/namespaces.d.ts.map +1 -1
- package/dist/primitives/namespaces.js +9 -0
- package/dist/primitives/namespaces.js.map +1 -1
- package/dist/primitives/reject_changes.d.ts +2 -2
- package/dist/primitives/reject_changes.d.ts.map +1 -1
- package/dist/primitives/reject_changes.js +24 -81
- package/dist/primitives/reject_changes.js.map +1 -1
- package/dist/primitives/semantic_tags.d.ts +7 -0
- package/dist/primitives/semantic_tags.d.ts.map +1 -1
- package/dist/primitives/semantic_tags.js +21 -3
- package/dist/primitives/semantic_tags.js.map +1 -1
- package/dist/primitives/serialize_html.d.ts +36 -0
- package/dist/primitives/serialize_html.d.ts.map +1 -0
- package/dist/primitives/serialize_html.js +393 -0
- package/dist/primitives/serialize_html.js.map +1 -0
- package/dist/primitives/serialize_markdown.d.ts +16 -0
- package/dist/primitives/serialize_markdown.d.ts.map +1 -0
- package/dist/primitives/serialize_markdown.js +300 -0
- package/dist/primitives/serialize_markdown.js.map +1 -0
- package/dist/primitives/serialize_plaintext.d.ts +15 -0
- package/dist/primitives/serialize_plaintext.d.ts.map +1 -0
- package/dist/primitives/serialize_plaintext.js +154 -0
- package/dist/primitives/serialize_plaintext.js.map +1 -0
- package/dist/primitives/styles.js +22 -22
- package/dist/primitives/styles.js.map +1 -1
- package/dist/primitives/tables.d.ts.map +1 -1
- package/dist/primitives/tables.js +13 -3
- package/dist/primitives/tables.js.map +1 -1
- package/dist/primitives/text.d.ts +2 -1
- package/dist/primitives/text.d.ts.map +1 -1
- package/dist/primitives/text.js +116 -12
- package/dist/primitives/text.js.map +1 -1
- package/dist/primitives/track-changes-emitter.d.ts +139 -0
- package/dist/primitives/track-changes-emitter.d.ts.map +1 -0
- package/dist/primitives/track-changes-emitter.js +241 -0
- package/dist/primitives/track-changes-emitter.js.map +1 -0
- package/dist/primitives/xml-helpers.d.ts +29 -0
- package/dist/primitives/xml-helpers.d.ts.map +1 -0
- package/dist/primitives/xml-helpers.js +35 -0
- package/dist/primitives/xml-helpers.js.map +1 -0
- package/dist/shared/ooxml/namespaces.d.ts +4 -1
- package/dist/shared/ooxml/namespaces.d.ts.map +1 -1
- package/dist/shared/ooxml/namespaces.js +4 -1
- package/dist/shared/ooxml/namespaces.js.map +1 -1
- package/package.json +7 -6
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
// Symbol property key under which a comment object carries its inline-marker runtime data.
// Code in this module reads `comment[INLINE_COMMENT_MARKER_RUNTIME]` and uses its
// `suppressInlineMarkers`, `startVisibleOffset` and `endVisibleOffset` fields.
export const INLINE_COMMENT_MARKER_RUNTIME = Symbol('inline_comment_marker_runtime');
|
|
2
|
+
// Matches the exact set of TOON inline formatting tags that emitFormattingTags() can emit:
|
|
3
|
+
// <b>, </b>, <i>, </i>, <u>, </u>, <highlight>, </highlight>,
|
|
4
|
+
// <a href="...">, </a>, <font ATTR=...>, </font>
|
|
5
|
+
// Anything else in the form `<...>` is literal document text (e.g., `<Borrower>` placeholders
|
|
6
|
+
// in legal templates, or stylesheet samples like `<font>`) and must be counted as visible
|
|
7
|
+
// characters, not skipped as markup.
|
|
8
|
+
//
|
|
9
|
+
// Note the opening `a`/`font` alternative requires `\s[^>]*` (mandatory attributes), because
|
|
10
|
+
// the formatter only emits `<a href="...">` and `<font ATTR=...>` — never bare `<a>` or
|
|
11
|
+
// `<font>`. Allowing the bare forms would cause literal `<a>` / `<font>` in document text to
|
|
12
|
+
// be silently skipped, shifting marker positions.
|
|
13
|
+
// Anchored with `^`, so it only recognizes a tag at the very start of the string passed to
// it; `toonTagLengthAt` slices the input to the candidate position before matching.
export const TOON_INLINE_TAG_RE = /^(?:<\/?(?:b|i|u|highlight)>|<\/(?:a|font)>|<(?:a|font)\s[^>]*>)/;
|
|
14
|
+
/**
 * Break a TOON tagged-text string into an ordered token list.
 *
 * Every position is tested with `toonTagLengthAt`, i.e. against the same grammar
 * (`TOON_INLINE_TAG_RE`) used everywhere else in this module. A recognized tag becomes one
 * `{ kind: 'tag', value }` token; every run of characters between tags is merged into a
 * single `{ kind: 'text', value }` token. Anything of the form `<...>` that is not a
 * recognized tag stays part of the surrounding text token.
 *
 * @param text Tagged text to split.
 * @returns Tokens in source order; concatenating their `value`s reproduces `text`.
 */
export function tokenizeToonInline(text) {
    const tokens = [];
    let pendingText = '';
    // Emit the accumulated literal characters (if any) as one text token.
    const flushText = () => {
        if (pendingText) {
            tokens.push({ kind: 'text', value: pendingText });
            pendingText = '';
        }
    };
    let pos = 0;
    while (pos < text.length) {
        const tagLen = toonTagLengthAt(text, pos);
        if (tagLen > 0) {
            flushText();
            tokens.push({ kind: 'tag', value: text.slice(pos, pos + tagLen) });
            pos += tagLen;
        }
        else {
            pendingText += text[pos];
            pos += 1;
        }
    }
    flushText();
    return tokens;
}
|
|
42
|
+
/**
 * Length of the TOON inline tag that starts exactly at index `i`, or 0 when no recognized
 * tag starts there.
 *
 * @param text Tagged text being scanned.
 * @param i Index to test.
 * @returns Number of characters the tag occupies, or 0.
 */
function toonTagLengthAt(text, i) {
    // Cheap pre-check: only a '<' can start a tag, so the regex runs on '<' only.
    if (text[i] === '<') {
        // The pattern is `^`-anchored, hence the slice to position `i`.
        const hit = TOON_INLINE_TAG_RE.exec(text.slice(i));
        if (hit) {
            return hit[0].length;
        }
    }
    return 0;
}
|
|
48
|
+
/**
 * Count the characters of `text` that are not part of a recognized TOON inline tag.
 *
 * Recognized tags are skipped whole; every other string index (including literal `<...>`
 * sequences that are not TOON tags) counts as one visible character. Counting is per
 * string index, i.e. per UTF-16 code unit.
 *
 * @param text Tagged text to measure.
 * @returns Number of visible characters.
 */
export function countVisibleTextCharacters(text) {
    let total = 0;
    let pos = 0;
    while (pos < text.length) {
        const tagLen = toonTagLengthAt(text, pos);
        if (tagLen > 0) {
            pos += tagLen;
        }
        else {
            total += 1;
            pos += 1;
        }
    }
    return total;
}
|
|
60
|
+
/**
 * Translate a visible-character offset into a raw string index inside tagged text.
 *
 * The scan stops as soon as `visibleOffset` visible characters have been consumed, so the
 * returned index sits before any tag that begins exactly at that position.
 *
 * @param text Tagged text to scan.
 * @param visibleOffset Number of visible characters that must precede the returned index.
 * @returns 0 when `visibleOffset` is zero or negative; `text.length` when the text holds
 *   fewer visible characters than requested; otherwise the matching raw index.
 */
export function findTaggedTextInsertionIndex(text, visibleOffset) {
    if (visibleOffset <= 0) {
        return 0;
    }
    let seen = 0;
    let pos = 0;
    while (pos < text.length) {
        if (seen === visibleOffset) {
            return pos;
        }
        const tagLen = toonTagLengthAt(text, pos);
        if (tagLen > 0) {
            // Tags occupy raw characters but contribute nothing to the visible count.
            pos += tagLen;
        }
        else {
            seen += 1;
            pos += 1;
        }
    }
    return text.length;
}
|
|
76
|
+
export function injectToonCommentMarkers(text, markers) {
|
|
77
|
+
if (markers.length === 0)
|
|
78
|
+
return text;
|
|
79
|
+
let result = text;
|
|
80
|
+
for (const { offset, marker } of markers) {
|
|
81
|
+
const insertionIndex = findTaggedTextInsertionIndex(result, offset);
|
|
82
|
+
result = result.slice(0, insertionIndex) + marker + result.slice(insertionIndex);
|
|
83
|
+
}
|
|
84
|
+
return result;
|
|
85
|
+
}
|
|
86
|
+
/**
 * Walk a comment list depth-first (each comment before its replies) and append one
 * candidate record to `candidates` for every comment that should get inline markers.
 *
 * A comment qualifies when it has a `range`, carries runtime data under
 * `INLINE_COMMENT_MARKER_RUNTIME`, and that runtime data does not set
 * `suppressInlineMarkers`.
 *
 * @param comments Comments to inspect; each must expose a `replies` array.
 * @param paragraphIndexById Map from paragraph id to its position in the node list.
 * @param candidates Output array, mutated in place.
 */
function collectInlineCommentMarkerCandidates(comments, paragraphIndexById, candidates) {
    for (const comment of comments) {
        const runtime = comment[INLINE_COMMENT_MARKER_RUNTIME];
        const range = comment.range;
        const wantsMarkers = Boolean(range && runtime && !runtime.suppressInlineMarkers);
        if (wantsMarkers) {
            const { startParagraphId, endParagraphId } = range;
            const knownIndex = paragraphIndexById.get(startParagraphId);
            candidates.push({
                id: comment.id,
                startParagraphId,
                endParagraphId,
                // Unknown start paragraphs get the largest possible index.
                startParagraphIndex: knownIndex ?? Number.MAX_SAFE_INTEGER,
                startOffset: runtime.startVisibleOffset,
                endOffset: runtime.endVisibleOffset,
            });
        }
        const replies = comment.replies;
        if (replies.length > 0) {
            collectInlineCommentMarkerCandidates(replies, paragraphIndexById, candidates);
        }
    }
}
|
|
104
|
+
function compareInlineCommentCloseOrder(left, right) {
|
|
105
|
+
if (left.startParagraphIndex !== right.startParagraphIndex) {
|
|
106
|
+
return right.startParagraphIndex - left.startParagraphIndex;
|
|
107
|
+
}
|
|
108
|
+
if (left.startOffset !== right.startOffset) {
|
|
109
|
+
return right.startOffset - left.startOffset;
|
|
110
|
+
}
|
|
111
|
+
return right.id - left.id;
|
|
112
|
+
}
|
|
113
|
+
export function collectInlineCommentMarkers(nodes) {
|
|
114
|
+
const paragraphIndexById = new Map();
|
|
115
|
+
for (let index = 0; index < nodes.length; index++) {
|
|
116
|
+
paragraphIndexById.set(nodes[index].id, index);
|
|
117
|
+
}
|
|
118
|
+
const candidates = [];
|
|
119
|
+
for (const node of nodes) {
|
|
120
|
+
if (node.comments && node.comments.length > 0) {
|
|
121
|
+
collectInlineCommentMarkerCandidates(node.comments, paragraphIndexById, candidates);
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
const groupedByParagraph = new Map();
|
|
125
|
+
for (const candidate of candidates) {
|
|
126
|
+
const startOffsets = groupedByParagraph.get(candidate.startParagraphId) ?? new Map();
|
|
127
|
+
const startGroup = startOffsets.get(candidate.startOffset) ?? { closes: [], opens: [] };
|
|
128
|
+
startGroup.opens.push(candidate);
|
|
129
|
+
startOffsets.set(candidate.startOffset, startGroup);
|
|
130
|
+
groupedByParagraph.set(candidate.startParagraphId, startOffsets);
|
|
131
|
+
const endOffsets = groupedByParagraph.get(candidate.endParagraphId) ?? new Map();
|
|
132
|
+
const endGroup = endOffsets.get(candidate.endOffset) ?? { closes: [], opens: [] };
|
|
133
|
+
endGroup.closes.push(candidate);
|
|
134
|
+
endOffsets.set(candidate.endOffset, endGroup);
|
|
135
|
+
groupedByParagraph.set(candidate.endParagraphId, endOffsets);
|
|
136
|
+
}
|
|
137
|
+
const markersByParagraph = new Map();
|
|
138
|
+
for (const [paragraphId, offsetGroups] of groupedByParagraph.entries()) {
|
|
139
|
+
const markers = [];
|
|
140
|
+
const sortedOffsets = Array.from(offsetGroups.keys()).sort((left, right) => right - left);
|
|
141
|
+
for (const offset of sortedOffsets) {
|
|
142
|
+
const group = offsetGroups.get(offset);
|
|
143
|
+
if (!group)
|
|
144
|
+
continue;
|
|
145
|
+
const closes = [...group.closes].sort(compareInlineCommentCloseOrder);
|
|
146
|
+
const opens = [...group.opens].sort((left, right) => left.id - right.id);
|
|
147
|
+
const marker = closes.map((comment) => `[cm-end:${comment.id}]`).join('') +
|
|
148
|
+
opens.map((comment) => `[cm-start:${comment.id}]`).join('');
|
|
149
|
+
if (!marker)
|
|
150
|
+
continue;
|
|
151
|
+
markers.push({ offset, marker });
|
|
152
|
+
}
|
|
153
|
+
if (markers.length > 0) {
|
|
154
|
+
markersByParagraph.set(paragraphId, markers);
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
return markersByParagraph;
|
|
158
|
+
}
|
|
159
|
+
//# sourceMappingURL=document_view-comments.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"document_view-comments.js","sourceRoot":"","sources":["../../src/primitives/document_view-comments.ts"],"names":[],"mappings":"AAIA,MAAM,CAAC,MAAM,6BAA6B,GAAG,MAAM,CAAC,+BAA+B,CAAC,CAAC;AAYrF,2FAA2F;AAC3F,gEAAgE;AAChE,mDAAmD;AACnD,8FAA8F;AAC9F,0FAA0F;AAC1F,qCAAqC;AACrC,EAAE;AACF,6FAA6F;AAC7F,wFAAwF;AACxF,6FAA6F;AAC7F,kDAAkD;AAClD,MAAM,CAAC,MAAM,kBAAkB,GAAG,kEAAkE,CAAC;AAErG;;;;;;;GAOG;AACH,MAAM,UAAU,kBAAkB,CAAC,IAAY;IAC7C,MAAM,MAAM,GAAsB,EAAE,CAAC;IACrC,IAAI,MAAM,GAAG,EAAE,CAAC;IAChB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,MAAM,GAAG,eAAe,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;QACxC,IAAI,MAAM,GAAG,CAAC,EAAE,CAAC;YACf,IAAI,MAAM,EAAE,CAAC;gBACX,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC;gBAC7C,MAAM,GAAG,EAAE,CAAC;YACd,CAAC;YACD,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,EAAE,CAAC,CAAC;YAC/D,CAAC,IAAI,MAAM,GAAG,CAAC,CAAC;YAChB,SAAS;QACX,CAAC;QACD,MAAM,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC;IACpB,CAAC;IACD,IAAI,MAAM;QAAE,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC;IACzD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,eAAe,CAAC,IAAY,EAAE,CAAS;IAC9C,IAAI,IAAI,CAAC,CAAC,CAAC,KAAK,GAAG;QAAE,OAAO,CAAC,CAAC;IAC9B,MAAM,KAAK,GAAG,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IACrD,OAAO,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;AACrC,CAAC;AAED,MAAM,UAAU,0BAA0B,CAAC,IAAY;IACrD,IAAI,YAAY,GAAG,CAAC,CAAC;IACrB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,MAAM,GAAG,eAAe,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;QACxC,IAAI,MAAM,GAAG,CAAC,EAAE,CAAC;YACf,CAAC,IAAI,MAAM,GAAG,CAAC,CAAC;YAChB,SAAS;QACX,CAAC;QACD,YAAY,EAAE,CAAC;IACjB,CAAC;IACD,OAAO,YAAY,CAAC;AACtB,CAAC;AAED,MAAM,UAAU,4BAA4B,CAAC,IAAY,EAAE,aAAqB;IAC9E,IAAI,aAAa,IAAI,CAAC;QAAE,OAAO,CAAC,CAAC;IAEjC,IAAI,YAAY,GAAG,CAAC,CAAC;IACrB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM
,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,IAAI,YAAY,KAAK,aAAa;YAAE,OAAO,CAAC,CAAC;QAE7C,MAAM,MAAM,GAAG,eAAe,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;QACxC,IAAI,MAAM,GAAG,CAAC,EAAE,CAAC;YACf,CAAC,IAAI,MAAM,GAAG,CAAC,CAAC;YAChB,SAAS;QACX,CAAC;QAED,YAAY,EAAE,CAAC;IACjB,CAAC;IAED,OAAO,IAAI,CAAC,MAAM,CAAC;AACrB,CAAC;AAED,MAAM,UAAU,wBAAwB,CACtC,IAAY,EACZ,OAAqC;IAErC,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAEtC,IAAI,MAAM,GAAG,IAAI,CAAC;IAClB,KAAK,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,OAAO,EAAE,CAAC;QACzC,MAAM,cAAc,GAAG,4BAA4B,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;QACpE,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,cAAc,CAAC,GAAG,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC;IACnF,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAgBD,SAAS,oCAAoC,CAC3C,QAAwC,EACxC,kBAA+C,EAC/C,UAA0C;IAE1C,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAC/B,MAAM,OAAO,GAAI,OAA0C,CAAC,6BAA6B,CAAC,CAAC;QAC3F,IAAI,OAAO,CAAC,KAAK,IAAI,OAAO,IAAI,CAAC,OAAO,CAAC,qBAAqB,EAAE,CAAC;YAC/D,UAAU,CAAC,IAAI,CAAC;gBACd,EAAE,EAAE,OAAO,CAAC,EAAE;gBACd,gBAAgB,EAAE,OAAO,CAAC,KAAK,CAAC,gBAAgB;gBAChD,cAAc,EAAE,OAAO,CAAC,KAAK,CAAC,cAAc;gBAC5C,mBAAmB,EAAE,kBAAkB,CAAC,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,gBAAgB,CAAC,IAAI,MAAM,CAAC,gBAAgB;gBACtG,WAAW,EAAE,OAAO,CAAC,kBAAkB;gBACvC,SAAS,EAAE,OAAO,CAAC,gBAAgB;aACpC,CAAC,CAAC;QACL,CAAC;QAED,IAAI,OAAO,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC/B,oCAAoC,CAAC,OAAO,CAAC,OAAO,EAAE,kBAAkB,EAAE,UAAU,CAAC,CAAC;QACxF,CAAC;IACH,CAAC;AACH,CAAC;AAED,SAAS,8BAA8B,CACrC,IAAkC,EAClC,KAAmC;IAEnC,IAAI,IAAI,CAAC,mBAAmB,KAAK,KAAK,CAAC,mBAAmB,EAAE,CAAC;QAC3D,OAAO,KAAK,CAAC,mBAAmB,GAAG,IAAI,CAAC,mBAAmB,CAAC;IAC9D,CAAC;IACD,IAAI,IAAI,CAAC,WAAW,KAAK,KAAK,CAAC,WAAW,EAAE,CAAC;QAC3C,OAAO,KAAK,CAAC,WAAW,GAAG,IAAI,CAAC,WAAW,CAAC;IAC9C,CAAC;IACD,OAAO,KAAK,CAAC,EAAE,GAAG,IAAI,CAAC,EAAE,CAAC;AAC5B,CAAC;AAED,MAAM,UAAU,2BAA2B,CACzC,KAAkC;IAElC,MAAM,kBAAkB,GAAG,IAAI,GAAG,EAAkB,CAAC;IACrD,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,GAAG,KAAK,CAAC,MAAM,EAAE,KAAK,EAAE,EAAE,CAAC;QAClD,kBAAkB,CAAC,GAAG,CAAC,KAAK,CAAC,KAAK,CAAE,CAAC,EAAE,EAAE,KAAK,CAAC,CAAC;IAClD,CAAC;IAED,MAAM,
UAAU,GAAmC,EAAE,CAAC;IACtD,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC9C,oCAAoC,CAAC,IAAI,CAAC,QAAQ,EAAE,kBAAkB,EAAE,UAAU,CAAC,CAAC;QACtF,CAAC;IACH,CAAC;IAED,MAAM,kBAAkB,GAAG,IAAI,GAAG,EAAiD,CAAC;IACpF,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;QACnC,MAAM,YAAY,GAAG,kBAAkB,CAAC,GAAG,CAAC,SAAS,CAAC,gBAAgB,CAAC,IAAI,IAAI,GAAG,EAAoC,CAAC;QACvH,MAAM,UAAU,GAAG,YAAY,CAAC,GAAG,CAAC,SAAS,CAAC,WAAW,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC;QACxF,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QACjC,YAAY,CAAC,GAAG,CAAC,SAAS,CAAC,WAAW,EAAE,UAAU,CAAC,CAAC;QACpD,kBAAkB,CAAC,GAAG,CAAC,SAAS,CAAC,gBAAgB,EAAE,YAAY,CAAC,CAAC;QAEjE,MAAM,UAAU,GAAG,kBAAkB,CAAC,GAAG,CAAC,SAAS,CAAC,cAAc,CAAC,IAAI,IAAI,GAAG,EAAoC,CAAC;QACnH,MAAM,QAAQ,GAAG,UAAU,CAAC,GAAG,CAAC,SAAS,CAAC,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC;QAClF,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAChC,UAAU,CAAC,GAAG,CAAC,SAAS,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QAC9C,kBAAkB,CAAC,GAAG,CAAC,SAAS,CAAC,cAAc,EAAE,UAAU,CAAC,CAAC;IAC/D,CAAC;IAED,MAAM,kBAAkB,GAAG,IAAI,GAAG,EAA+B,CAAC;IAClE,KAAK,MAAM,CAAC,WAAW,EAAE,YAAY,CAAC,IAAI,kBAAkB,CAAC,OAAO,EAAE,EAAE,CAAC;QACvE,MAAM,OAAO,GAAwB,EAAE,CAAC;QACxC,MAAM,aAAa,GAAG,KAAK,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC,KAAK,GAAG,IAAI,CAAC,CAAC;QAC1F,KAAK,MAAM,MAAM,IAAI,aAAa,EAAE,CAAC;YACnC,MAAM,KAAK,GAAG,YAAY,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;YACvC,IAAI,CAAC,KAAK;gBAAE,SAAS;YAErB,MAAM,MAAM,GAAG,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,8BAA8B,CAAC,CAAC;YACtE,MAAM,KAAK,GAAG,CAAC,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC,IAAI,CAAC,EAAE,GAAG,KAAK,CAAC,EAAE,CAAC,CAAC;YACzE,MAAM,MAAM,GACV,MAAM,CAAC,GAAG,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,WAAW,OAAO,CAAC,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC1D,KAAK,CAAC,GAAG,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,aAAa,OAAO,CAAC,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAC9D,IAAI,CAAC,MAAM;gBAAE,SAAS;
YACtB,OAAO,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,MAAM,EAAE,CAAC,CAAC;QACnC,CAAC;QAED,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,kBAAkB,CAAC,GAAG,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC;QAC/C,CAAC;IACH,CAAC;IAED,OAAO,kBAAkB,CAAC;AAC5B,CAAC"}
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import { type ParagraphAlignment, type StylesModel } from './styles.js';
|
|
2
|
+
import type { DocumentViewNode, HeaderFormatting, HeadingValue, HeuristicHeadingSource } from './document_view-types.js';
|
|
3
|
+
export type { HeaderFormatting, HeadingSource, HeadingValue, HeuristicHeadingSource } from './document_view-types.js';
|
|
4
|
+
/**
 * Pattern-based header guess for a paragraph's clean text.
 *
 * @param cleanText Paragraph text to inspect.
 * @returns `header_text` and `header_style` set together when a header is recognized
 *   (`title_with_period`, `title_with_colon` or `title_bare`); both `null` otherwise.
 */
export declare function extractHeaderInfo(cleanText: string): {
|
|
5
|
+
header_text: string | null;
|
|
6
|
+
header_style: HeuristicHeadingSource | null;
|
|
7
|
+
};
|
|
8
|
+
/**
 * Decide the canonical heading value for a paragraph.
 *
 * A paragraph style id of the form `Heading1`..`Heading6` always wins and yields a
 * `word_style` heading with that level and the full clean text. Otherwise the heuristic
 * `headerText`/`headerStyle` pair is used (with `level: null`), but only outside table cells.
 *
 * @returns The heading value, or `undefined` when the paragraph is not a heading.
 */
export declare function deriveHeading(paragraphStyleId: string | null, cleanText: string, headerText: string | null, headerStyle: HeuristicHeadingSource | null, isInTableCell: boolean): HeadingValue | undefined;
|
|
9
|
+
/**
 * Detect a run-in header: a bold/underlined prefix at the start of a paragraph that is
 * followed by body text.
 *
 * Returns `null` when the paragraph has no visible runs.
 * NOTE(review): the remaining rejection rules, and the exact content of `raw_text` and
 * `formatting`, live in the part of the implementation not visible in this chunk — confirm
 * there. `headerCharCount` is accumulated from the text length of the header-styled runs.
 */
export declare function detectRunInHeader(params: {
|
|
10
|
+
paragraph: Element;
|
|
11
|
+
paragraphPPr: Element | null;
|
|
12
|
+
paragraphStyleId: string | null;
|
|
13
|
+
styles: StylesModel;
|
|
14
|
+
}): {
|
|
15
|
+
raw_text: string;
|
|
16
|
+
formatting: HeaderFormatting;
|
|
17
|
+
headerCharCount: number;
|
|
18
|
+
} | null;
|
|
19
|
+
/**
|
|
20
|
+
* Detect a centered, ALL-CAPS, bold standalone title (e.g. an NVCA SPA's
|
|
21
|
+
* `SERIES […] PREFERRED STOCK PURCHASE AGREEMENT` title).
|
|
22
|
+
*
|
|
23
|
+
* Strict gates only — fires only when the paragraph cannot be confused with
|
|
24
|
+
* body prose, a placeholder, or a signature line:
|
|
25
|
+
* - paragraph alignment is CENTER
|
|
26
|
+
* - clean text contains no lowercase letters
|
|
27
|
+
* - clean text contains ≥ 3 ASCII letters AND ≥ 2 whitespace-separated
|
|
28
|
+
* word-tokens (so single-token bracketed placeholders like `[COMPANY]`
|
|
29
|
+
* and underscore-only signature lines like `____________` are rejected)
|
|
30
|
+
* - clean text is non-empty and ≤ MAX_CENTERED_TITLE_LENGTH
|
|
31
|
+
* - all visible runs are bold (a single non-bold char disqualifies)
|
|
32
|
+
*/
|
|
33
|
+
export declare function detectTitleCapsCentered(params: {
|
|
34
|
+
paragraph: Element;
|
|
35
|
+
paragraphPPr: Element | null;
|
|
36
|
+
paragraphStyleId: string | null;
|
|
37
|
+
alignment: ParagraphAlignment;
|
|
38
|
+
cleanTextNoLabel: string;
|
|
39
|
+
styles: StylesModel;
|
|
40
|
+
}): {
|
|
41
|
+
raw_text: string;
|
|
42
|
+
formatting: HeaderFormatting;
|
|
43
|
+
} | null;
|
|
44
|
+
/**
 * Post-processing pass over the document view nodes.
 *
 * Returns nothing, so any effect is presumably applied to `nodes` in place.
 * NOTE(review): the implementation is not visible in this chunk — confirm what counts as a
 * "signature cluster" and which node fields are changed.
 */
export declare function suppressSignatureClusters(nodes: DocumentViewNode[]): void;
|
|
45
|
+
//# sourceMappingURL=document_view-headings.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"document_view-headings.d.ts","sourceRoot":"","sources":["../../src/primitives/document_view-headings.ts"],"names":[],"mappings":"AAEA,OAAO,EAAiC,KAAK,kBAAkB,EAAE,KAAK,WAAW,EAAE,MAAM,aAAa,CAAC;AACvG,OAAO,KAAK,EAAE,gBAAgB,EAAE,gBAAgB,EAAE,YAAY,EAAE,sBAAsB,EAAE,MAAM,0BAA0B,CAAC;AAEzH,YAAY,EAAE,gBAAgB,EAAE,aAAa,EAAE,YAAY,EAAE,sBAAsB,EAAE,MAAM,0BAA0B,CAAC;AActH,wBAAgB,iBAAiB,CAAC,SAAS,EAAE,MAAM,GAAG;IAAE,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAAC,YAAY,EAAE,sBAAsB,GAAG,IAAI,CAAA;CAAE,CA2BhI;AAED,wBAAgB,aAAa,CAC3B,gBAAgB,EAAE,MAAM,GAAG,IAAI,EAC/B,SAAS,EAAE,MAAM,EACjB,UAAU,EAAE,MAAM,GAAG,IAAI,EACzB,WAAW,EAAE,sBAAsB,GAAG,IAAI,EAC1C,aAAa,EAAE,OAAO,GACrB,YAAY,GAAG,SAAS,CA2B1B;AAED,wBAAgB,iBAAiB,CAAC,MAAM,EAAE;IACxC,SAAS,EAAE,OAAO,CAAC;IACnB,YAAY,EAAE,OAAO,GAAG,IAAI,CAAC;IAC7B,gBAAgB,EAAE,MAAM,GAAG,IAAI,CAAC;IAChC,MAAM,EAAE,WAAW,CAAC;CACrB,GAAG;IAAE,QAAQ,EAAE,MAAM,CAAC;IAAC,UAAU,EAAE,gBAAgB,CAAC;IAAC,eAAe,EAAE,MAAM,CAAA;CAAE,GAAG,IAAI,CAyDrF;AAED;;;;;;;;;;;;;GAaG;AACH,wBAAgB,uBAAuB,CAAC,MAAM,EAAE;IAC9C,SAAS,EAAE,OAAO,CAAC;IACnB,YAAY,EAAE,OAAO,GAAG,IAAI,CAAC;IAC7B,gBAAgB,EAAE,MAAM,GAAG,IAAI,CAAC;IAChC,SAAS,EAAE,kBAAkB,CAAC;IAC9B,gBAAgB,EAAE,MAAM,CAAC;IACzB,MAAM,EAAE,WAAW,CAAC;CACrB,GAAG;IAAE,QAAQ,EAAE,MAAM,CAAC;IAAC,UAAU,EAAE,gBAAgB,CAAA;CAAE,GAAG,IAAI,CA8C5D;AAWD,wBAAgB,yBAAyB,CAAC,KAAK,EAAE,gBAAgB,EAAE,GAAG,IAAI,CAqCzE"}
|
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
import { OOXML, W } from './namespaces.js';
|
|
2
|
+
import { getParagraphRuns } from './text.js';
|
|
3
|
+
import { extractEffectiveRunFormatting } from './styles.js';
|
|
4
|
+
// Trimmed paragraphs up to this length are judged as a whole (short-paragraph branch).
const SHORT_HEADER_MAX_LENGTH = 50;
// Longest header prefix the pattern branch will accept from a longer paragraph.
const MAX_HEADER_TEXT_LENGTH = 60;
// Centered ALL-CAPS titles (e.g. NVCA COI's `AMENDED AND RESTATED CERTIFICATE
// OF INCORPORATION OF FOO INC.`) routinely exceed 60 chars in real corporate
// documents. The 60-char cap on `extractHeaderInfo` exists to avoid emitting a
// "leading words = header" guess from long body prose, which doesn't apply to
// the standalone-title detector.
const MAX_CENTERED_TITLE_LENGTH = 120;
// Pattern-based header detection fallback (ported from Python ingestor._extract_header_info).
const HEADER_PATTERN = /^([A-Z][^.!?:]*(?:\s+[A-Z][^.!?:]*)*)([.:]?)(?:\s|$)/;
/**
 * Guess whether a paragraph's clean text is, or begins with, a header.
 *
 * Two branches:
 *  - Short paragraph (trimmed length <= SHORT_HEADER_MAX_LENGTH): a trailing `.` or `:`
 *    makes the whole text (minus that character) a header; otherwise the text is a bare
 *    title only if it has at most five whitespace-separated words.
 *  - Longer paragraph: HEADER_PATTERN must match a leading prefix that ends in `.` or `:`,
 *    is at most MAX_HEADER_TEXT_LENGTH characters long, and is followed by more text.
 *
 * The length and leading-capital pre-checks run on the untrimmed input, so text with
 * leading whitespace is never treated as a header.
 *
 * @param cleanText Paragraph text to inspect.
 * @returns `{ header_text, header_style }`; both are `null` when no header is recognized.
 */
export function extractHeaderInfo(cleanText) {
    // A fresh object per call, so callers never share a result instance.
    const noHeader = () => ({ header_text: null, header_style: null });
    const styleForTerminator = (ch) => {
        if (ch === '.')
            return 'title_with_period';
        if (ch === ':')
            return 'title_with_colon';
        return null;
    };
    const isCandidate = Boolean(cleanText) && cleanText.length >= 2 && /^[A-Z]/.test(cleanText);
    if (!isCandidate) {
        return noHeader();
    }
    const stripped = cleanText.trim();
    if (stripped.length <= SHORT_HEADER_MAX_LENGTH) {
        const shortStyle = styleForTerminator(stripped.charAt(stripped.length - 1));
        if (shortStyle) {
            return { header_text: stripped.slice(0, -1), header_style: shortStyle };
        }
        const wordCount = stripped.split(/\s+/).length;
        return wordCount <= 5 ? { header_text: stripped, header_style: 'title_bare' } : noHeader();
    }
    const m = HEADER_PATTERN.exec(stripped);
    if (!m) {
        return noHeader();
    }
    const headerText = (m[1] ?? '').trim();
    const terminator = m[2] ?? '';
    const bodyAfterHeader = stripped.slice(m[0].length);
    if (bodyAfterHeader === '' || headerText.length > MAX_HEADER_TEXT_LENGTH) {
        return noHeader();
    }
    // Long-paragraph matches without an explicit terminator are body prose
    // (e.g. "Termination of Section 2.2(d)(i) shall not affect ..."), not headers;
    // bare titles are only produced by the short-paragraph branch.
    const longStyle = styleForTerminator(terminator);
    return longStyle ? { header_text: headerText, header_style: longStyle } : noHeader();
}
|
|
47
|
+
/**
 * Decide the canonical heading value for a paragraph.
 *
 * @param paragraphStyleId Paragraph style id, or null/empty when there is none.
 * @param cleanText Full clean text of the paragraph.
 * @param headerText Heuristically detected header text, if any.
 * @param headerStyle Name of the heuristic that produced `headerText`, if any.
 * @param isInTableCell Whether the paragraph sits inside a table cell.
 * @returns `{ text, source, level }`, or `undefined` when the paragraph is not a heading.
 */
export function deriveHeading(paragraphStyleId, cleanText, headerText, headerStyle, isInTableCell) {
    // Word built-in heading styles (Heading1..Heading6) are authoritative, even in tables.
    const builtIn = paragraphStyleId ? /^Heading([1-6])$/.exec(paragraphStyleId) : null;
    if (builtIn) {
        return { text: cleanText, source: 'word_style', level: Number.parseInt(builtIn[1], 10) };
    }
    // Inside table cells the heuristic detectors fire on ordinary label/value content
    // ("Name", "Purchase Price:", "Name: Acme"), which are not structural headings, so
    // heuristic results are never promoted there.
    const heuristicApplies = !isInTableCell && headerText && headerStyle;
    if (!heuristicApplies) {
        return undefined;
    }
    return { text: headerText, source: headerStyle, level: null };
}
|
|
73
|
+
/**
 * Detect a run-in header: a leading bold and/or underlined prefix that ends in
 * `.`, `:` or `-` and is followed by non-whitespace body text in the same
 * paragraph.
 *
 * Runs are walked once in document order. Text accumulates into the prefix
 * while every run so far is bold or underlined; the first run that is neither
 * ends the prefix for good, and all later text (whatever its formatting)
 * counts as body.
 *
 * @param params `{ paragraph, paragraphPPr, paragraphStyleId, styles }`; the
 *   last three are forwarded to `extractEffectiveRunFormatting` for each run.
 * @returns `{ raw_text, formatting, headerCharCount }` where `raw_text` is the
 *   trimmed prefix, `formatting` is the bold/italic/underline of the first
 *   prefix run and `headerCharCount` is the untrimmed prefix length; `null`
 *   when the paragraph is not a run-in header.
 */
export function detectRunInHeader(params) {
    const { paragraph, paragraphPPr, paragraphStyleId, styles } = params;
    // Use visible runs only (field code text stripped in getParagraphRuns()).
    const textRuns = getParagraphRuns(paragraph);
    if (textRuns.length === 0) {
        return null;
    }
    // Several entries may point at the same run element; a Set keeps the first
    // occurrence of each and preserves document order.
    const runElements = [...new Set(textRuns.map((entry) => entry.r))];
    let prefix = '';
    let remainder = '';
    let prefixFormatting = null;
    let stillInPrefix = true;
    for (const run of runElements) {
        const effective = extractEffectiveRunFormatting({ run, paragraphPPr, paragraphStyleId, styles });
        const text = Array.from(run.getElementsByTagNameNS(OOXML.W_NS, W.t))
            .map((node) => node.textContent ?? '')
            .join('');
        stillInPrefix = stillInPrefix && Boolean(effective.bold || effective.underline);
        if (!stillInPrefix) {
            remainder += text;
            continue;
        }
        prefix += text;
        // Formatting is reported from the first prefix run only.
        prefixFormatting = prefixFormatting ?? {
            bold: effective.bold,
            italic: effective.italic,
            underline: effective.underline,
        };
    }
    const raw_text = prefix.trim();
    const endsWithTerminator = ['.', ':', '-'].some((mark) => raw_text.endsWith(mark));
    // A real prefix → body transition is required. Whitespace-only "body" means
    // the paragraph is a whole-paragraph bold block (signature label, all-bold
    // short title, …), which must be rejected.
    if (!endsWithTerminator || !prefixFormatting || !remainder.trim()) {
        return null;
    }
    return { raw_text, formatting: prefixFormatting, headerCharCount: prefix.length };
}
|
|
132
|
+
/**
 * Detect a centered, ALL-CAPS, bold standalone title (e.g. an NVCA SPA's
 * `SERIES […] PREFERRED STOCK PURCHASE AGREEMENT` title).
 *
 * Every gate below must pass, so the paragraph cannot be mistaken for body
 * prose, a placeholder or a signature line:
 *  - `alignment` is exactly `'CENTER'`
 *  - the trimmed text is non-empty and at most MAX_CENTERED_TITLE_LENGTH long
 *  - the text contains no ASCII lowercase letter
 *  - the text contains at least 3 ASCII capitals and at least 2
 *    whitespace-separated tokens that include a capital (rejects
 *    single-token placeholders such as `[COMPANY]` and underscore-only
 *    signature rules such as `____________`)
 *  - every run that carries text is bold (one non-bold run disqualifies)
 *
 * @param params `{ paragraph, paragraphPPr, paragraphStyleId, alignment,
 *   cleanTextNoLabel, styles }`.
 * @returns `{ raw_text, formatting }`, with `formatting` taken from the first
 *   text-bearing run, or `null` when any gate fails.
 */
export function detectTitleCapsCentered(params) {
    const { paragraph, paragraphPPr, paragraphStyleId, alignment, cleanTextNoLabel, styles } = params;
    if (alignment !== 'CENTER') {
        return null;
    }
    const title = cleanTextNoLabel.trim();
    if (title.length === 0 || title.length > MAX_CENTERED_TITLE_LENGTH) {
        return null;
    }
    if (/[a-z]/.test(title)) {
        return null;
    }
    // Content gate: real titles are multi-word ALL-CAPS phrases.
    const capitalCount = title.replace(/[^A-Z]/g, '').length;
    const capitalisedWordCount = title.split(/\s+/).filter((word) => /[A-Z]/.test(word)).length;
    if (capitalCount < 3 || capitalisedWordCount < 2) {
        return null;
    }
    const textRuns = getParagraphRuns(paragraph);
    if (textRuns.length === 0) {
        return null;
    }
    let formatting = null;
    // The Set collapses repeated references to one run element, keeping order.
    for (const run of new Set(textRuns.map((entry) => entry.r))) {
        const carriesText = Array.from(run.getElementsByTagNameNS(OOXML.W_NS, W.t))
            .some((node) => (node.textContent ?? '').length > 0);
        if (!carriesText) {
            continue; // empty runs neither qualify nor disqualify the paragraph
        }
        const effective = extractEffectiveRunFormatting({ run, paragraphPPr, paragraphStyleId, styles });
        if (!effective.bold) {
            return null;
        }
        if (formatting === null) {
            formatting = { bold: effective.bold, italic: effective.italic, underline: effective.underline };
        }
    }
    // `formatting` stays null only when no run carried any text.
    return formatting === null ? null : { raw_text: title, formatting };
}
|
|
201
|
+
// A label that fills the whole line: a capital letter followed by up to 28
// letters/spaces and a closing colon, e.g. "Name:" or "By:".
const SIGNATURE_LABEL_LINE_RE = /^[A-Z][a-zA-Z ]{0,28}:\s*$/;
// An ALL-CAPS leading token that is either the entire text or is followed by
// a colon and whitespace, e.g. "COMPANY" or "BY: Jane Doe".
const SIGNATURE_LABEL_PREFIX_RE = /^[A-Z]+(?::\s|$)/;
/**
 * Report whether `text`, once trimmed, matches either signature-label pattern.
 * Empty and whitespace-only text is never a label.
 */
function isSignatureClusterLabel(text) {
    const candidate = text.trim();
    return candidate.length > 0
        && [SIGNATURE_LABEL_LINE_RE, SIGNATURE_LABEL_PREFIX_RE].some((pattern) => pattern.test(candidate));
}
|
|
209
|
+
/**
 * Clear heading metadata, in place, from signature-block labels that sit in a
 * dense cluster of such labels.
 *
 * A cluster is any contiguous window of at least 4 nodes in which at least
 * three quarters of the nodes are signature labels (per
 * `isSignatureClusterLabel` applied to `clean_text`). Only label nodes inside
 * some qualifying window are modified; non-label neighbours keep whatever
 * heading metadata they had.
 *
 * @param nodes Document nodes in order; mutated in place.
 * @returns Nothing.
 */
export function suppressSignatureClusters(nodes) {
    const MIN_WINDOW = 4;
    const total = nodes.length;
    if (total < MIN_WINDOW) {
        return;
    }
    const isLabel = nodes.map((node) => isSignatureClusterLabel(node.clean_text));
    // labelsBefore[i] = number of labels among nodes[0 .. i-1].
    const labelsBefore = [0];
    for (const flag of isLabel) {
        labelsBefore.push(labelsBefore[labelsBefore.length - 1] + (flag ? 1 : 0));
    }
    // Difference array: +1 where a qualifying window opens, -1 just past its
    // end. Its running sum is the number of qualifying windows over an index.
    const delta = new Array(total + 1).fill(0);
    for (let first = 0; first + MIN_WINDOW <= total; first += 1) {
        for (let last = first + MIN_WINDOW - 1; last < total; last += 1) {
            const size = last - first + 1;
            const labels = labelsBefore[last + 1] - labelsBefore[first];
            // Integer form of labels / size >= 3 / 4.
            if (labels * 4 >= size * 3) {
                delta[first] += 1;
                delta[last + 1] -= 1;
            }
        }
    }
    let openWindows = 0;
    nodes.forEach((node, position) => {
        openWindows += delta[position];
        // The density gate only authorises clearing *labels*; an adjacent
        // section heading or body line inside the window is left untouched.
        if (openWindows <= 0 || !isLabel[position]) {
            return;
        }
        node.header = '';
        node.header_formatting = null;
        node.list_metadata.header_text = null;
        node.list_metadata.header_style = null;
        node.list_metadata.header_formatting = null;
    });
}
|
|
247
|
+
//# sourceMappingURL=document_view-headings.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"document_view-headings.js","sourceRoot":"","sources":["../../src/primitives/document_view-headings.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,iBAAiB,CAAC;AAC3C,OAAO,EAAE,gBAAgB,EAAE,MAAM,WAAW,CAAC;AAC7C,OAAO,EAAE,6BAA6B,EAA6C,MAAM,aAAa,CAAC;AAKvG,MAAM,uBAAuB,GAAG,EAAE,CAAC;AACnC,MAAM,sBAAsB,GAAG,EAAE,CAAC;AAClC,8EAA8E;AAC9E,6EAA6E;AAC7E,+EAA+E;AAC/E,8EAA8E;AAC9E,iCAAiC;AACjC,MAAM,yBAAyB,GAAG,GAAG,CAAC;AAEtC,8FAA8F;AAC9F,MAAM,cAAc,GAAG,sDAAsD,CAAC;AAE9E,MAAM,UAAU,iBAAiB,CAAC,SAAiB;IACjD,IAAI,CAAC,SAAS,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC;IACzF,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,SAAS,CAAC;QAAE,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC;IAEhF,MAAM,QAAQ,GAAG,SAAS,CAAC,IAAI,EAAE,CAAC;IAClC,IAAI,QAAQ,CAAC,MAAM,IAAI,uBAAuB,EAAE,CAAC;QAC/C,IAAI,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC;YAAE,OAAO,EAAE,WAAW,EAAE,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,YAAY,EAAE,mBAAmB,EAAE,CAAC;QAC7G,IAAI,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC;YAAE,OAAO,EAAE,WAAW,EAAE,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,YAAY,EAAE,kBAAkB,EAAE,CAAC;QAE5G,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QACpC,IAAI,KAAK,CAAC,MAAM,IAAI,CAAC;YAAE,OAAO,EAAE,WAAW,EAAE,QAAQ,EAAE,YAAY,EAAE,YAAY,EAAE,CAAC;QACpF,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC;IACnD,CAAC;IAED,MAAM,CAAC,GAAG,cAAc,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACxC,IAAI,CAAC,CAAC;QAAE,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC;IACzD,MAAM,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;IACvC,MAAM,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IAC9B,MAAM,SAAS,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;IAC9C,IAAI,CAAC,SAAS,IAAI,UAAU,CAAC,MAAM,GAAG,sBAAsB;QAAE,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC;IAE/G,IAAI,UAAU,KAAK,GAAG;QAAE,OAAO,EAAE,WAAW,EAAE,UAAU,EAAE,YAAY,EAAE,mBAAmB,EAAE,CAAC;IAC9F,IAAI,UAAU,KAAK,GAAG;QAAE,OAAO,EAAE,WAAW,EAAE,UAAU,EAAE,YAAY,EAAE,kBAAkB,EAAE,CAAC;IAC7F
,6EAA6E;IAC7E,+EAA+E;IAC/E,+DAA+D;IAC/D,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC;AACnD,CAAC;AAED,MAAM,UAAU,aAAa,CAC3B,gBAA+B,EAC/B,SAAiB,EACjB,UAAyB,EACzB,WAA0C,EAC1C,aAAsB;IAEtB,MAAM,UAAU,GAAG,gBAAgB,CAAC,CAAC,CAAC,kBAAkB,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IACvF,IAAI,UAAU,EAAE,CAAC;QACf,OAAO;YACL,IAAI,EAAE,SAAS;YACf,MAAM,EAAE,YAAY;YACpB,KAAK,EAAE,MAAM,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAE,EAAE,EAAE,CAAC;SAC3C,CAAC;IACJ,CAAC;IAED,6EAA6E;IAC7E,+EAA+E;IAC/E,gFAAgF;IAChF,+EAA+E;IAC/E,6EAA6E;IAC7E,6EAA6E;IAC7E,IAAI,aAAa;QAAE,OAAO,SAAS,CAAC;IAEpC,IAAI,UAAU,IAAI,WAAW,EAAE,CAAC;QAC9B,OAAO;YACL,IAAI,EAAE,UAAU;YAChB,MAAM,EAAE,WAAW;YACnB,KAAK,EAAE,IAAI;SACZ,CAAC;IACJ,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,MAAM,UAAU,iBAAiB,CAAC,MAKjC;IACC,MAAM,EAAE,SAAS,EAAE,YAAY,EAAE,gBAAgB,EAAE,MAAM,EAAE,GAAG,MAAM,CAAC;IACrE,MAAM,KAAK,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC;IAEvC,0EAA0E;IAC1E,MAAM,IAAI,GAAG,gBAAgB,CAAC,SAAS,CAAC,CAAC;IACzC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAEnC,0CAA0C;IAC1C,MAAM,iBAAiB,GAAc,EAAE,CAAC;IACxC,MAAM,IAAI,GAAG,IAAI,GAAG,EAAW,CAAC;IAChC,KAAK,MAAM,EAAE,IAAI,IAAI,EAAE,CAAC;QACtB,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YACpB,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;YACf,iBAAiB,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QAC/B,CAAC;IACH,CAAC;IAED,uEAAuE;IACvE,mEAAmE;IACnE,uEAAuE;IACvE,2CAA2C;IAC3C,IAAI,UAAU,GAAG,EAAE,CAAC;IACpB,IAAI,QAAQ,GAAG,EAAE,CAAC;IAClB,IAAI,UAAU,GAA4B,IAAI,CAAC;IAC/C,IAAI,eAAe,GAAG,CAAC,CAAC;IACxB,IAAI,QAAQ,GAAG,IAAI,CAAC;IAEpB,KAAK,MAAM,CAAC,IAAI,iBAAiB,EAAE,CAAC;QAClC,MAAM,GAAG,GAAG,6BAA6B,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,YAAY,EAAE,gBAAgB,EAAE,MAAM,EAAE,CAAC,CAAC;QAC9F,MAAM,aAAa,GAAG,GAAG,CAAC,IAAI,IAAI,GAAG,CAAC,SAAS,CAAC;QAChD,MAAM,EAAE,GAAG,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,sBAAsB,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACjE,IAAI,OAAO,GAAG,EAAE,CAAC;QACjB,KAAK,MAAM,CAAC,IAAI,EAAE;YAAE,OAAO,IAAI,CAAC,CAAC,WAAW,IAAI,EAAE,CAAC;QAEnD,IAAI,QAAQ,IA
AI,aAAa,EAAE,CAAC;YAC9B,UAAU,IAAI,OAAO,CAAC;YACtB,eAAe,IAAI,OAAO,CAAC,MAAM,CAAC;YAClC,IAAI,CAAC,UAAU;gBAAE,UAAU,GAAG,EAAE,IAAI,EAAE,GAAG,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,CAAC,MAAM,EAAE,SAAS,EAAE,GAAG,CAAC,SAAS,EAAE,CAAC;QACjG,CAAC;aAAM,CAAC;YACN,QAAQ,GAAG,KAAK,CAAC;YACjB,QAAQ,IAAI,OAAO,CAAC;QACtB,CAAC;IACH,CAAC;IAED,MAAM,OAAO,GAAG,UAAU,CAAC,IAAI,EAAE,CAAC;IAClC,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAC;IAC1B,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,OAAO,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAE,CAAC;QAAE,OAAO,IAAI,CAAC;IAC1D,IAAI,CAAC,UAAU;QAAE,OAAO,IAAI,CAAC;IAC7B,+EAA+E;IAC/E,6EAA6E;IAC7E,4EAA4E;IAC5E,oEAAoE;IACpE,wEAAwE;IACxE,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE;QAAE,OAAO,IAAI,CAAC;IAElC,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,UAAU,EAAE,eAAe,EAAE,CAAC;AAC5D,CAAC;AAED;;;;;;;;;;;;;GAaG;AACH,MAAM,UAAU,uBAAuB,CAAC,MAOvC;IACC,MAAM,EAAE,SAAS,EAAE,YAAY,EAAE,gBAAgB,EAAE,SAAS,EAAE,gBAAgB,EAAE,MAAM,EAAE,GAAG,MAAM,CAAC;IAClG,IAAI,SAAS,KAAK,QAAQ;QAAE,OAAO,IAAI,CAAC;IACxC,MAAM,OAAO,GAAG,gBAAgB,CAAC,IAAI,EAAE,CAAC;IACxC,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAC;IAC1B,IAAI,OAAO,CAAC,MAAM,GAAG,yBAAyB;QAAE,OAAO,IAAI,CAAC;IAC5D,IAAI,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC;QAAE,OAAO,IAAI,CAAC;IACvC,0EAA0E;IAC1E,sEAAsE;IACtE,mEAAmE;IACnE,MAAM,WAAW,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;IAC3D,IAAI,WAAW,GAAG,CAAC;QAAE,OAAO,IAAI,CAAC;IACjC,MAAM,UAAU,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;IACvE,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,IAAI,CAAC;IAEvC,MAAM,IAAI,GAAG,gBAAgB,CAAC,SAAS,CAAC,CAAC;IACzC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IACnC,MAAM,iBAAiB,GAAc,EAAE,CAAC;IACxC,MAAM,IAAI,GAAG,IAAI,GAAG,EAAW,CAAC;IAChC,KAAK,MAAM,EAAE,IAAI,IAAI,EAAE,CAAC;QACtB,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YACpB,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;YACf,iBAAiB,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QAC/B,CAAC;IACH,CAAC;IAED,IAAI,UAAU,GAA4B,IAAI,CAAC;IAC/C,IAAI,UAAU,GAAG,KAAK,CAAC;IACvB,KAAK,MAAM,CAAC,IAAI,iBAAiB,EAAE
,CAAC;QAClC,MAAM,EAAE,GAAG,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,sBAAsB,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACjE,IAAI,UAAU,GAAG,KAAK,CAAC;QACvB,KAAK,MAAM,CAAC,IAAI,EAAE,EAAE,CAAC;YACnB,IAAI,CAAC,CAAC,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACrC,UAAU,GAAG,IAAI,CAAC;gBAClB,MAAM;YACR,CAAC;QACH,CAAC;QACD,IAAI,CAAC,UAAU;YAAE,SAAS;QAC1B,MAAM,GAAG,GAAG,6BAA6B,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,YAAY,EAAE,gBAAgB,EAAE,MAAM,EAAE,CAAC,CAAC;QAC9F,IAAI,CAAC,GAAG,CAAC,IAAI;YAAE,OAAO,IAAI,CAAC;QAC3B,UAAU,GAAG,IAAI,CAAC;QAClB,IAAI,CAAC,UAAU;YAAE,UAAU,GAAG,EAAE,IAAI,EAAE,GAAG,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,CAAC,MAAM,EAAE,SAAS,EAAE,GAAG,CAAC,SAAS,EAAE,CAAC;IACjG,CAAC;IACD,IAAI,CAAC,UAAU,IAAI,CAAC,UAAU;QAAE,OAAO,IAAI,CAAC;IAE5C,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,UAAU,EAAE,CAAC;AAC3C,CAAC;AAED,MAAM,uBAAuB,GAAG,4BAA4B,CAAC;AAC7D,MAAM,yBAAyB,GAAG,kBAAkB,CAAC;AAErD,SAAS,uBAAuB,CAAC,IAAY;IAC3C,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAC5B,IAAI,CAAC,OAAO;QAAE,OAAO,KAAK,CAAC;IAC3B,OAAO,uBAAuB,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,yBAAyB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;AAC1F,CAAC;AAED,MAAM,UAAU,yBAAyB,CAAC,KAAyB;IACjE,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO;IAE7B,MAAM,aAAa,GAAG,IAAI,KAAK,CAAS,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClE,KAAK,IAAI,GAAG,GAAG,CAAC,EAAE,GAAG,GAAG,KAAK,CAAC,MAAM,EAAE,GAAG,EAAE,EAAE,CAAC;QAC5C,aAAa,CAAC,GAAG,GAAG,CAAC,CAAC,GAAG,aAAa,CAAC,GAAG,CAAE,GAAG,CAAC,uBAAuB,CAAC,KAAK,CAAC,GAAG,CAAE,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC3G,CAAC;IAED,MAAM,QAAQ,GAAG,IAAI,KAAK,CAAS,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC7D,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,KAAK,EAAE,EAAE,CAAC;QACvD,KAAK,IAAI,GAAG,GAAG,KAAK,GAAG,CAAC,EAAE,GAAG,GAAG,KAAK,CAAC,MAAM,EAAE,GAAG,EAAE,EAAE,CAAC;YACpD,MAAM,SAAS,GAAG,GAAG,GAAG,KAAK,GAAG,CAAC,CAAC;YAClC,MAAM,UAAU,GAAG,aAAa,CAAC,GAAG,GAAG,CAAC,CAAE,GAAG,aAAa,CAAC,KAAK,CAAE,CAAC;YACnE,IAAI,CAAC,UAAU,GAAG,CAAC,CAAC,GAAG,CAAC,SAAS,GAAG,CAAC,CAA
C;gBAAE,SAAS;YACjD,QAAQ,CAAC,KAAK,CAAE,IAAI,CAAC,CAAC;YACtB,QAAQ,CAAC,GAAG,GAAG,CAAC,CAAE,IAAI,CAAC,CAAC;QAC1B,CAAC;IACH,CAAC;IAED,IAAI,cAAc,GAAG,CAAC,CAAC;IACvB,KAAK,IAAI,GAAG,GAAG,CAAC,EAAE,GAAG,GAAG,KAAK,CAAC,MAAM,EAAE,GAAG,EAAE,EAAE,CAAC;QAC5C,cAAc,IAAI,QAAQ,CAAC,GAAG,CAAE,CAAC;QACjC,IAAI,cAAc,IAAI,CAAC;YAAE,SAAS;QAElC,MAAM,IAAI,GAAG,KAAK,CAAC,GAAG,CAAE,CAAC;QACzB,sEAAsE;QACtE,qEAAqE;QACrE,sEAAsE;QACtE,uEAAuE;QACvE,0CAA0C;QAC1C,IAAI,CAAC,uBAAuB,CAAC,IAAI,CAAC,UAAU,CAAC;YAAE,SAAS;QACxD,IAAI,CAAC,MAAM,GAAG,EAAE,CAAC;QACjB,IAAI,CAAC,iBAAiB,GAAG,IAAI,CAAC;QAC9B,IAAI,CAAC,aAAa,CAAC,WAAW,GAAG,IAAI,CAAC;QACtC,IAAI,CAAC,aAAa,CAAC,YAAY,GAAG,IAAI,CAAC;QACvC,IAAI,CAAC,aAAa,CAAC,iBAAiB,GAAG,IAAI,CAAC;IAC9C,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import type { DocumentStyles, DocumentViewNode, FormattingFingerprint } from './document_view-types.js';
|
|
2
|
+
export type { DocumentStyleInfo, DocumentStyles, FormattingFingerprint } from './document_view-types.js';
|
|
3
|
+
/**
 * Builds the string key used for Map lookups when grouping nodes by formatting fingerprint.
 * The key joins list level, left indent and first-line indent (each fixed to one decimal
 * place), style name, and alignment with `|`.
 */
export declare function fingerprintKey(fp: FormattingFingerprint): string;
|
|
4
|
+
/**
|
|
5
|
+
* v0.3: Compact style fingerprint token.
|
|
6
|
+
* Concatenates style name, list level, alignment, and indentation for token-efficient LLM context.
|
|
7
|
+
* Example: "Normal:L-1:LEFT:I0:H0"
|
|
8
|
+
*/
|
|
9
|
+
export declare function computeFingerprintToken(fp: FormattingFingerprint, styleId?: string): string;
|
|
10
|
+
/**
 * Groups nodes by their `fingerprintKey` and derives one style entry per group.
 * Returns the style entries keyed by style id together with a lookup from
 * fingerprint key to style id.
 */
export declare function discoverStyles(nodes: DocumentViewNode[]): DocumentStyles;
|
|
11
|
+
//# sourceMappingURL=document_view-styles.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"document_view-styles.d.ts","sourceRoot":"","sources":["../../src/primitives/document_view-styles.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAqB,cAAc,EAAE,gBAAgB,EAAE,qBAAqB,EAAE,MAAM,0BAA0B,CAAC;AAE3H,YAAY,EAAE,iBAAiB,EAAE,cAAc,EAAE,qBAAqB,EAAE,MAAM,0BAA0B,CAAC;AAIzG,wBAAgB,cAAc,CAAC,EAAE,EAAE,qBAAqB,GAAG,MAAM,CAGhE;AAED;;;;GAIG;AACH,wBAAgB,uBAAuB,CAAC,EAAE,EAAE,qBAAqB,EAAE,OAAO,CAAC,EAAE,MAAM,GAAG,MAAM,CAO3F;AA6CD,wBAAgB,cAAc,CAAC,KAAK,EAAE,gBAAgB,EAAE,GAAG,cAAc,CAsCxE"}
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
import { LabelType } from './list_labels.js';
|
|
2
|
+
const STYLE_EXAMPLE_TEXT_PREVIEW_LENGTH = 50;
|
|
3
|
+
/**
 * Build the string key that identifies a formatting fingerprint in Map
 * lookups. Fields are joined with `|` in the order: list level, left indent,
 * first-line indent, style name, alignment. Both indents are fixed to one
 * decimal place, so values that agree to that precision share a key.
 */
export function fingerprintKey(fp) {
    const leftIndent = fp.left_indent_pt.toFixed(1);
    const firstLineIndent = fp.first_line_indent_pt.toFixed(1);
    const { list_level: level, style_name: style, alignment } = fp;
    return `${level}|${leftIndent}|${firstLineIndent}|${style}|${alignment}`;
}
|
|
7
|
+
/**
 * v0.3: Compact style fingerprint token for token-efficient LLM context.
 *
 * Layout: `<name>:L<list level>:<alignment>:I<left indent>:H<first-line indent>`,
 * with both indents rounded to the nearest integer. `<name>` is `styleId` when
 * truthy, otherwise the fingerprint's style name, otherwise `body`.
 * Example: "Normal:L-1:LEFT:I0:H0"
 */
export function computeFingerprintToken(fp, styleId) {
    const label = styleId || fp.style_name || 'body';
    const leftPt = Math.round(fp.left_indent_pt);
    const firstLinePt = Math.round(fp.first_line_indent_pt);
    return `${label}:L${fp.list_level}:${fp.alignment}:I${leftPt}:H${firstLinePt}`;
}
|
|
20
|
+
/**
 * Choose a semantic base id and display name for one fingerprint group.
 *
 * List groups (`list_level >= 0`) are named from the list level together with
 * the first truthy `list_metadata.label_type` found among the group's nodes.
 * Non-list groups are named from the left indent and the style name, checked
 * in that order.
 *
 * @param params `{ fp, nodes }`: the group's fingerprint and its member nodes.
 * @returns `{ base_id, display_name }`.
 */
function inferSemanticName(params) {
    const { fp, nodes } = params;
    // First label type present in the group, or null when no node has one.
    const labelType = nodes.find((node) => node.list_metadata.label_type)?.list_metadata.label_type ?? null;
    const listLevel = fp.list_level;
    if (listLevel >= 0) {
        switch (listLevel) {
            case 0:
                switch (labelType) {
                    case LabelType.ARTICLE:
                        return { base_id: 'article', display_name: 'Article Heading' };
                    case LabelType.SECTION:
                        return { base_id: 'section', display_name: 'Section Heading' };
                    case LabelType.ROMAN:
                        return { base_id: 'roman_section', display_name: 'Roman Numeral Section' };
                    default:
                        return { base_id: 'top_level', display_name: 'Top-Level List Item' };
                }
            case 1:
                switch (labelType) {
                    case LabelType.LETTER:
                        return { base_id: 'subsection', display_name: 'Subsection (a)/(A)' };
                    case LabelType.NUMBER:
                        return { base_id: 'subsection_number', display_name: 'Numbered Subsection' };
                    case LabelType.ROMAN:
                        return { base_id: 'subsection_roman', display_name: 'Roman Subsection' };
                    default:
                        return { base_id: 'level_1', display_name: `Level ${listLevel} List Item` };
                }
            default:
                switch (labelType) {
                    case LabelType.ROMAN:
                        return { base_id: `level_${listLevel}_roman`, display_name: `Level ${listLevel} Roman` };
                    case LabelType.LETTER:
                        return { base_id: `level_${listLevel}_letter`, display_name: `Level ${listLevel} Letter` };
                    default:
                        return { base_id: `level_${listLevel}`, display_name: `Level ${listLevel} List Item` };
                }
        }
    }
    // Non-list paragraph: lower-case the style name and turn whitespace runs
    // into underscores before looking for keywords.
    const normalizedStyle = fp.style_name.toLowerCase().replace(/\s+/g, '_');
    const mentionsAny = (...keywords) => keywords.some((keyword) => normalizedStyle.includes(keyword));
    // Any positive left indent takes priority over the style-name keywords.
    if (fp.left_indent_pt > 0) {
        return { base_id: 'indent_block', display_name: 'Indented Block' };
    }
    if (mentionsAny('heading', 'title')) {
        return { base_id: 'heading', display_name: 'Heading' };
    }
    if (mentionsAny('quote', 'block')) {
        return { base_id: 'block_quote', display_name: 'Block Quote' };
    }
    return { base_id: 'body', display_name: 'Body Text' };
}
|
|
66
|
+
/**
 * Group nodes by formatting fingerprint and describe each group as a style.
 *
 * Each group gets a style id from `inferSemanticName`; when several groups
 * share a base id, the second and later ones receive `_1`, `_2`, … suffixes in
 * first-seen order. The example node for a group is the one at index
 * `floor(count / 2)` of its members.
 *
 * @param nodes Document nodes, each carrying a `style_fingerprint`.
 * @returns `{ styles, fingerprint_to_style }`: a Map from style id to style
 *   info and a Map from fingerprint key to style id.
 */
export function discoverStyles(nodes) {
    // Bucket nodes by fingerprint key; Map iteration keeps first-seen order.
    const buckets = new Map();
    for (const node of nodes) {
        const fingerprint = node.style_fingerprint;
        const key = fingerprintKey(fingerprint);
        const bucket = buckets.get(key);
        if (bucket === undefined) {
            buckets.set(key, { fp: fingerprint, nodes: [node] });
        }
        else {
            bucket.nodes.push(node);
        }
    }
    const repeatsByBaseId = new Map();
    const styles = new Map();
    const fingerprintToStyle = new Map();
    buckets.forEach((bucket, key) => {
        const { base_id, display_name } = inferSemanticName({ fp: bucket.fp, nodes: bucket.nodes });
        // 0 for the first group with this base id, then 1, 2, … for repeats.
        const previous = repeatsByBaseId.get(base_id);
        const ordinal = previous === undefined ? 0 : previous + 1;
        repeatsByBaseId.set(base_id, ordinal);
        const styleId = ordinal === 0 ? base_id : `${base_id}_${ordinal}`;
        const sample = bucket.nodes[Math.floor(bucket.nodes.length / 2)];
        styles.set(styleId, {
            style_id: styleId,
            display_name,
            fingerprint: bucket.fp,
            example_node_id: sample.id,
            example_text: sample.clean_text.slice(0, STYLE_EXAMPLE_TEXT_PREVIEW_LENGTH),
            count: bucket.nodes.length,
            dominant_alignment: bucket.fp.alignment,
        });
        fingerprintToStyle.set(key, styleId);
    });
    return { styles, fingerprint_to_style: fingerprintToStyle };
}
|
|
104
|
+
//# sourceMappingURL=document_view-styles.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"document_view-styles.js","sourceRoot":"","sources":["../../src/primitives/document_view-styles.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAK7C,MAAM,iCAAiC,GAAG,EAAE,CAAC;AAE7C,MAAM,UAAU,cAAc,CAAC,EAAyB;IACtD,4CAA4C;IAC5C,OAAO,GAAG,EAAE,CAAC,UAAU,IAAI,EAAE,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,oBAAoB,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,UAAU,IAAI,EAAE,CAAC,SAAS,EAAE,CAAC;AACnI,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,uBAAuB,CAAC,EAAyB,EAAE,OAAgB;IACjF,MAAM,IAAI,GAAG,OAAO,IAAI,EAAE,CAAC,UAAU,IAAI,MAAM,CAAC;IAChD,MAAM,KAAK,GAAG,IAAI,EAAE,CAAC,UAAU,EAAE,CAAC;IAClC,MAAM,KAAK,GAAG,EAAE,CAAC,SAAS,CAAC;IAC3B,MAAM,MAAM,GAAG,IAAI,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,cAAc,CAAC,EAAE,CAAC;IACnD,MAAM,OAAO,GAAG,IAAI,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,oBAAoB,CAAC,EAAE,CAAC;IAC1D,OAAO,GAAG,IAAI,IAAI,KAAK,IAAI,KAAK,IAAI,MAAM,IAAI,OAAO,EAAE,CAAC;AAC1D,CAAC;AAED,SAAS,iBAAiB,CAAC,MAG1B;IACC,MAAM,EAAE,EAAE,EAAE,KAAK,EAAE,GAAG,MAAM,CAAC;IAE7B,oCAAoC;IACpC,IAAI,SAAS,GAAqB,IAAI,CAAC;IACvC,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,IAAI,CAAC,CAAC,aAAa,CAAC,UAAU,EAAE,CAAC;YAC/B,SAAS,GAAG,CAAC,CAAC,aAAa,CAAC,UAAU,CAAC;YACvC,MAAM;QACR,CAAC;IACH,CAAC;IAED,MAAM,SAAS,GAAG,EAAE,CAAC,UAAU,CAAC;IAEhC,IAAI,SAAS,IAAI,CAAC,EAAE,CAAC;QACnB,IAAI,SAAS,KAAK,CAAC,EAAE,CAAC;YACpB,IAAI,SAAS,KAAK,SAAS,CAAC,OAAO;gBAAE,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,YAAY,EAAE,iBAAiB,EAAE,CAAC;YACpG,IAAI,SAAS,KAAK,SAAS,CAAC,OAAO;gBAAE,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,YAAY,EAAE,iBAAiB,EAAE,CAAC;YACpG,IAAI,SAAS,KAAK,SAAS,CAAC,KAAK;gBAAE,OAAO,EAAE,OAAO,EAAE,eAAe,EAAE,YAAY,EAAE,uBAAuB,EAAE,CAAC;YAC9G,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,YAAY,EAAE,qBAAqB,EAAE,CAAC;QACvE,CAAC;QACD,IAAI,SAAS,KAAK,CAAC,EAAE,CAAC;YACpB,IAAI,SAAS,KAAK,SAAS,CAAC,MAAM;gBAAE,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,YAAY,EAAE,oBAAoB,EAAE,CAAC;YACzG,IAAI,SAAS,KAAK,SAAS,CAAC,MAAM;gBAAE,OAAO,EAAE,OAAO,EAAE,mBAAmB,EAAE,YAAY,EAAE,qBAAqB,EAAE,CAAC;YACjH,IAAI,SAAS,KAAK,SAAS,CAAC,KAAK;gBAAE,OAAO,EAAE,OAAO,EAAE,kBAAkB,EAAE,YAAY,EAAE,kBAAkB,EAAE,CAAC;
YAC5G,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,YAAY,EAAE,SAAS,SAAS,YAAY,EAAE,CAAC;QAC9E,CAAC;QACD,IAAI,SAAS,KAAK,SAAS,CAAC,KAAK;YAAE,OAAO,EAAE,OAAO,EAAE,SAAS,SAAS,QAAQ,EAAE,YAAY,EAAE,SAAS,SAAS,QAAQ,EAAE,CAAC;QAC5H,IAAI,SAAS,KAAK,SAAS,CAAC,MAAM;YAAE,OAAO,EAAE,OAAO,EAAE,SAAS,SAAS,SAAS,EAAE,YAAY,EAAE,SAAS,SAAS,SAAS,EAAE,CAAC;QAC/H,OAAO,EAAE,OAAO,EAAE,SAAS,SAAS,EAAE,EAAE,YAAY,EAAE,SAAS,SAAS,YAAY,EAAE,CAAC;IACzF,CAAC;IAED,YAAY;IACZ,MAAM,SAAS,GAAG,EAAE,CAAC,UAAU,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;IACnE,IAAI,EAAE,CAAC,cAAc,GAAG,CAAC;QAAE,OAAO,EAAE,OAAO,EAAE,cAAc,EAAE,YAAY,EAAE,gBAAgB,EAAE,CAAC;IAC9F,IAAI,SAAS,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,SAAS,CAAC,QAAQ,CAAC,OAAO,CAAC;QAAE,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,YAAY,EAAE,SAAS,EAAE,CAAC;IACzH,IAAI,SAAS,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,SAAS,CAAC,QAAQ,CAAC,OAAO,CAAC;QAAE,OAAO,EAAE,OAAO,EAAE,aAAa,EAAE,YAAY,EAAE,aAAa,EAAE,CAAC;IAC/H,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,YAAY,EAAE,WAAW,EAAE,CAAC;AACxD,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,KAAyB;IACtD,MAAM,MAAM,GAAG,IAAI,GAAG,EAAoE,CAAC;IAC3F,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,MAAM,GAAG,GAAG,cAAc,CAAC,CAAC,CAAC,iBAAiB,CAAC,CAAC;QAChD,MAAM,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QAC1B,IAAI,CAAC;YAAE,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;;YAClB,MAAM,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,iBAAiB,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;IAChE,CAAC;IAED,MAAM,IAAI,GAA2B,EAAE,CAAC;IACxC,MAAM,MAAM,GAAG,IAAI,GAAG,EAA6B,CAAC;IACpD,MAAM,SAAS,GAAG,IAAI,GAAG,EAAkB,CAAC;IAE5C,KAAK,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC,IAAI,MAAM,CAAC,OAAO,EAAE,EAAE,CAAC;QAC1C,MAAM,EAAE,OAAO,EAAE,YAAY,EAAE,GAAG,iBAAiB,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC;QAClF,IAAI,OAAO,GAAG,OAAO,CAAC;QACtB,IAAI,IAAI,CAAC,OAAO,CAAC,KAAK,SAAS,EAAE,CAAC;YAChC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;YACnB,OAAO,GAAG,GAAG,OAAO,IAAI,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;QAC1C,CAAC;aAAM,CAAC;YACN,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACpB,CAAC;QAED,MAAM,MAAM,GAAG,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC
,CAAC,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAE,CAAC;QACxD,MAAM,IAAI,GAAsB;YAC9B,QAAQ,EAAE,OAAO;YACjB,YAAY;YACZ,WAAW,EAAE,CAAC,CAAC,EAAE;YACjB,eAAe,EAAE,MAAM,CAAC,EAAE;YAC1B,YAAY,EAAE,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,iCAAiC,CAAC;YAC3E,KAAK,EAAE,CAAC,CAAC,KAAK,CAAC,MAAM;YACrB,kBAAkB,EAAE,CAAC,CAAC,EAAE,CAAC,SAAS;SACnC,CAAC;QACF,MAAM,CAAC,GAAG,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;QAC1B,SAAS,CAAC,GAAG,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;IAChC,CAAC;IAED,OAAO,EAAE,MAAM,EAAE,oBAAoB,EAAE,SAAS,EAAE,CAAC;AACrD,CAAC"}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import type { DocumentViewNode, ToonCommentMarkerMap } from './document_view-types.js';
|
|
2
|
+
/**
|
|
3
|
+
* Format a single toon data line for one DocumentViewNode.
|
|
4
|
+
* Handles table-context-aware style (th/td) and header stripping.
|
|
5
|
+
*/
|
|
6
|
+
export declare function formatToonDataLine(n: DocumentViewNode, options?: {
|
|
7
|
+
compact?: boolean;
|
|
8
|
+
commentMarkers?: ToonCommentMarkerMap;
|
|
9
|
+
}): string;
|
|
10
|
+
/**
|
|
11
|
+
* Collect table marker info (dimensions) from nodes for #TABLE markers.
|
|
12
|
+
* Column headers are NOT included in the marker — they appear once in the th() rows.
|
|
13
|
+
*/
|
|
14
|
+
export declare function collectTableMarkerInfo(nodes: readonly Pick<DocumentViewNode, 'table_context'>[]): Map<number, {
|
|
15
|
+
id: string;
|
|
16
|
+
totalRows: number;
|
|
17
|
+
totalCols: number;
|
|
18
|
+
}>;
|
|
19
|
+
/**
|
|
20
|
+
* Format a #TABLE marker line from collected table info.
|
|
21
|
+
* Headers are omitted — they appear exactly once in the th(0,N) data rows.
|
|
22
|
+
*/
|
|
23
|
+
export declare function formatTableMarker(info: {
|
|
24
|
+
id: string;
|
|
25
|
+
totalRows: number;
|
|
26
|
+
totalCols: number;
|
|
27
|
+
}): string;
|
|
28
|
+
export declare function formatToonCommentLines(node: Pick<DocumentViewNode, 'id' | 'comments'>): string[];
|
|
29
|
+
export declare function formatToonCommentEndnoteLines(node: Pick<DocumentViewNode, 'id' | 'comments'>): string[];
|
|
30
|
+
export declare function formatToonCommentsEndnotesBlock(nodes: readonly Pick<DocumentViewNode, 'id' | 'comments'>[]): string[];
|
|
31
|
+
export declare function renderToon(nodes: DocumentViewNode[], options?: {
|
|
32
|
+
compact?: boolean;
|
|
33
|
+
}): string;
|
|
34
|
+
export declare function renderToonWithCommentEndnotes(nodes: DocumentViewNode[], options?: {
|
|
35
|
+
compact?: boolean;
|
|
36
|
+
}): string;
|
|
37
|
+
//# sourceMappingURL=document_view-toon.d.ts.map
|