@usejunior/docx-core 0.9.1 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +202 -21
- package/NOTICE +2 -0
- package/README.md +2 -2
- package/dist/.tsbuildinfo +1 -1
- package/dist/atomizer.d.ts +28 -8
- package/dist/atomizer.d.ts.map +1 -1
- package/dist/atomizer.js +96 -25
- package/dist/atomizer.js.map +1 -1
- package/dist/baselines/atomizer/auxiliaryIdCollision.d.ts +99 -0
- package/dist/baselines/atomizer/auxiliaryIdCollision.d.ts.map +1 -0
- package/dist/baselines/atomizer/auxiliaryIdCollision.js +415 -0
- package/dist/baselines/atomizer/auxiliaryIdCollision.js.map +1 -0
- package/dist/baselines/atomizer/documentReconstructor.d.ts.map +1 -1
- package/dist/baselines/atomizer/documentReconstructor.js +333 -112
- package/dist/baselines/atomizer/documentReconstructor.js.map +1 -1
- package/dist/baselines/atomizer/formattingFidelity.d.ts +99 -0
- package/dist/baselines/atomizer/formattingFidelity.d.ts.map +1 -0
- package/dist/baselines/atomizer/formattingFidelity.js +449 -0
- package/dist/baselines/atomizer/formattingFidelity.js.map +1 -0
- package/dist/baselines/atomizer/inPlaceModifier-bookmarks.d.ts +37 -0
- package/dist/baselines/atomizer/inPlaceModifier-bookmarks.d.ts.map +1 -0
- package/dist/baselines/atomizer/inPlaceModifier-bookmarks.js +189 -0
- package/dist/baselines/atomizer/inPlaceModifier-bookmarks.js.map +1 -0
- package/dist/baselines/atomizer/inPlaceModifier-containers.d.ts +74 -0
- package/dist/baselines/atomizer/inPlaceModifier-containers.d.ts.map +1 -0
- package/dist/baselines/atomizer/inPlaceModifier-containers.js +171 -0
- package/dist/baselines/atomizer/inPlaceModifier-containers.js.map +1 -0
- package/dist/baselines/atomizer/inPlaceModifier-deletion.d.ts +88 -0
- package/dist/baselines/atomizer/inPlaceModifier-deletion.d.ts.map +1 -0
- package/dist/baselines/atomizer/inPlaceModifier-deletion.js +326 -0
- package/dist/baselines/atomizer/inPlaceModifier-deletion.js.map +1 -0
- package/dist/baselines/atomizer/inPlaceModifier-postprocess.d.ts +85 -0
- package/dist/baselines/atomizer/inPlaceModifier-postprocess.d.ts.map +1 -0
- package/dist/baselines/atomizer/inPlaceModifier-postprocess.js +402 -0
- package/dist/baselines/atomizer/inPlaceModifier-postprocess.js.map +1 -0
- package/dist/baselines/atomizer/inPlaceModifier-presplit.d.ts +39 -0
- package/dist/baselines/atomizer/inPlaceModifier-presplit.d.ts.map +1 -0
- package/dist/baselines/atomizer/inPlaceModifier-presplit.js +265 -0
- package/dist/baselines/atomizer/inPlaceModifier-presplit.js.map +1 -0
- package/dist/baselines/atomizer/inPlaceModifier-shared.d.ts +62 -0
- package/dist/baselines/atomizer/inPlaceModifier-shared.d.ts.map +1 -0
- package/dist/baselines/atomizer/inPlaceModifier-shared.js +139 -0
- package/dist/baselines/atomizer/inPlaceModifier-shared.js.map +1 -0
- package/dist/baselines/atomizer/inPlaceModifier-wrappers.d.ts +198 -0
- package/dist/baselines/atomizer/inPlaceModifier-wrappers.d.ts.map +1 -0
- package/dist/baselines/atomizer/inPlaceModifier-wrappers.js +475 -0
- package/dist/baselines/atomizer/inPlaceModifier-wrappers.js.map +1 -0
- package/dist/baselines/atomizer/inPlaceModifier.d.ts +6 -290
- package/dist/baselines/atomizer/inPlaceModifier.d.ts.map +1 -1
- package/dist/baselines/atomizer/inPlaceModifier.js +23 -1828
- package/dist/baselines/atomizer/inPlaceModifier.js.map +1 -1
- package/dist/baselines/atomizer/pipeline.d.ts +36 -2
- package/dist/baselines/atomizer/pipeline.d.ts.map +1 -1
- package/dist/baselines/atomizer/pipeline.js +216 -144
- package/dist/baselines/atomizer/pipeline.js.map +1 -1
- package/dist/baselines/atomizer/trackChangesAcceptorAst.d.ts.map +1 -1
- package/dist/baselines/atomizer/trackChangesAcceptorAst.js +199 -173
- package/dist/baselines/atomizer/trackChangesAcceptorAst.js.map +1 -1
- package/dist/baselines/wmlcomparer/DotnetCli.d.ts.map +1 -1
- package/dist/baselines/wmlcomparer/DotnetCli.js +7 -0
- package/dist/baselines/wmlcomparer/DotnetCli.js.map +1 -1
- package/dist/cli/compare-two.d.ts.map +1 -1
- package/dist/cli/compare-two.js +3 -1
- package/dist/cli/compare-two.js.map +1 -1
- package/dist/cli/conformance-adapter.d.ts +3 -0
- package/dist/cli/conformance-adapter.d.ts.map +1 -0
- package/dist/cli/conformance-adapter.js +93 -0
- package/dist/cli/conformance-adapter.js.map +1 -0
- package/dist/cli/index.d.ts.map +1 -1
- package/dist/cli/index.js +5 -1
- package/dist/cli/index.js.map +1 -1
- package/dist/compare-types.d.ts +197 -0
- package/dist/compare-types.d.ts.map +1 -0
- package/dist/compare-types.js +2 -0
- package/dist/compare-types.js.map +1 -0
- package/dist/core-types.d.ts +5 -1
- package/dist/core-types.d.ts.map +1 -1
- package/dist/core-types.js +5 -1
- package/dist/core-types.js.map +1 -1
- package/dist/footnotes.d.ts +8 -3
- package/dist/footnotes.d.ts.map +1 -1
- package/dist/footnotes.js +8 -3
- package/dist/footnotes.js.map +1 -1
- package/dist/generation/compile.d.ts +21 -0
- package/dist/generation/compile.d.ts.map +1 -0
- package/dist/generation/compile.js +46 -0
- package/dist/generation/compile.js.map +1 -0
- package/dist/generation/context.d.ts +42 -0
- package/dist/generation/context.d.ts.map +1 -0
- package/dist/generation/context.js +65 -0
- package/dist/generation/context.js.map +1 -0
- package/dist/generation/emit/comments-part.d.ts +36 -0
- package/dist/generation/emit/comments-part.d.ts.map +1 -0
- package/dist/generation/emit/comments-part.js +116 -0
- package/dist/generation/emit/comments-part.js.map +1 -0
- package/dist/generation/emit/document-part.d.ts +24 -0
- package/dist/generation/emit/document-part.d.ts.map +1 -0
- package/dist/generation/emit/document-part.js +60 -0
- package/dist/generation/emit/document-part.js.map +1 -0
- package/dist/generation/emit/emit-context.d.ts +26 -0
- package/dist/generation/emit/emit-context.d.ts.map +1 -0
- package/dist/generation/emit/emit-context.js +19 -0
- package/dist/generation/emit/emit-context.js.map +1 -0
- package/dist/generation/emit/header-footer-part.d.ts +23 -0
- package/dist/generation/emit/header-footer-part.d.ts.map +1 -0
- package/dist/generation/emit/header-footer-part.js +57 -0
- package/dist/generation/emit/header-footer-part.js.map +1 -0
- package/dist/generation/emit/numbering-part.d.ts +29 -0
- package/dist/generation/emit/numbering-part.d.ts.map +1 -0
- package/dist/generation/emit/numbering-part.js +100 -0
- package/dist/generation/emit/numbering-part.js.map +1 -0
- package/dist/generation/emit/package-parts.d.ts +24 -0
- package/dist/generation/emit/package-parts.d.ts.map +1 -0
- package/dist/generation/emit/package-parts.js +121 -0
- package/dist/generation/emit/package-parts.js.map +1 -0
- package/dist/generation/emit/paragraph.d.ts +24 -0
- package/dist/generation/emit/paragraph.d.ts.map +1 -0
- package/dist/generation/emit/paragraph.js +63 -0
- package/dist/generation/emit/paragraph.js.map +1 -0
- package/dist/generation/emit/properties.d.ts +34 -0
- package/dist/generation/emit/properties.d.ts.map +1 -0
- package/dist/generation/emit/properties.js +138 -0
- package/dist/generation/emit/properties.js.map +1 -0
- package/dist/generation/emit/run.d.ts +15 -0
- package/dist/generation/emit/run.d.ts.map +1 -0
- package/dist/generation/emit/run.js +71 -0
- package/dist/generation/emit/run.js.map +1 -0
- package/dist/generation/emit/section.d.ts +29 -0
- package/dist/generation/emit/section.d.ts.map +1 -0
- package/dist/generation/emit/section.js +117 -0
- package/dist/generation/emit/section.js.map +1 -0
- package/dist/generation/emit/settings-part.d.ts +13 -0
- package/dist/generation/emit/settings-part.d.ts.map +1 -0
- package/dist/generation/emit/settings-part.js +24 -0
- package/dist/generation/emit/settings-part.js.map +1 -0
- package/dist/generation/emit/styles-part.d.ts +16 -0
- package/dist/generation/emit/styles-part.d.ts.map +1 -0
- package/dist/generation/emit/styles-part.js +80 -0
- package/dist/generation/emit/styles-part.js.map +1 -0
- package/dist/generation/emit/table.d.ts +26 -0
- package/dist/generation/emit/table.d.ts.map +1 -0
- package/dist/generation/emit/table.js +196 -0
- package/dist/generation/emit/table.js.map +1 -0
- package/dist/generation/errors.d.ts +22 -0
- package/dist/generation/errors.d.ts.map +1 -0
- package/dist/generation/errors.js +29 -0
- package/dist/generation/errors.js.map +1 -0
- package/dist/generation/index.d.ts +13 -0
- package/dist/generation/index.d.ts.map +1 -0
- package/dist/generation/index.js +12 -0
- package/dist/generation/index.js.map +1 -0
- package/dist/generation/ordering.d.ts +46 -0
- package/dist/generation/ordering.d.ts.map +1 -0
- package/dist/generation/ordering.js +119 -0
- package/dist/generation/ordering.js.map +1 -0
- package/dist/generation/recipes.d.ts +47 -0
- package/dist/generation/recipes.d.ts.map +1 -0
- package/dist/generation/recipes.js +84 -0
- package/dist/generation/recipes.js.map +1 -0
- package/dist/generation/structural-checks.d.ts +24 -0
- package/dist/generation/structural-checks.d.ts.map +1 -0
- package/dist/generation/structural-checks.js +318 -0
- package/dist/generation/structural-checks.js.map +1 -0
- package/dist/generation/types.d.ts +217 -0
- package/dist/generation/types.d.ts.map +1 -0
- package/dist/generation/types.js +16 -0
- package/dist/generation/types.js.map +1 -0
- package/dist/generation/validate-spec.d.ts +27 -0
- package/dist/generation/validate-spec.d.ts.map +1 -0
- package/dist/generation/validate-spec.js +307 -0
- package/dist/generation/validate-spec.js.map +1 -0
- package/dist/index.d.ts +9 -150
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +14 -0
- package/dist/index.js.map +1 -1
- package/dist/integration/generation-probes.d.ts +15 -0
- package/dist/integration/generation-probes.d.ts.map +1 -0
- package/dist/integration/generation-probes.js +84 -0
- package/dist/integration/generation-probes.js.map +1 -0
- package/dist/integration/libreoffice-oracle.d.ts +49 -0
- package/dist/integration/libreoffice-oracle.d.ts.map +1 -0
- package/dist/integration/libreoffice-oracle.js +290 -0
- package/dist/integration/libreoffice-oracle.js.map +1 -0
- package/dist/integration/synthetic-docx-fixture.d.ts +72 -0
- package/dist/integration/synthetic-docx-fixture.d.ts.map +1 -1
- package/dist/integration/synthetic-docx-fixture.js +131 -4
- package/dist/integration/synthetic-docx-fixture.js.map +1 -1
- package/dist/primitives/accept_changes.d.ts +4 -3
- package/dist/primitives/accept_changes.d.ts.map +1 -1
- package/dist/primitives/accept_changes.js +163 -77
- package/dist/primitives/accept_changes.js.map +1 -1
- package/dist/primitives/comments.d.ts +12 -3
- package/dist/primitives/comments.d.ts.map +1 -1
- package/dist/primitives/comments.js +374 -97
- package/dist/primitives/comments.js.map +1 -1
- package/dist/primitives/content_fingerprint.d.ts +29 -0
- package/dist/primitives/content_fingerprint.d.ts.map +1 -0
- package/dist/primitives/content_fingerprint.js +63 -0
- package/dist/primitives/content_fingerprint.js.map +1 -0
- package/dist/primitives/document.d.ts +94 -15
- package/dist/primitives/document.d.ts.map +1 -1
- package/dist/primitives/document.js +373 -36
- package/dist/primitives/document.js.map +1 -1
- package/dist/primitives/document_view-comments.d.ts +18 -0
- package/dist/primitives/document_view-comments.d.ts.map +1 -0
- package/dist/primitives/document_view-comments.js +160 -0
- package/dist/primitives/document_view-comments.js.map +1 -0
- package/dist/primitives/document_view-headings.d.ts +45 -0
- package/dist/primitives/document_view-headings.d.ts.map +1 -0
- package/dist/primitives/document_view-headings.js +247 -0
- package/dist/primitives/document_view-headings.js.map +1 -0
- package/dist/primitives/document_view-styles.d.ts +11 -0
- package/dist/primitives/document_view-styles.d.ts.map +1 -0
- package/dist/primitives/document_view-styles.js +104 -0
- package/dist/primitives/document_view-styles.js.map +1 -0
- package/dist/primitives/document_view-toon.d.ts +37 -0
- package/dist/primitives/document_view-toon.d.ts.map +1 -0
- package/dist/primitives/document_view-toon.js +199 -0
- package/dist/primitives/document_view-toon.js.map +1 -0
- package/dist/primitives/document_view-types.d.ts +152 -0
- package/dist/primitives/document_view-types.d.ts.map +1 -0
- package/dist/primitives/document_view-types.js +2 -0
- package/dist/primitives/document_view-types.js.map +1 -0
- package/dist/primitives/document_view.d.ts +8 -106
- package/dist/primitives/document_view.d.ts.map +1 -1
- package/dist/primitives/document_view.js +153 -312
- package/dist/primitives/document_view.js.map +1 -1
- package/dist/primitives/dom-helpers.d.ts +9 -0
- package/dist/primitives/dom-helpers.d.ts.map +1 -1
- package/dist/primitives/dom-helpers.js +10 -1
- package/dist/primitives/dom-helpers.js.map +1 -1
- package/dist/primitives/footnotes.d.ts +4 -3
- package/dist/primitives/footnotes.d.ts.map +1 -1
- package/dist/primitives/footnotes.js +232 -44
- package/dist/primitives/footnotes.js.map +1 -1
- package/dist/primitives/formatting_tags.d.ts +7 -0
- package/dist/primitives/formatting_tags.d.ts.map +1 -1
- package/dist/primitives/formatting_tags.js +22 -11
- package/dist/primitives/formatting_tags.js.map +1 -1
- package/dist/primitives/index.d.ts +10 -0
- package/dist/primitives/index.d.ts.map +1 -1
- package/dist/primitives/index.js +9 -0
- package/dist/primitives/index.js.map +1 -1
- package/dist/primitives/layout.d.ts +4 -3
- package/dist/primitives/layout.d.ts.map +1 -1
- package/dist/primitives/layout.js +45 -3
- package/dist/primitives/layout.js.map +1 -1
- package/dist/primitives/merge_runs.d.ts +21 -3
- package/dist/primitives/merge_runs.d.ts.map +1 -1
- package/dist/primitives/merge_runs.js +32 -10
- package/dist/primitives/merge_runs.js.map +1 -1
- package/dist/primitives/minimal_save.d.ts +38 -0
- package/dist/primitives/minimal_save.d.ts.map +1 -0
- package/dist/primitives/minimal_save.js +323 -0
- package/dist/primitives/minimal_save.js.map +1 -0
- package/dist/primitives/namespaces.d.ts +47 -0
- package/dist/primitives/namespaces.d.ts.map +1 -1
- package/dist/primitives/namespaces.js +52 -0
- package/dist/primitives/namespaces.js.map +1 -1
- package/dist/primitives/reject_changes.d.ts +6 -4
- package/dist/primitives/reject_changes.d.ts.map +1 -1
- package/dist/primitives/reject_changes.js +187 -91
- package/dist/primitives/reject_changes.js.map +1 -1
- package/dist/primitives/revision-parts.d.ts +7 -0
- package/dist/primitives/revision-parts.d.ts.map +1 -0
- package/dist/primitives/revision-parts.js +27 -0
- package/dist/primitives/revision-parts.js.map +1 -0
- package/dist/primitives/revision-vocabulary.d.ts +7 -0
- package/dist/primitives/revision-vocabulary.d.ts.map +1 -0
- package/dist/primitives/revision-vocabulary.js +39 -0
- package/dist/primitives/revision-vocabulary.js.map +1 -0
- package/dist/primitives/schema-corpus-capture.d.ts +19 -0
- package/dist/primitives/schema-corpus-capture.d.ts.map +1 -0
- package/dist/primitives/schema-corpus-capture.js +29 -0
- package/dist/primitives/schema-corpus-capture.js.map +1 -0
- package/dist/primitives/sectPrAudit.d.ts +19 -0
- package/dist/primitives/sectPrAudit.d.ts.map +1 -0
- package/dist/primitives/sectPrAudit.js +165 -0
- package/dist/primitives/sectPrAudit.js.map +1 -0
- package/dist/primitives/semantic_tags.d.ts +7 -0
- package/dist/primitives/semantic_tags.d.ts.map +1 -1
- package/dist/primitives/semantic_tags.js +23 -4
- package/dist/primitives/semantic_tags.js.map +1 -1
- package/dist/primitives/serialize_html.d.ts +37 -0
- package/dist/primitives/serialize_html.d.ts.map +1 -0
- package/dist/primitives/serialize_html.js +395 -0
- package/dist/primitives/serialize_html.js.map +1 -0
- package/dist/primitives/serialize_markdown.d.ts +16 -0
- package/dist/primitives/serialize_markdown.d.ts.map +1 -0
- package/dist/primitives/serialize_markdown.js +300 -0
- package/dist/primitives/serialize_markdown.js.map +1 -0
- package/dist/primitives/serialize_plaintext.d.ts +15 -0
- package/dist/primitives/serialize_plaintext.d.ts.map +1 -0
- package/dist/primitives/serialize_plaintext.js +154 -0
- package/dist/primitives/serialize_plaintext.js.map +1 -0
- package/dist/primitives/styles.d.ts +15 -0
- package/dist/primitives/styles.d.ts.map +1 -1
- package/dist/primitives/styles.js +33 -22
- package/dist/primitives/styles.js.map +1 -1
- package/dist/primitives/tables.d.ts.map +1 -1
- package/dist/primitives/tables.js +13 -3
- package/dist/primitives/tables.js.map +1 -1
- package/dist/primitives/text.d.ts +2 -1
- package/dist/primitives/text.d.ts.map +1 -1
- package/dist/primitives/text.js +116 -12
- package/dist/primitives/text.js.map +1 -1
- package/dist/primitives/track-changes-emitter.d.ts +148 -0
- package/dist/primitives/track-changes-emitter.d.ts.map +1 -0
- package/dist/primitives/track-changes-emitter.js +291 -0
- package/dist/primitives/track-changes-emitter.js.map +1 -0
- package/dist/primitives/validate_ai_revisions.d.ts +35 -0
- package/dist/primitives/validate_ai_revisions.d.ts.map +1 -0
- package/dist/primitives/validate_ai_revisions.js +323 -0
- package/dist/primitives/validate_ai_revisions.js.map +1 -0
- package/dist/primitives/xml-helpers.d.ts +29 -0
- package/dist/primitives/xml-helpers.d.ts.map +1 -0
- package/dist/primitives/xml-helpers.js +35 -0
- package/dist/primitives/xml-helpers.js.map +1 -0
- package/dist/primitives/xml.d.ts +5 -0
- package/dist/primitives/xml.d.ts.map +1 -1
- package/dist/primitives/xml.js +5 -0
- package/dist/primitives/xml.js.map +1 -1
- package/dist/primitives/zip.d.ts +1 -0
- package/dist/primitives/zip.d.ts.map +1 -1
- package/dist/primitives/zip.js +21 -3
- package/dist/primitives/zip.js.map +1 -1
- package/dist/shared/field-structure.d.ts +14 -0
- package/dist/shared/field-structure.d.ts.map +1 -0
- package/dist/shared/field-structure.js +166 -0
- package/dist/shared/field-structure.js.map +1 -0
- package/dist/shared/ooxml/namespaces.d.ts +4 -1
- package/dist/shared/ooxml/namespaces.d.ts.map +1 -1
- package/dist/shared/ooxml/namespaces.js +4 -1
- package/dist/shared/ooxml/namespaces.js.map +1 -1
- package/package.json +13 -9
|
@@ -1,22 +1,26 @@
|
|
|
1
1
|
import { OOXML, W } from './namespaces.js';
|
|
2
|
+
import { getAttributeSafe, getFirstChild } from './xml-helpers.js';
|
|
2
3
|
import { getParagraphText, getParagraphRuns } from './text.js';
|
|
3
|
-
import { extractListLabel, stripListLabel
|
|
4
|
+
import { extractListLabel, stripListLabel } from './list_labels.js';
|
|
4
5
|
import { parseNumberingXml, computeListLabelForParagraph } from './numbering.js';
|
|
5
6
|
import { parseStylesXml, extractParagraphFormatting, extractEffectiveRunFormatting } from './styles.js';
|
|
6
7
|
import { HIGHLIGHT_TAG } from './semantic_tags.js';
|
|
7
8
|
import { computeModalBaseline, computeParagraphFontBaseline, emitFormattingTags, mergeAdjacentTags } from './formatting_tags.js';
|
|
8
9
|
import { isReservedFootnote } from './footnotes.js';
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
10
|
+
import { deriveHeading, detectRunInHeader, detectTitleCapsCentered, extractHeaderInfo, suppressSignatureClusters, } from './document_view-headings.js';
|
|
11
|
+
import { discoverStyles, fingerprintKey } from './document_view-styles.js';
|
|
12
|
+
import { findTaggedTextInsertionIndex } from './document_view-comments.js';
|
|
13
|
+
export { discoverStyles } from './document_view-styles.js';
|
|
14
|
+
export { INLINE_COMMENT_MARKER_RUNTIME, TOON_INLINE_TAG_RE, collectInlineCommentMarkers, tokenizeToonInline } from './document_view-comments.js';
|
|
15
|
+
export { collectTableMarkerInfo, formatTableMarker, formatToonCommentEndnoteLines, formatToonCommentLines, formatToonCommentsEndnotesBlock, formatToonDataLine, renderToon, renderToonWithCommentEndnotes, } from './document_view-toon.js';
|
|
12
16
|
function getWAttr(el, localName) {
|
|
13
|
-
return el
|
|
17
|
+
return getAttributeSafe(el, OOXML.W_NS, localName, 'w');
|
|
14
18
|
}
|
|
15
19
|
function runHighlightVal(run) {
|
|
16
|
-
const rPr = run
|
|
20
|
+
const rPr = getFirstChild(run, OOXML.W_NS, W.rPr);
|
|
17
21
|
if (!rPr)
|
|
18
22
|
return null;
|
|
19
|
-
const h = rPr
|
|
23
|
+
const h = getFirstChild(rPr, OOXML.W_NS, W.highlight);
|
|
20
24
|
if (!h)
|
|
21
25
|
return null;
|
|
22
26
|
const v = getWAttr(h, 'val');
|
|
@@ -46,284 +50,6 @@ function emitHighlightTagsFromParagraph(p) {
|
|
|
46
50
|
out.push(`</${HIGHLIGHT_TAG}>`);
|
|
47
51
|
return out.join('');
|
|
48
52
|
}
|
|
49
|
-
function fingerprintKey(fp) {
|
|
50
|
-
// Stable JSON-ish key used for Map lookups.
|
|
51
|
-
return `${fp.list_level}|${fp.left_indent_pt.toFixed(1)}|${fp.first_line_indent_pt.toFixed(1)}|${fp.style_name}|${fp.alignment}`;
|
|
52
|
-
}
|
|
53
|
-
/**
|
|
54
|
-
* v0.3: Compact style fingerprint token.
|
|
55
|
-
* Concatenates style name, list level, alignment, and indentation for token-efficient LLM context.
|
|
56
|
-
* Example: "Normal:L-1:LEFT:I0:H0"
|
|
57
|
-
*/
|
|
58
|
-
function computeFingerprintToken(fp, styleId) {
|
|
59
|
-
const name = styleId || fp.style_name || 'body';
|
|
60
|
-
const level = `L${fp.list_level}`;
|
|
61
|
-
const align = fp.alignment;
|
|
62
|
-
const indent = `I${Math.round(fp.left_indent_pt)}`;
|
|
63
|
-
const hanging = `H${Math.round(fp.first_line_indent_pt)}`;
|
|
64
|
-
return `${name}:${level}:${align}:${indent}:${hanging}`;
|
|
65
|
-
}
|
|
66
|
-
// Pattern-based header detection fallback (ported from Python ingestor._extract_header_info).
|
|
67
|
-
const HEADER_PATTERN = /^([A-Z][^.!?:]*(?:\s+[A-Z][^.!?:]*)*)([.:]?)(?:\s|$)/;
|
|
68
|
-
function extractHeaderInfo(cleanText) {
|
|
69
|
-
if (!cleanText || cleanText.length < 2)
|
|
70
|
-
return { header_text: null, header_style: null };
|
|
71
|
-
if (!/^[A-Z]/.test(cleanText))
|
|
72
|
-
return { header_text: null, header_style: null };
|
|
73
|
-
const stripped = cleanText.trim();
|
|
74
|
-
if (stripped.length <= SHORT_HEADER_MAX_LENGTH) {
|
|
75
|
-
if (stripped.endsWith('.'))
|
|
76
|
-
return { header_text: stripped.slice(0, -1), header_style: 'title_with_period' };
|
|
77
|
-
if (stripped.endsWith(':'))
|
|
78
|
-
return { header_text: stripped.slice(0, -1), header_style: 'title_with_colon' };
|
|
79
|
-
const words = stripped.split(/\s+/);
|
|
80
|
-
if (words.length <= 5)
|
|
81
|
-
return { header_text: stripped, header_style: 'title_bare' };
|
|
82
|
-
return { header_text: null, header_style: null };
|
|
83
|
-
}
|
|
84
|
-
const m = HEADER_PATTERN.exec(stripped);
|
|
85
|
-
if (!m)
|
|
86
|
-
return { header_text: null, header_style: null };
|
|
87
|
-
const headerText = (m[1] ?? '').trim();
|
|
88
|
-
const terminator = m[2] ?? '';
|
|
89
|
-
const remaining = stripped.slice(m[0].length);
|
|
90
|
-
if (!remaining || headerText.length > MAX_HEADER_TEXT_LENGTH)
|
|
91
|
-
return { header_text: null, header_style: null };
|
|
92
|
-
if (terminator === '.')
|
|
93
|
-
return { header_text: headerText, header_style: 'title_with_period' };
|
|
94
|
-
if (terminator === ':')
|
|
95
|
-
return { header_text: headerText, header_style: 'title_with_colon' };
|
|
96
|
-
return { header_text: headerText, header_style: 'title_bare' };
|
|
97
|
-
}
|
|
98
|
-
function detectRunInHeader(params) {
|
|
99
|
-
const { paragraph, paragraphPPr, paragraphStyleId, styles } = params;
|
|
100
|
-
const punct = new Set(['.', ':', '-']);
|
|
101
|
-
// Use visible runs only (field code text stripped in getParagraphRuns()).
|
|
102
|
-
const runs = getParagraphRuns(paragraph);
|
|
103
|
-
if (runs.length === 0)
|
|
104
|
-
return null;
|
|
105
|
-
// Group by run element, preserving order.
|
|
106
|
-
const orderedUniqueRuns = [];
|
|
107
|
-
const seen = new Set();
|
|
108
|
-
for (const tr of runs) {
|
|
109
|
-
if (!seen.has(tr.r)) {
|
|
110
|
-
seen.add(tr.r);
|
|
111
|
-
orderedUniqueRuns.push(tr.r);
|
|
112
|
-
}
|
|
113
|
-
}
|
|
114
|
-
let headerText = '';
|
|
115
|
-
let formatting = null;
|
|
116
|
-
let headerCharCount = 0;
|
|
117
|
-
for (const r of orderedUniqueRuns) {
|
|
118
|
-
const fmt = extractEffectiveRunFormatting({ run: r, paragraphPPr, paragraphStyleId, styles });
|
|
119
|
-
const isHeaderStyle = fmt.bold || fmt.underline;
|
|
120
|
-
if (!isHeaderStyle)
|
|
121
|
-
break;
|
|
122
|
-
// Accumulate run text.
|
|
123
|
-
const ts = Array.from(r.getElementsByTagNameNS(OOXML.W_NS, W.t));
|
|
124
|
-
for (const t of ts) {
|
|
125
|
-
const tc = t.textContent ?? '';
|
|
126
|
-
headerText += tc;
|
|
127
|
-
headerCharCount += tc.length;
|
|
128
|
-
}
|
|
129
|
-
if (!formatting)
|
|
130
|
-
formatting = { bold: fmt.bold, italic: fmt.italic, underline: fmt.underline };
|
|
131
|
-
}
|
|
132
|
-
const trimmed = headerText.trim();
|
|
133
|
-
if (!trimmed)
|
|
134
|
-
return null;
|
|
135
|
-
if (!punct.has(trimmed[trimmed.length - 1]))
|
|
136
|
-
return null;
|
|
137
|
-
if (!formatting)
|
|
138
|
-
return null;
|
|
139
|
-
return { raw_text: trimmed, formatting, headerCharCount };
|
|
140
|
-
}
|
|
141
|
-
function inferSemanticName(params) {
|
|
142
|
-
const { fp, nodes } = params;
|
|
143
|
-
// Find first label_type if present.
|
|
144
|
-
let labelType = null;
|
|
145
|
-
for (const n of nodes) {
|
|
146
|
-
if (n.list_metadata.label_type) {
|
|
147
|
-
labelType = n.list_metadata.label_type;
|
|
148
|
-
break;
|
|
149
|
-
}
|
|
150
|
-
}
|
|
151
|
-
const listLevel = fp.list_level;
|
|
152
|
-
if (listLevel >= 0) {
|
|
153
|
-
if (listLevel === 0) {
|
|
154
|
-
if (labelType === LabelType.ARTICLE)
|
|
155
|
-
return { base_id: 'article', display_name: 'Article Heading' };
|
|
156
|
-
if (labelType === LabelType.SECTION)
|
|
157
|
-
return { base_id: 'section', display_name: 'Section Heading' };
|
|
158
|
-
if (labelType === LabelType.ROMAN)
|
|
159
|
-
return { base_id: 'roman_section', display_name: 'Roman Numeral Section' };
|
|
160
|
-
return { base_id: 'top_level', display_name: 'Top-Level List Item' };
|
|
161
|
-
}
|
|
162
|
-
if (listLevel === 1) {
|
|
163
|
-
if (labelType === LabelType.LETTER)
|
|
164
|
-
return { base_id: 'subsection', display_name: 'Subsection (a)/(A)' };
|
|
165
|
-
if (labelType === LabelType.NUMBER)
|
|
166
|
-
return { base_id: 'subsection_number', display_name: 'Numbered Subsection' };
|
|
167
|
-
if (labelType === LabelType.ROMAN)
|
|
168
|
-
return { base_id: 'subsection_roman', display_name: 'Roman Subsection' };
|
|
169
|
-
return { base_id: 'level_1', display_name: `Level ${listLevel} List Item` };
|
|
170
|
-
}
|
|
171
|
-
if (labelType === LabelType.ROMAN)
|
|
172
|
-
return { base_id: `level_${listLevel}_roman`, display_name: `Level ${listLevel} Roman` };
|
|
173
|
-
if (labelType === LabelType.LETTER)
|
|
174
|
-
return { base_id: `level_${listLevel}_letter`, display_name: `Level ${listLevel} Letter` };
|
|
175
|
-
return { base_id: `level_${listLevel}`, display_name: `Level ${listLevel} List Item` };
|
|
176
|
-
}
|
|
177
|
-
// Non-list.
|
|
178
|
-
const styleName = fp.style_name.toLowerCase().replace(/\s+/g, '_');
|
|
179
|
-
if (fp.left_indent_pt > 0)
|
|
180
|
-
return { base_id: 'indent_block', display_name: 'Indented Block' };
|
|
181
|
-
if (styleName.includes('heading') || styleName.includes('title'))
|
|
182
|
-
return { base_id: 'heading', display_name: 'Heading' };
|
|
183
|
-
if (styleName.includes('quote') || styleName.includes('block'))
|
|
184
|
-
return { base_id: 'block_quote', display_name: 'Block Quote' };
|
|
185
|
-
return { base_id: 'body', display_name: 'Body Text' };
|
|
186
|
-
}
|
|
187
|
-
export function discoverStyles(nodes) {
|
|
188
|
-
const groups = new Map();
|
|
189
|
-
for (const n of nodes) {
|
|
190
|
-
const key = fingerprintKey(n.style_fingerprint);
|
|
191
|
-
const g = groups.get(key);
|
|
192
|
-
if (g)
|
|
193
|
-
g.nodes.push(n);
|
|
194
|
-
else
|
|
195
|
-
groups.set(key, { fp: n.style_fingerprint, nodes: [n] });
|
|
196
|
-
}
|
|
197
|
-
const used = {};
|
|
198
|
-
const styles = new Map();
|
|
199
|
-
const fpToStyle = new Map();
|
|
200
|
-
for (const [fpKey, g] of groups.entries()) {
|
|
201
|
-
const { base_id, display_name } = inferSemanticName({ fp: g.fp, nodes: g.nodes });
|
|
202
|
-
let styleId = base_id;
|
|
203
|
-
if (used[base_id] !== undefined) {
|
|
204
|
-
used[base_id] += 1;
|
|
205
|
-
styleId = `${base_id}_${used[base_id]}`;
|
|
206
|
-
}
|
|
207
|
-
else {
|
|
208
|
-
used[base_id] = 0;
|
|
209
|
-
}
|
|
210
|
-
const median = g.nodes[Math.floor(g.nodes.length / 2)];
|
|
211
|
-
const info = {
|
|
212
|
-
style_id: styleId,
|
|
213
|
-
display_name,
|
|
214
|
-
fingerprint: g.fp,
|
|
215
|
-
example_node_id: median.id,
|
|
216
|
-
example_text: median.clean_text.slice(0, STYLE_EXAMPLE_TEXT_PREVIEW_LENGTH),
|
|
217
|
-
count: g.nodes.length,
|
|
218
|
-
dominant_alignment: g.fp.alignment,
|
|
219
|
-
};
|
|
220
|
-
styles.set(styleId, info);
|
|
221
|
-
fpToStyle.set(fpKey, styleId);
|
|
222
|
-
}
|
|
223
|
-
return { styles, fingerprint_to_style: fpToStyle };
|
|
224
|
-
}
|
|
225
|
-
function headerStripFromText(params) {
|
|
226
|
-
// Mirrors Python TOONRenderer header stripping.
|
|
227
|
-
const { header } = params;
|
|
228
|
-
let { text } = params;
|
|
229
|
-
if (!header)
|
|
230
|
-
return text;
|
|
231
|
-
const headerNorm = header.trim().toLowerCase();
|
|
232
|
-
const textLower = text.toLowerCase();
|
|
233
|
-
for (const punct of [':', '.', '-', ';', '']) {
|
|
234
|
-
const testPrefix = `${headerNorm}${punct}`;
|
|
235
|
-
if (textLower.startsWith(testPrefix)) {
|
|
236
|
-
text = text.slice(testPrefix.length).trimStart();
|
|
237
|
-
return text;
|
|
238
|
-
}
|
|
239
|
-
}
|
|
240
|
-
if (text.startsWith(header)) {
|
|
241
|
-
text = text.slice(header.length).replace(/^[.:\-;]+/, '').trimStart();
|
|
242
|
-
}
|
|
243
|
-
return text;
|
|
244
|
-
}
|
|
245
|
-
/**
|
|
246
|
-
* Format a single toon data line for one DocumentViewNode.
|
|
247
|
-
* Handles table-context-aware style (th/td) and header stripping.
|
|
248
|
-
*/
|
|
249
|
-
export function formatToonDataLine(n, options) {
|
|
250
|
-
let text = n.tagged_text;
|
|
251
|
-
if (n.header)
|
|
252
|
-
text = headerStripFromText({ header: n.header, text });
|
|
253
|
-
let header = n.header;
|
|
254
|
-
if (header && !text) {
|
|
255
|
-
text = header;
|
|
256
|
-
header = '';
|
|
257
|
-
}
|
|
258
|
-
const tc = n.table_context;
|
|
259
|
-
let style;
|
|
260
|
-
if (tc) {
|
|
261
|
-
style = tc.is_header_row
|
|
262
|
-
? `th(${tc.row_index},${tc.col_index})`
|
|
263
|
-
: `td(${tc.row_index},${tc.col_index})`;
|
|
264
|
-
}
|
|
265
|
-
else {
|
|
266
|
-
style = options?.compact
|
|
267
|
-
? computeFingerprintToken(n.style_fingerprint, n.style)
|
|
268
|
-
: n.style;
|
|
269
|
-
}
|
|
270
|
-
return `${n.id} | ${n.list_label} | ${header} | ${style} | ${text}`;
|
|
271
|
-
}
|
|
272
|
-
/**
|
|
273
|
-
* Collect table marker info (dimensions) from nodes for #TABLE markers.
|
|
274
|
-
* Column headers are NOT included in the marker — they appear once in the th() rows.
|
|
275
|
-
*/
|
|
276
|
-
export function collectTableMarkerInfo(nodes) {
|
|
277
|
-
const info = new Map();
|
|
278
|
-
for (const n of nodes) {
|
|
279
|
-
const tc = n.table_context;
|
|
280
|
-
if (!tc)
|
|
281
|
-
continue;
|
|
282
|
-
if (!info.has(tc.table_index)) {
|
|
283
|
-
info.set(tc.table_index, {
|
|
284
|
-
id: tc.table_id,
|
|
285
|
-
totalRows: tc.total_rows,
|
|
286
|
-
totalCols: tc.total_cols,
|
|
287
|
-
});
|
|
288
|
-
}
|
|
289
|
-
}
|
|
290
|
-
return info;
|
|
291
|
-
}
|
|
292
|
-
/**
|
|
293
|
-
* Format a #TABLE marker line from collected table info.
|
|
294
|
-
* Headers are omitted — they appear exactly once in the th(0,N) data rows.
|
|
295
|
-
*/
|
|
296
|
-
export function formatTableMarker(info) {
|
|
297
|
-
return `#TABLE ${info.id} | ${info.totalRows} rows × ${info.totalCols} cols`;
|
|
298
|
-
}
|
|
299
|
-
export function renderToon(nodes, options = {}) {
|
|
300
|
-
const lines = ['#SCHEMA id | list_label | header | style | text'];
|
|
301
|
-
// Pre-scan: collect table marker info for #TABLE lines
|
|
302
|
-
const tableInfo = collectTableMarkerInfo(nodes);
|
|
303
|
-
let currentTableIndex = null;
|
|
304
|
-
for (const n of nodes) {
|
|
305
|
-
const tc = n.table_context;
|
|
306
|
-
const nodeTableIndex = tc ? tc.table_index : null;
|
|
307
|
-
// Close previous table if we left it or moved to a different table
|
|
308
|
-
if (currentTableIndex !== null && nodeTableIndex !== currentTableIndex) {
|
|
309
|
-
lines.push('#END_TABLE');
|
|
310
|
-
currentTableIndex = null;
|
|
311
|
-
}
|
|
312
|
-
// Open new table if entering one
|
|
313
|
-
if (nodeTableIndex !== null && currentTableIndex === null) {
|
|
314
|
-
const info = tableInfo.get(nodeTableIndex);
|
|
315
|
-
if (info)
|
|
316
|
-
lines.push(formatTableMarker(info));
|
|
317
|
-
currentTableIndex = nodeTableIndex;
|
|
318
|
-
}
|
|
319
|
-
lines.push(formatToonDataLine(n, options));
|
|
320
|
-
}
|
|
321
|
-
// Close any open table at end
|
|
322
|
-
if (currentTableIndex !== null) {
|
|
323
|
-
lines.push('#END_TABLE');
|
|
324
|
-
}
|
|
325
|
-
return lines.join('\n');
|
|
326
|
-
}
|
|
327
53
|
export function buildDocumentView(params) {
|
|
328
54
|
const { documentXml, stylesXml, numberingXml, opts } = params;
|
|
329
55
|
const includeSemantic = opts?.include_semantic_tags ?? true;
|
|
@@ -334,7 +60,7 @@ export function buildDocumentView(params) {
|
|
|
334
60
|
void numberingModel;
|
|
335
61
|
const counters = new Map();
|
|
336
62
|
void counters;
|
|
337
|
-
const body = documentXml
|
|
63
|
+
const body = getFirstChild(documentXml, OOXML.W_NS, W.body);
|
|
338
64
|
if (!body)
|
|
339
65
|
return { nodes: [], styles: { styles: new Map(), fingerprint_to_style: new Map() } };
|
|
340
66
|
const paragraphs = Array.from(body.getElementsByTagNameNS(OOXML.W_NS, W.p));
|
|
@@ -357,9 +83,7 @@ function resolveRunHyperlinkUrl(runEl, relsMap) {
|
|
|
357
83
|
if (!parent || parent.localName !== W.hyperlink)
|
|
358
84
|
return null;
|
|
359
85
|
// r:id attribute can be namespaced or prefixed.
|
|
360
|
-
const rId = parent
|
|
361
|
-
parent.getAttribute('r:id') ??
|
|
362
|
-
null;
|
|
86
|
+
const rId = getAttributeSafe(parent, OOXML.R_NS, 'id', 'r', { bareFallback: false });
|
|
363
87
|
if (!rId)
|
|
364
88
|
return null;
|
|
365
89
|
return relsMap.get(rId) ?? null;
|
|
@@ -439,9 +163,11 @@ function buildFootnoteDisplayMap(documentXml, footnotesXml) {
|
|
|
439
163
|
return map;
|
|
440
164
|
}
|
|
441
165
|
/**
|
|
442
|
-
* Compute footnote
|
|
443
|
-
*
|
|
444
|
-
*
|
|
166
|
+
* Compute the footnote references a paragraph visibly anchors, in document
|
|
167
|
+
* order. This is the single derivation of "which footnotes does this paragraph
|
|
168
|
+
* reference, and with what display number" — the view injects [^N] markers
|
|
169
|
+
* from it AND exposes it as DocumentViewNode.footnote_refs so consumers
|
|
170
|
+
* (read_file's clean_text suffix) never re-walk the DOM. @see #393
|
|
445
171
|
*
|
|
446
172
|
* Self-contained: only inspects the paragraph DOM for w:footnoteReference
|
|
447
173
|
* elements. Does NOT modify getParagraphRuns or getParagraphText.
|
|
@@ -500,28 +226,85 @@ function getFootnoteMarkersForParagraph(p, displayMap) {
|
|
|
500
226
|
if (displayNum != null) {
|
|
501
227
|
markers.push({
|
|
502
228
|
offset: visibleOffset + runVisibleLen,
|
|
503
|
-
|
|
229
|
+
id: footnoteId,
|
|
230
|
+
display: displayNum,
|
|
504
231
|
});
|
|
505
232
|
}
|
|
506
233
|
}
|
|
507
234
|
visibleOffset += runVisibleLen;
|
|
508
235
|
}
|
|
509
|
-
// Sort descending by offset for safe right-to-left insertion
|
|
510
|
-
markers.sort((a, b) => b.offset - a.offset);
|
|
511
236
|
return markers;
|
|
512
237
|
}
|
|
513
238
|
/**
|
|
514
|
-
*
|
|
515
|
-
*
|
|
239
|
+
* Paragraph content that makes a text-empty paragraph meaningful on its own:
|
|
240
|
+
* an endnote or comment anchored to the paragraph (the comment range markers
|
|
241
|
+
* are what `getComments` resolves `anchored_paragraph_id`/`end_paragraph_id`
|
|
242
|
+
* from, so dropping their paragraph leaves a dangling anchor ID no node_ids
|
|
243
|
+
* probe can resolve), or embedded visual content (DrawingML drawing, VML
|
|
244
|
+
* picture, embedded object). Dropping such a paragraph from the document view
|
|
245
|
+
* severs the anchored note/comment from every read surface and silently
|
|
246
|
+
* hides images.
|
|
247
|
+
*
|
|
248
|
+
* Footnote references are handled separately via the display map so their
|
|
249
|
+
* [^N] markers render; the shapes here only need the node to exist.
|
|
250
|
+
* @see #383
|
|
251
|
+
*/
|
|
252
|
+
const ANCHORING_CONTENT = [
|
|
253
|
+
W.endnoteReference,
|
|
254
|
+
W.commentReference,
|
|
255
|
+
W.commentRangeStart,
|
|
256
|
+
W.commentRangeEnd,
|
|
257
|
+
W.drawing,
|
|
258
|
+
W.pict,
|
|
259
|
+
W.object,
|
|
260
|
+
];
|
|
261
|
+
/**
|
|
262
|
+
* True when `el` sits inside a `w:del` or `w:moveFrom` revision wrapper below
|
|
263
|
+
* the paragraph. Deleted/moved-from content is invisible to the view's text
|
|
264
|
+
* extraction (`getParagraphText` reads `w:t`, never `w:delText`), so an
|
|
265
|
+
* anchor that only survives inside a tracked deletion — e.g. the
|
|
266
|
+
* `w:commentReference` a tracked comment-delete leaves under `w:del` — must
|
|
267
|
+
* not resurrect its paragraph as a blank visible node.
|
|
268
|
+
*/
|
|
269
|
+
function isInsideRemovedRevisionWrapper(el, paragraph) {
|
|
270
|
+
let cur = el.parentNode;
|
|
271
|
+
while (cur && cur !== paragraph) {
|
|
272
|
+
if (cur.namespaceURI === OOXML.W_NS && (cur.localName === W.del || cur.localName === W.moveFrom)) {
|
|
273
|
+
return true;
|
|
274
|
+
}
|
|
275
|
+
cur = cur.parentNode;
|
|
276
|
+
}
|
|
277
|
+
return false;
|
|
278
|
+
}
|
|
279
|
+
function paragraphHasAnchoringContent(p) {
|
|
280
|
+
return ANCHORING_CONTENT.some((localName) => {
|
|
281
|
+
const els = p.getElementsByTagNameNS(OOXML.W_NS, localName);
|
|
282
|
+
for (let i = 0; i < els.length; i++) {
|
|
283
|
+
if (!isInsideRemovedRevisionWrapper(els.item(i), p))
|
|
284
|
+
return true;
|
|
285
|
+
}
|
|
286
|
+
return false;
|
|
287
|
+
});
|
|
288
|
+
}
|
|
289
|
+
/**
|
|
290
|
+
* Inject [^N] footnote markers into a text string at the given offsets.
|
|
291
|
+
* Markers arrive in document order; insertion happens right-to-left (offset
|
|
292
|
+
* descending) so earlier offsets stay valid as text grows.
|
|
293
|
+
*
|
|
294
|
+
* Offsets are *visible*-character offsets (they count document text, not the inline
|
|
295
|
+
* formatting tags emitted by `emitFormattingTags`). When `text` carries formatting tags
|
|
296
|
+
* we therefore map each visible offset to a tag-aware insertion index, exactly as the
|
|
297
|
+
* comment-marker path does (`findTaggedTextInsertionIndex`). A naive `slice(offset)` would
|
|
298
|
+
* land the `[^n]` marker inside a tag or mid-word once formatting is present.
|
|
516
299
|
*/
|
|
517
300
|
function injectFootnoteMarkers(text, markers) {
|
|
518
301
|
if (markers.length === 0)
|
|
519
302
|
return text;
|
|
303
|
+
const descending = [...markers].sort((a, b) => b.offset - a.offset);
|
|
520
304
|
let result = text;
|
|
521
|
-
for (const { offset,
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
result = result.slice(0, pos) + marker + result.slice(pos);
|
|
305
|
+
for (const { offset, display } of descending) {
|
|
306
|
+
const insertionIndex = findTaggedTextInsertionIndex(result, offset);
|
|
307
|
+
result = result.slice(0, insertionIndex) + `[^${display}]` + result.slice(insertionIndex);
|
|
525
308
|
}
|
|
526
309
|
return result;
|
|
527
310
|
}
|
|
@@ -544,7 +327,7 @@ export function buildNodesForDocumentView(params) {
|
|
|
544
327
|
const allBodyRuns = [];
|
|
545
328
|
if (showFormatting) {
|
|
546
329
|
for (const { p } of paragraphs) {
|
|
547
|
-
const paraPPr = p
|
|
330
|
+
const paraPPr = getFirstChild(p, OOXML.W_NS, W.pPr);
|
|
548
331
|
const paraFmt = extractParagraphFormatting(paraPPr ?? null, stylesModel);
|
|
549
332
|
const runs = buildAnnotatedRuns({
|
|
550
333
|
p,
|
|
@@ -593,20 +376,34 @@ export function buildNodesForDocumentView(params) {
|
|
|
593
376
|
const nodes = [];
|
|
594
377
|
for (let idx = 0; idx < paragraphs.length; idx++) {
|
|
595
378
|
const { id, p, tableContext } = paragraphs[idx];
|
|
596
|
-
const paraPPr = p
|
|
379
|
+
const paraPPr = getFirstChild(p, OOXML.W_NS, W.pPr);
|
|
597
380
|
const paraFmt = extractParagraphFormatting(paraPPr ?? null, stylesModel);
|
|
598
381
|
// Visible clean text (field codes stripped).
|
|
599
382
|
const fullText = getParagraphText(p).replace(/\r/g, '').replace(/\n/g, '').trim();
|
|
600
|
-
//
|
|
601
|
-
|
|
383
|
+
// Computed once per paragraph: gates the empty-paragraph skip below, drives
|
|
384
|
+
// the [^N] marker injection, and is exposed as node.footnote_refs.
|
|
385
|
+
const fnMarkers = getFootnoteMarkersForParagraph(p, footnoteDisplayMap);
|
|
386
|
+
// Preserve empty table cell paragraphs for structural completeness, and
|
|
387
|
+
// text-empty paragraphs that carry anchoring content — a visible footnote
|
|
388
|
+
// reference (its [^N] marker renders via the injection pass below), an
|
|
389
|
+
// endnote reference, a comment reference or comment range marker, or an
|
|
390
|
+
// embedded drawing/picture/object. Dropping those loses the anchored
|
|
391
|
+
// note/comment/image from every rendering of the document view. Anchors
|
|
392
|
+
// that survive only inside a tracked deletion don't count, and paragraphs
|
|
393
|
+
// that are empty for spacing only are still skipped.
|
|
394
|
+
// @see #185, #383
|
|
395
|
+
if (!fullText &&
|
|
396
|
+
!tableContext &&
|
|
397
|
+
fnMarkers.length === 0 &&
|
|
398
|
+
!paragraphHasAnchoringContent(p))
|
|
602
399
|
continue;
|
|
603
400
|
// Numbering (auto-numbered) info from numPr.
|
|
604
401
|
let numId = null;
|
|
605
402
|
let ilvl = null;
|
|
606
|
-
const numPr = paraPPr ? paraPPr
|
|
403
|
+
const numPr = paraPPr ? getFirstChild(paraPPr, OOXML.W_NS, W.numPr) : null;
|
|
607
404
|
if (numPr) {
|
|
608
|
-
const numIdEl = numPr
|
|
609
|
-
const ilvlEl = numPr
|
|
405
|
+
const numIdEl = getFirstChild(numPr, OOXML.W_NS, W.numId);
|
|
406
|
+
const ilvlEl = getFirstChild(numPr, OOXML.W_NS, W.ilvl);
|
|
610
407
|
const numIdVal = numIdEl ? getWAttr(numIdEl, 'val') : null;
|
|
611
408
|
const ilvlVal = ilvlEl ? getWAttr(ilvlEl, 'val') : null;
|
|
612
409
|
if (numIdVal)
|
|
@@ -649,7 +446,13 @@ export function buildNodesForDocumentView(params) {
|
|
|
649
446
|
let headerFormatting = null;
|
|
650
447
|
let headerCharCount = 0;
|
|
651
448
|
try {
|
|
652
|
-
|
|
449
|
+
// Skip in-table run-in header detection — table cells are key/value
|
|
450
|
+
// layout and a bold prefix is a label, not a section heading.
|
|
451
|
+
// Mirrors the !tableContext gates on detectTitleCapsCentered and
|
|
452
|
+
// extractHeaderInfo below.
|
|
453
|
+
const hdr = tableContext
|
|
454
|
+
? null
|
|
455
|
+
: detectRunInHeader({ paragraph: p, paragraphPPr: paraPPr ?? null, paragraphStyleId: paraFmt.styleId, styles: stylesModel });
|
|
653
456
|
if (hdr) {
|
|
654
457
|
headerText = hdr.raw_text.replace(/[.:\-]+$/g, '');
|
|
655
458
|
headerStyle = 'run_in_header';
|
|
@@ -660,11 +463,39 @@ export function buildNodesForDocumentView(params) {
|
|
|
660
463
|
catch {
|
|
661
464
|
// ignore
|
|
662
465
|
}
|
|
663
|
-
|
|
466
|
+
// Centered ALL-CAPS bold standalone titles (e.g. an NVCA SPA's
|
|
467
|
+
// `SERIES […] PREFERRED STOCK PURCHASE AGREEMENT`). Runs before
|
|
468
|
+
// extractHeaderInfo so the documented precedence (title_caps_centered
|
|
469
|
+
// outranks short standalone title_bare/title_with_period/title_with_colon)
|
|
470
|
+
// matches the implementation. Only fires when run_in_header did not match
|
|
471
|
+
// AND the paragraph has no list label AND is not in a table cell. The
|
|
472
|
+
// try/catch is defensive against malformed XML in user documents.
|
|
473
|
+
if (!headerText && !labelString && !tableContext) {
|
|
474
|
+
try {
|
|
475
|
+
const titleHdr = detectTitleCapsCentered({
|
|
476
|
+
paragraph: p,
|
|
477
|
+
paragraphPPr: paraPPr ?? null,
|
|
478
|
+
paragraphStyleId: paraFmt.styleId,
|
|
479
|
+
alignment: paraFmt.alignment,
|
|
480
|
+
cleanTextNoLabel,
|
|
481
|
+
styles: stylesModel,
|
|
482
|
+
});
|
|
483
|
+
if (titleHdr) {
|
|
484
|
+
headerText = titleHdr.raw_text;
|
|
485
|
+
headerStyle = 'title_caps_centered';
|
|
486
|
+
headerFormatting = titleHdr.formatting;
|
|
487
|
+
}
|
|
488
|
+
}
|
|
489
|
+
catch {
|
|
490
|
+
// ignore: malformed run/style data falls through to extractHeaderInfo.
|
|
491
|
+
}
|
|
492
|
+
}
|
|
493
|
+
if (!headerText && !tableContext) {
|
|
664
494
|
const fallback = extractHeaderInfo(cleanTextNoLabel);
|
|
665
495
|
headerText = fallback.header_text;
|
|
666
496
|
headerStyle = fallback.header_style;
|
|
667
497
|
}
|
|
498
|
+
const heading = deriveHeading(paraFmt.styleId, cleanTextNoLabel, headerText, headerStyle, tableContext != null);
|
|
668
499
|
// ── Tag emission ──
|
|
669
500
|
let tagged = cleanTextNoLabel;
|
|
670
501
|
if (showFormatting) {
|
|
@@ -721,7 +552,7 @@ export function buildNodesForDocumentView(params) {
|
|
|
721
552
|
}
|
|
722
553
|
// Emit formatting tags from run-level metadata.
|
|
723
554
|
const paraFontBaseline = computeParagraphFontBaseline(bodyRuns, { formattingMode });
|
|
724
|
-
tagged = emitFormattingTags({ runs: bodyRuns, baseline: docBaseline, fontBaseline: paraFontBaseline });
|
|
555
|
+
tagged = emitFormattingTags({ runs: bodyRuns, baseline: docBaseline, fontBaseline: paraFontBaseline, formattingMode });
|
|
725
556
|
tagged = mergeAdjacentTags(tagged);
|
|
726
557
|
}
|
|
727
558
|
else if (includeSemantic) {
|
|
@@ -774,10 +605,13 @@ export function buildNodesForDocumentView(params) {
|
|
|
774
605
|
bodyFmt = null;
|
|
775
606
|
}
|
|
776
607
|
// Inject footnote [^N] markers into view text (view-only, not shared text primitives)
|
|
777
|
-
const fnMarkers = getFootnoteMarkersForParagraph(p, footnoteDisplayMap);
|
|
778
608
|
if (fnMarkers.length > 0) {
|
|
779
609
|
tagged = injectFootnoteMarkers(tagged, fnMarkers);
|
|
780
610
|
}
|
|
611
|
+
// Visible characters stripped from the raw paragraph head when extracting a manual
|
|
612
|
+
// label (label text + trailing whitespace). Auto-numbered paragraphs leave fullText
|
|
613
|
+
// intact, so this is 0 for them.
|
|
614
|
+
const visibleOffsetCorrection = isAutoNumbered ? 0 : Math.max(0, fullText.length - cleanTextNoLabel.length);
|
|
781
615
|
const node = {
|
|
782
616
|
id,
|
|
783
617
|
list_label: labelString,
|
|
@@ -786,6 +620,7 @@ export function buildNodesForDocumentView(params) {
|
|
|
786
620
|
text: tagged, // filled after header stripping at render time
|
|
787
621
|
clean_text: cleanTextNoLabel,
|
|
788
622
|
tagged_text: tagged,
|
|
623
|
+
visible_offset_correction: visibleOffsetCorrection > 0 ? visibleOffsetCorrection : undefined,
|
|
789
624
|
list_metadata: {
|
|
790
625
|
list_level: listLevel,
|
|
791
626
|
label_type: labelType,
|
|
@@ -804,10 +639,16 @@ export function buildNodesForDocumentView(params) {
|
|
|
804
639
|
header_formatting: headerFormatting,
|
|
805
640
|
body_run_formatting: bodyFmt,
|
|
806
641
|
};
|
|
642
|
+
if (heading)
|
|
643
|
+
node.heading = heading;
|
|
644
|
+
if (fnMarkers.length > 0) {
|
|
645
|
+
node.footnote_refs = fnMarkers.map(({ id: fnId, display }) => ({ id: fnId, display }));
|
|
646
|
+
}
|
|
807
647
|
if (tableContext)
|
|
808
648
|
node.table_context = tableContext;
|
|
809
649
|
nodes.push(node);
|
|
810
650
|
}
|
|
651
|
+
suppressSignatureClusters(nodes);
|
|
811
652
|
const styles = discoverStyles(nodes);
|
|
812
653
|
for (const n of nodes) {
|
|
813
654
|
const sid = styles.fingerprint_to_style.get(fingerprintKey(n.style_fingerprint));
|