docxmlater 10.0.2 → 10.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/dist/constants/legacyCompatFlags.d.ts.map +1 -1
- package/dist/constants/legacyCompatFlags.js.map +1 -1
- package/dist/constants/limits.d.ts +0 -27
- package/dist/constants/limits.d.ts.map +1 -1
- package/dist/constants/limits.js +13 -13
- package/dist/constants/limits.js.map +1 -1
- package/dist/core/Document.d.ts +23 -19
- package/dist/core/Document.d.ts.map +1 -1
- package/dist/core/Document.js +197 -63
- package/dist/core/Document.js.map +1 -1
- package/dist/core/DocumentContent.d.ts.map +1 -1
- package/dist/core/DocumentContent.js.map +1 -1
- package/dist/core/DocumentGenerator.d.ts.map +1 -1
- package/dist/core/DocumentGenerator.js +59 -24
- package/dist/core/DocumentGenerator.js.map +1 -1
- package/dist/core/DocumentIdManager.d.ts.map +1 -1
- package/dist/core/DocumentIdManager.js.map +1 -1
- package/dist/core/DocumentParser.d.ts +6 -6
- package/dist/core/DocumentParser.d.ts.map +1 -1
- package/dist/core/DocumentParser.js +60 -54
- package/dist/core/DocumentParser.js.map +1 -1
- package/dist/core/DocumentValidator.d.ts.map +1 -1
- package/dist/core/DocumentValidator.js.map +1 -1
- package/dist/core/Relationship.d.ts.map +1 -1
- package/dist/core/Relationship.js +1 -1
- package/dist/core/Relationship.js.map +1 -1
- package/dist/core/RelationshipManager.js +3 -3
- package/dist/core/RelationshipManager.js.map +1 -1
- package/dist/elements/AlternateContent.js.map +1 -1
- package/dist/elements/Bookmark.d.ts.map +1 -1
- package/dist/elements/Bookmark.js.map +1 -1
- package/dist/elements/BookmarkManager.d.ts.map +1 -1
- package/dist/elements/BookmarkManager.js.map +1 -1
- package/dist/elements/Comment.js +1 -1
- package/dist/elements/Comment.js.map +1 -1
- package/dist/elements/CommentManager.d.ts.map +1 -1
- package/dist/elements/CommentManager.js +8 -2
- package/dist/elements/CommentManager.js.map +1 -1
- package/dist/elements/CommonTypes.d.ts.map +1 -1
- package/dist/elements/CommonTypes.js +1 -2
- package/dist/elements/CommonTypes.js.map +1 -1
- package/dist/elements/CustomXml.js.map +1 -1
- package/dist/elements/Endnote.d.ts.map +1 -1
- package/dist/elements/Endnote.js.map +1 -1
- package/dist/elements/EndnoteManager.d.ts.map +1 -1
- package/dist/elements/EndnoteManager.js.map +1 -1
- package/dist/elements/Field.d.ts.map +1 -1
- package/dist/elements/Field.js +31 -28
- package/dist/elements/Field.js.map +1 -1
- package/dist/elements/FieldHelpers.d.ts.map +1 -1
- package/dist/elements/FieldHelpers.js +6 -6
- package/dist/elements/FieldHelpers.js.map +1 -1
- package/dist/elements/FontManager.d.ts.map +1 -1
- package/dist/elements/FontManager.js.map +1 -1
- package/dist/elements/Footer.js.map +1 -1
- package/dist/elements/Footnote.d.ts.map +1 -1
- package/dist/elements/Footnote.js.map +1 -1
- package/dist/elements/FootnoteManager.d.ts.map +1 -1
- package/dist/elements/FootnoteManager.js.map +1 -1
- package/dist/elements/Header.js.map +1 -1
- package/dist/elements/HeaderFooterManager.js.map +1 -1
- package/dist/elements/Hyperlink.d.ts.map +1 -1
- package/dist/elements/Hyperlink.js +5 -5
- package/dist/elements/Hyperlink.js.map +1 -1
- package/dist/elements/Image.d.ts +2 -2
- package/dist/elements/Image.d.ts.map +1 -1
- package/dist/elements/Image.js +21 -5
- package/dist/elements/Image.js.map +1 -1
- package/dist/elements/ImageManager.d.ts.map +1 -1
- package/dist/elements/ImageManager.js +2 -2
- package/dist/elements/ImageManager.js.map +1 -1
- package/dist/elements/ImageRun.js.map +1 -1
- package/dist/elements/MathElement.js.map +1 -1
- package/dist/elements/Paragraph.d.ts.map +1 -1
- package/dist/elements/Paragraph.js +128 -117
- package/dist/elements/Paragraph.js.map +1 -1
- package/dist/elements/PreservedElement.js.map +1 -1
- package/dist/elements/PropertyChangeTypes.js.map +1 -1
- package/dist/elements/RangeMarker.js.map +1 -1
- package/dist/elements/Revision.d.ts +1 -0
- package/dist/elements/Revision.d.ts.map +1 -1
- package/dist/elements/Revision.js +44 -5
- package/dist/elements/Revision.js.map +1 -1
- package/dist/elements/RevisionContent.js.map +1 -1
- package/dist/elements/RevisionManager.d.ts.map +1 -1
- package/dist/elements/RevisionManager.js.map +1 -1
- package/dist/elements/Run.d.ts.map +1 -1
- package/dist/elements/Run.js +1 -3
- package/dist/elements/Run.js.map +1 -1
- package/dist/elements/Section.d.ts.map +1 -1
- package/dist/elements/Section.js +127 -118
- package/dist/elements/Section.js.map +1 -1
- package/dist/elements/Shape.d.ts.map +1 -1
- package/dist/elements/Shape.js +21 -0
- package/dist/elements/Shape.js.map +1 -1
- package/dist/elements/StructuredDocumentTag.d.ts.map +1 -1
- package/dist/elements/StructuredDocumentTag.js +20 -8
- package/dist/elements/StructuredDocumentTag.js.map +1 -1
- package/dist/elements/Table.d.ts +2 -2
- package/dist/elements/Table.d.ts.map +1 -1
- package/dist/elements/Table.js +29 -35
- package/dist/elements/Table.js.map +1 -1
- package/dist/elements/TableCell.d.ts +2 -2
- package/dist/elements/TableCell.d.ts.map +1 -1
- package/dist/elements/TableCell.js +63 -67
- package/dist/elements/TableCell.js.map +1 -1
- package/dist/elements/TableGridChange.js.map +1 -1
- package/dist/elements/TableOfContents.d.ts +6 -6
- package/dist/elements/TableOfContents.d.ts.map +1 -1
- package/dist/elements/TableOfContents.js.map +1 -1
- package/dist/elements/TableOfContentsElement.js.map +1 -1
- package/dist/elements/TableRow.d.ts.map +1 -1
- package/dist/elements/TableRow.js +65 -47
- package/dist/elements/TableRow.js.map +1 -1
- package/dist/elements/TextBox.d.ts.map +1 -1
- package/dist/elements/TextBox.js +1 -1
- package/dist/elements/TextBox.js.map +1 -1
- package/dist/formatting/AbstractNumbering.d.ts +1 -1
- package/dist/formatting/AbstractNumbering.d.ts.map +1 -1
- package/dist/formatting/AbstractNumbering.js +11 -11
- package/dist/formatting/AbstractNumbering.js.map +1 -1
- package/dist/formatting/NumberingInstance.d.ts.map +1 -1
- package/dist/formatting/NumberingInstance.js +4 -4
- package/dist/formatting/NumberingInstance.js.map +1 -1
- package/dist/formatting/NumberingLevel.d.ts.map +1 -1
- package/dist/formatting/NumberingLevel.js +26 -26
- package/dist/formatting/NumberingLevel.js.map +1 -1
- package/dist/formatting/NumberingManager.d.ts +1 -1
- package/dist/formatting/NumberingManager.d.ts.map +1 -1
- package/dist/formatting/NumberingManager.js.map +1 -1
- package/dist/formatting/Style.d.ts.map +1 -1
- package/dist/formatting/Style.js +87 -95
- package/dist/formatting/Style.js.map +1 -1
- package/dist/formatting/StylesManager.d.ts +3 -3
- package/dist/formatting/StylesManager.d.ts.map +1 -1
- package/dist/formatting/StylesManager.js.map +1 -1
- package/dist/helpers/CleanupHelper.js.map +1 -1
- package/dist/images/ImageOptimizer.js.map +1 -1
- package/dist/index.js.map +1 -1
- package/dist/managers/DrawingManager.d.ts.map +1 -1
- package/dist/managers/DrawingManager.js.map +1 -1
- package/dist/tracking/DocumentTrackingContext.js.map +1 -1
- package/dist/tracking/TrackingContext.js.map +1 -1
- package/dist/types/compatibility-types.js.map +1 -1
- package/dist/types/formatting.js.map +1 -1
- package/dist/types/list-types.d.ts +4 -4
- package/dist/types/list-types.d.ts.map +1 -1
- package/dist/types/list-types.js.map +1 -1
- package/dist/types/settings-types.js.map +1 -1
- package/dist/types/styleConfig.js.map +1 -1
- package/dist/utils/ChangelogGenerator.d.ts.map +1 -1
- package/dist/utils/ChangelogGenerator.js.map +1 -1
- package/dist/utils/CompatibilityUpgrader.d.ts.map +1 -1
- package/dist/utils/CompatibilityUpgrader.js +7 -7
- package/dist/utils/CompatibilityUpgrader.js.map +1 -1
- package/dist/utils/InMemoryRevisionAcceptor.js +1 -1
- package/dist/utils/InMemoryRevisionAcceptor.js.map +1 -1
- package/dist/utils/MoveOperationHelper.js.map +1 -1
- package/dist/utils/RevisionAwareProcessor.js.map +1 -1
- package/dist/utils/RevisionWalker.js.map +1 -1
- package/dist/utils/SelectiveRevisionAcceptor.js.map +1 -1
- package/dist/utils/ShadingResolver.js +1 -1
- package/dist/utils/ShadingResolver.js.map +1 -1
- package/dist/utils/acceptRevisions.d.ts +0 -28
- package/dist/utils/acceptRevisions.d.ts.map +1 -1
- package/dist/utils/acceptRevisions.js +5 -7
- package/dist/utils/acceptRevisions.js.map +1 -1
- package/dist/utils/cnfStyleDecoder.js +1 -1
- package/dist/utils/cnfStyleDecoder.js.map +1 -1
- package/dist/utils/corruptionDetection.js.map +1 -1
- package/dist/utils/dateFormatting.js.map +1 -1
- package/dist/utils/deepClone.d.ts +0 -1
- package/dist/utils/deepClone.d.ts.map +1 -1
- package/dist/utils/deepClone.js +0 -7
- package/dist/utils/deepClone.js.map +1 -1
- package/dist/utils/diagnostics.d.ts +2 -2
- package/dist/utils/diagnostics.d.ts.map +1 -1
- package/dist/utils/diagnostics.js.map +1 -1
- package/dist/utils/errorHandling.js.map +1 -1
- package/dist/utils/formatting.js.map +1 -1
- package/dist/utils/list-detection.d.ts +2 -2
- package/dist/utils/list-detection.d.ts.map +1 -1
- package/dist/utils/list-detection.js +3 -3
- package/dist/utils/list-detection.js.map +1 -1
- package/dist/utils/logger.d.ts +2 -4
- package/dist/utils/logger.d.ts.map +1 -1
- package/dist/utils/logger.js +0 -2
- package/dist/utils/logger.js.map +1 -1
- package/dist/utils/parsingHelpers.js.map +1 -1
- package/dist/utils/stripTrackedChanges.d.ts +0 -19
- package/dist/utils/stripTrackedChanges.d.ts.map +1 -1
- package/dist/utils/stripTrackedChanges.js +0 -2
- package/dist/utils/stripTrackedChanges.js.map +1 -1
- package/dist/utils/textDiff.js.map +1 -1
- package/dist/utils/units.js.map +1 -1
- package/dist/utils/validation.d.ts.map +1 -1
- package/dist/utils/validation.js.map +1 -1
- package/dist/utils/xmlSanitization.js.map +1 -1
- package/dist/validation/RevisionAutoFixer.js.map +1 -1
- package/dist/validation/RevisionValidator.js.map +1 -1
- package/dist/validation/ValidationRules.js.map +1 -1
- package/dist/validation/index.js.map +1 -1
- package/dist/xml/XMLBuilder.d.ts.map +1 -1
- package/dist/xml/XMLBuilder.js +10 -0
- package/dist/xml/XMLBuilder.js.map +1 -1
- package/dist/xml/XMLParser.d.ts.map +1 -1
- package/dist/xml/XMLParser.js +4 -5
- package/dist/xml/XMLParser.js.map +1 -1
- package/dist/zip/ZipHandler.js.map +1 -1
- package/dist/zip/ZipReader.js.map +1 -1
- package/dist/zip/ZipWriter.js.map +1 -1
- package/dist/zip/errors.js.map +1 -1
- package/dist/zip/types.js.map +1 -1
- package/package.json +34 -4
- package/src/__tests__/helper-methods.test.ts +512 -0
- package/src/constants/legacyCompatFlags.ts +138 -0
- package/src/constants/limits.ts +50 -0
- package/src/core/CLAUDE.md +109 -0
- package/src/core/Document.ts +15569 -0
- package/src/core/DocumentContent.ts +467 -0
- package/src/core/DocumentGenerator.ts +1104 -0
- package/src/core/DocumentIdManager.ts +158 -0
- package/src/core/DocumentParser.ts +10107 -0
- package/src/core/DocumentValidator.ts +372 -0
- package/src/core/Relationship.ts +367 -0
- package/src/core/RelationshipManager.ts +428 -0
- package/src/elements/AlternateContent.ts +42 -0
- package/src/elements/Bookmark.ts +210 -0
- package/src/elements/BookmarkManager.ts +250 -0
- package/src/elements/CLAUDE.md +126 -0
- package/src/elements/Comment.ts +359 -0
- package/src/elements/CommentManager.ts +502 -0
- package/src/elements/CommonTypes.ts +549 -0
- package/src/elements/CustomXml.ts +36 -0
- package/src/elements/Endnote.ts +217 -0
- package/src/elements/EndnoteManager.ts +249 -0
- package/src/elements/Field.ts +1233 -0
- package/src/elements/FieldHelpers.ts +333 -0
- package/src/elements/FontManager.ts +339 -0
- package/src/elements/Footer.ts +269 -0
- package/src/elements/Footnote.ts +217 -0
- package/src/elements/FootnoteManager.ts +249 -0
- package/src/elements/Header.ts +269 -0
- package/src/elements/HeaderFooterManager.ts +219 -0
- package/src/elements/Hyperlink.ts +1146 -0
- package/src/elements/Image.ts +1756 -0
- package/src/elements/ImageManager.ts +432 -0
- package/src/elements/ImageRun.ts +59 -0
- package/src/elements/MathElement.ts +65 -0
- package/src/elements/Paragraph.ts +4227 -0
- package/src/elements/PreservedElement.ts +53 -0
- package/src/elements/PropertyChangeTypes.ts +442 -0
- package/src/elements/RangeMarker.ts +400 -0
- package/src/elements/Revision.ts +1217 -0
- package/src/elements/RevisionContent.ts +73 -0
- package/src/elements/RevisionManager.ts +1070 -0
- package/src/elements/Run.ts +3068 -0
- package/src/elements/Section.ts +1421 -0
- package/src/elements/Shape.ts +873 -0
- package/src/elements/StructuredDocumentTag.ts +978 -0
- package/src/elements/Table.ts +2524 -0
- package/src/elements/TableCell.ts +1586 -0
- package/src/elements/TableGridChange.ts +151 -0
- package/src/elements/TableOfContents.ts +691 -0
- package/src/elements/TableOfContentsElement.ts +89 -0
- package/src/elements/TableRow.ts +906 -0
- package/src/elements/TextBox.ts +768 -0
- package/src/formatting/AbstractNumbering.ts +548 -0
- package/src/formatting/CLAUDE.md +74 -0
- package/src/formatting/NumberingInstance.ts +212 -0
- package/src/formatting/NumberingLevel.ts +1006 -0
- package/src/formatting/NumberingManager.ts +827 -0
- package/src/formatting/Style.ts +1833 -0
- package/src/formatting/StylesManager.ts +1005 -0
- package/src/helpers/CleanupHelper.ts +524 -0
- package/src/images/ImageOptimizer.ts +274 -0
- package/src/index.ts +554 -0
- package/src/managers/CLAUDE.md +47 -0
- package/src/managers/DrawingManager.ts +319 -0
- package/src/tracking/DocumentTrackingContext.ts +643 -0
- package/src/tracking/TrackingContext.ts +173 -0
- package/src/types/compatibility-types.ts +49 -0
- package/src/types/formatting.ts +210 -0
- package/src/types/list-types.ts +152 -0
- package/src/types/settings-types.ts +59 -0
- package/src/types/styleConfig.ts +189 -0
- package/src/utils/CLAUDE.md +153 -0
- package/src/utils/ChangelogGenerator.ts +1581 -0
- package/src/utils/CompatibilityUpgrader.ts +237 -0
- package/src/utils/InMemoryRevisionAcceptor.ts +668 -0
- package/src/utils/MoveOperationHelper.ts +238 -0
- package/src/utils/RevisionAwareProcessor.ts +526 -0
- package/src/utils/RevisionWalker.ts +457 -0
- package/src/utils/SelectiveRevisionAcceptor.ts +613 -0
- package/src/utils/ShadingResolver.ts +107 -0
- package/src/utils/acceptRevisions.ts +714 -0
- package/src/utils/cnfStyleDecoder.ts +217 -0
- package/src/utils/corruptionDetection.ts +345 -0
- package/src/utils/dateFormatting.ts +20 -0
- package/src/utils/deepClone.ts +78 -0
- package/src/utils/diagnostics.ts +129 -0
- package/src/utils/errorHandling.ts +80 -0
- package/src/utils/formatting.ts +213 -0
- package/src/utils/list-detection.ts +274 -0
- package/src/utils/logger.ts +404 -0
- package/src/utils/parsingHelpers.ts +190 -0
- package/src/utils/stripTrackedChanges.ts +353 -0
- package/src/utils/textDiff.ts +100 -0
- package/src/utils/units.ts +421 -0
- package/src/utils/validation.ts +542 -0
- package/src/utils/xmlSanitization.ts +182 -0
- package/src/validation/RevisionAutoFixer.ts +542 -0
- package/src/validation/RevisionValidator.ts +460 -0
- package/src/validation/ValidationRules.ts +338 -0
- package/src/validation/index.ts +30 -0
- package/src/xml/CLAUDE.md +65 -0
- package/src/xml/XMLBuilder.ts +871 -0
- package/src/xml/XMLParser.ts +919 -0
- package/src/zip/CLAUDE.md +55 -0
- package/src/zip/ZipHandler.ts +637 -0
- package/src/zip/ZipReader.ts +299 -0
- package/src/zip/ZipWriter.ts +390 -0
- package/src/zip/errors.ts +69 -0
- package/src/zip/types.ts +116 -0
- package/dist/core/ListNormalizer.d.ts +0 -23
- package/dist/core/ListNormalizer.d.ts.map +0 -1
- package/dist/core/ListNormalizer.js +0 -624
- package/dist/core/ListNormalizer.js.map +0 -1
- package/dist/images/index.d.ts +0 -2
- package/dist/images/index.d.ts.map +0 -1
- package/dist/images/index.js +0 -8
- package/dist/images/index.js.map +0 -1
- package/dist/ms-doc/cfb/CFBReader.d.ts +0 -35
- package/dist/ms-doc/cfb/CFBReader.d.ts.map +0 -1
- package/dist/ms-doc/cfb/CFBReader.js +0 -360
- package/dist/ms-doc/cfb/CFBReader.js.map +0 -1
- package/dist/ms-doc/converter/DocToDocxConverter.d.ts +0 -55
- package/dist/ms-doc/converter/DocToDocxConverter.d.ts.map +0 -1
- package/dist/ms-doc/converter/DocToDocxConverter.js +0 -324
- package/dist/ms-doc/converter/DocToDocxConverter.js.map +0 -1
- package/dist/ms-doc/fib/FIB.d.ts +0 -18
- package/dist/ms-doc/fib/FIB.d.ts.map +0 -1
- package/dist/ms-doc/fib/FIB.js +0 -342
- package/dist/ms-doc/fib/FIB.js.map +0 -1
- package/dist/ms-doc/fields/FieldParser.d.ts +0 -31
- package/dist/ms-doc/fields/FieldParser.d.ts.map +0 -1
- package/dist/ms-doc/fields/FieldParser.js +0 -266
- package/dist/ms-doc/fields/FieldParser.js.map +0 -1
- package/dist/ms-doc/images/PictureExtractor.d.ts +0 -22
- package/dist/ms-doc/images/PictureExtractor.d.ts.map +0 -1
- package/dist/ms-doc/images/PictureExtractor.js +0 -233
- package/dist/ms-doc/images/PictureExtractor.js.map +0 -1
- package/dist/ms-doc/index.d.ts +0 -20
- package/dist/ms-doc/index.d.ts.map +0 -1
- package/dist/ms-doc/index.js +0 -59
- package/dist/ms-doc/index.js.map +0 -1
- package/dist/ms-doc/properties/SPRM.d.ts +0 -210
- package/dist/ms-doc/properties/SPRM.d.ts.map +0 -1
- package/dist/ms-doc/properties/SPRM.js +0 -633
- package/dist/ms-doc/properties/SPRM.js.map +0 -1
- package/dist/ms-doc/sections/SectionParser.d.ts +0 -25
- package/dist/ms-doc/sections/SectionParser.d.ts.map +0 -1
- package/dist/ms-doc/sections/SectionParser.js +0 -214
- package/dist/ms-doc/sections/SectionParser.js.map +0 -1
- package/dist/ms-doc/styles/StyleSheet.d.ts +0 -23
- package/dist/ms-doc/styles/StyleSheet.d.ts.map +0 -1
- package/dist/ms-doc/styles/StyleSheet.js +0 -268
- package/dist/ms-doc/styles/StyleSheet.js.map +0 -1
- package/dist/ms-doc/subdocuments/SubdocumentParser.d.ts +0 -61
- package/dist/ms-doc/subdocuments/SubdocumentParser.d.ts.map +0 -1
- package/dist/ms-doc/subdocuments/SubdocumentParser.js +0 -208
- package/dist/ms-doc/subdocuments/SubdocumentParser.js.map +0 -1
- package/dist/ms-doc/tables/TableParser.d.ts +0 -29
- package/dist/ms-doc/tables/TableParser.d.ts.map +0 -1
- package/dist/ms-doc/tables/TableParser.js +0 -176
- package/dist/ms-doc/tables/TableParser.js.map +0 -1
- package/dist/ms-doc/text/PieceTable.d.ts +0 -21
- package/dist/ms-doc/text/PieceTable.d.ts.map +0 -1
- package/dist/ms-doc/text/PieceTable.js +0 -171
- package/dist/ms-doc/text/PieceTable.js.map +0 -1
- package/dist/ms-doc/types/Constants.d.ts +0 -99
- package/dist/ms-doc/types/Constants.d.ts.map +0 -1
- package/dist/ms-doc/types/Constants.js +0 -102
- package/dist/ms-doc/types/Constants.js.map +0 -1
- package/dist/ms-doc/types/DocTypes.d.ts +0 -368
- package/dist/ms-doc/types/DocTypes.d.ts.map +0 -1
- package/dist/ms-doc/types/DocTypes.js +0 -3
- package/dist/ms-doc/types/DocTypes.js.map +0 -1
- package/dist/tracking/index.d.ts +0 -3
- package/dist/tracking/index.d.ts.map +0 -1
- package/dist/tracking/index.js +0 -6
- package/dist/tracking/index.js.map +0 -1
|
@@ -0,0 +1,542 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Validation utilities for DOCX files
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { REQUIRED_DOCX_FILES } from '../zip/types';
|
|
6
|
+
import { MissingRequiredFileError } from '../zip/errors';
|
|
7
|
+
import { defaultLogger } from './logger';
|
|
8
|
+
|
|
9
|
+
/**
 * Ensures an archive listing contains every entry named in REQUIRED_DOCX_FILES.
 *
 * @param filePaths - Paths of all entries found in the archive
 * @throws {MissingRequiredFileError} For the first required file that is absent
 */
export function validateDocxStructure(filePaths: string[]): void {
  // Set lookup keeps the membership test constant-time per required file.
  const present = new Set(filePaths);

  for (const name of REQUIRED_DOCX_FILES) {
    if (present.has(name)) {
      continue;
    }
    throw new MissingRequiredFileError(name);
  }
}
|
|
23
|
+
|
|
24
|
+
/**
 * Guesses from the file extension whether a path refers to binary content.
 *
 * The extension is everything from the last '.' onward, compared
 * case-insensitively. When the path has no '.', the whole path is compared,
 * which never matches a known extension.
 *
 * @param filePath - The file path to check
 * @returns True when the extension is one of the known binary extensions
 */
export function isBinaryFile(filePath: string): boolean {
  const dotIndex = filePath.lastIndexOf('.');
  const suffix = filePath.substring(dotIndex).toLowerCase();

  switch (suffix) {
    case '.png':
    case '.jpg':
    case '.jpeg':
    case '.gif':
    case '.bmp':
    case '.tiff':
    case '.ico':
    case '.emf':
    case '.wmf':
    case '.bin':
    case '.dat':
    case '.ttf':
    case '.otf':
    case '.woff':
      return true;
    default:
      return false;
  }
}
|
|
38
|
+
|
|
39
|
+
/**
 * Normalizes an archive entry path for consistent comparisons and rejects
 * paths that could escape the archive root.
 *
 * Normalization converts backslashes to forward slashes and strips every
 * leading slash, so "/word/document.xml" and "\\word\\document.xml" both
 * become "word/document.xml". Leading slashes are therefore made
 * archive-relative, not rejected.
 *
 * **Security:** the following inputs are rejected:
 * - URL-encoded '.', '/' or '\' (%2E, %2F, %5C in either case), checked on
 *   the raw input
 * - Path traversal sequences ("../", "/..", or a bare "..") after
 *   normalization
 * - Windows drive-letter paths (e.g. "C:\\", "D:"), including ones hidden
 *   behind leading slashes such as "/C:/x"
 *
 * @param path - The path to normalize
 * @returns The normalized, archive-relative path
 * @throws {Error} If the path contains URL-encoded separators/dots, a path
 *   traversal sequence, or a Windows drive prefix
 */
export function normalizePath(path: string): string {
  // Normalize first so every later check only has to reason about '/'.
  const normalized = path.replace(/\\/g, '/').replace(/^\/+/, '');

  // Encoded dots and separators are never decoded here, so refuse them
  // outright instead of letting a later consumer decode them into "../".
  if (/%2[eE]|%2[fF]|%5[cC]/.test(path)) {
    throw new Error(
      `Invalid file path: "${path}" contains URL-encoded characters (%2E, %2F, %5C). ` +
        `This could be an attempt to bypass path validation. ` +
        `Only plain characters are allowed in DOCX file paths.`
    );
  }

  // Deliberately conservative substring match: any "../" or "/.." is refused.
  if (normalized.includes('../') || normalized.includes('/..') || normalized === '..') {
    throw new Error(
      `Invalid file path: "${path}" contains path traversal sequence (..). ` +
        `This could be a malicious DOCX file attempting directory traversal. ` +
        `DOCX archives must only contain relative paths within the archive.`
    );
  }

  // Checked on the normalized form so "/C:/x" is caught as well as "C:\x".
  if (/^[a-zA-Z]:/.test(normalized)) {
    throw new Error(
      `Invalid file path: "${path}" appears to be an absolute Windows path. ` +
        `Absolute paths are not allowed in DOCX archives. ` +
        `Only relative paths within the archive are permitted.`
    );
  }

  // No separate Unix-absolute check: leading slashes were stripped above, so
  // the normalized path can never start with '/'.
  return normalized;
}
|
|
99
|
+
|
|
100
|
+
/**
 * Reports whether a buffer begins with a recognised ZIP signature.
 *
 * Two four-byte signatures are accepted: 'PK' 0x03 0x04 and
 * 'PK' 0x05 0x06 (the form an empty archive starts with).
 *
 * @param buffer - The buffer to inspect
 * @returns True if the first four bytes match one of the accepted signatures
 */
export function isValidZipBuffer(buffer: Buffer): boolean {
  // Fewer than four bytes cannot hold a full signature.
  if (buffer.length < 4) {
    return false;
  }

  // Every accepted signature starts with ASCII "PK".
  if (buffer[0] !== 0x50 || buffer[1] !== 0x4B) {
    return false;
  }

  const third = buffer[2];
  const fourth = buffer[3];
  const isRegularArchive = third === 0x03 && fourth === 0x04;
  const isEmptyArchive = third === 0x05 && fourth === 0x06;

  return isRegularArchive || isEmptyArchive;
}
|
|
118
|
+
|
|
119
|
+
/**
 * Heuristically decides whether content looks like text rather than binary.
 *
 * Strings are always treated as text. Buffers are treated as text when they
 * contain no NUL (0x00) byte. This is NOT a UTF-8 validity check: a buffer of
 * invalid UTF-8 without NUL bytes still counts as text.
 *
 * @param content - The content to check
 * @returns True if the content is a string or a buffer without NUL bytes
 */
export function isTextContent(content: Buffer | string): boolean {
  if (typeof content === 'string') {
    return true;
  }

  try {
    // Scan the raw bytes instead of decoding the whole buffer to a string:
    // in UTF-8 only the byte 0x00 decodes to U+0000, so the answer is the
    // same without the extra allocation.
    return !content.includes(0);
  } catch {
    // Keeps the function non-throwing if a caller passes something that is
    // neither a string nor a Buffer at runtime.
    return false;
  }
}
|
|
138
|
+
|
|
139
|
+
/**
 * Checks a twips measurement (spacing, indentation, margins).
 *
 * A twip is 1/20th of a point; 1440 twips make one inch. Accepted values are
 * finite numbers from -31680 to 31680 inclusive (±22 inches).
 *
 * @param value - The twips value to validate
 * @param fieldName - Name of the field (for error messages)
 * @throws {Error} If the value is not finite or lies outside the range
 */
export function validateTwips(value: number, fieldName = 'value'): void {
  // 22 inches * 1440 twips per inch, applied symmetrically around zero.
  const LIMIT = 31680;

  if (!Number.isFinite(value)) {
    throw new Error(`${fieldName} must be a finite number, got ${value}`);
  }

  const withinRange = value >= -LIMIT && value <= LIMIT;
  if (withinRange) {
    return;
  }

  throw new Error(
    `${fieldName} out of range: ${value} twips (allowed: ${-LIMIT} to ${LIMIT}, ±22 inches)`
  );
}
|
|
162
|
+
|
|
163
|
+
/**
 * Converts a hex color to uppercase 6-character form.
 *
 * Accepts 3- or 6-character hex, with or without one leading '#'. Shorthand
 * is expanded by doubling each digit ('F00' becomes 'FF0000').
 *
 * @param color - Color to normalize (e.g., '#F00', 'FF0000', '#FF0000', 'f00')
 * @returns Uppercase 6-character hex without '#' (e.g., 'FF0000')
 * @throws Error if the color is not 3 or 6 hex digits
 *
 * @example
 * ```typescript
 * normalizeColor('#F00')     // Returns: 'FF0000'
 * normalizeColor('FF0000')   // Returns: 'FF0000'
 * normalizeColor('#ff0000')  // Returns: 'FF0000'
 * normalizeColor('f00')      // Returns: 'FF0000'
 * ```
 */
export function normalizeColor(color: string): string {
  const digits = color.replace(/^#/, '');

  const isWellFormed = /^[0-9A-Fa-f]{3}$|^[0-9A-Fa-f]{6}$/.test(digits);
  if (!isWellFormed) {
    throw new Error(
      `Invalid color format: "${color}". Expected 3 or 6-character hex ` +
        `(e.g., "FF0000", "#FF0000", "F00", or "#F00")`
    );
  }

  // Shorthand: repeat each digit once to reach the 6-character form.
  const sixDigits =
    digits.length === 3
      ? digits
          .split('')
          .map((digit) => digit + digit)
          .join('')
      : digits;

  return sixDigits.toUpperCase();
}
|
|
198
|
+
|
|
199
|
+
/**
 * Checks that a value is a 6-digit hexadecimal color (RRGGBB).
 *
 * One leading '#' is tolerated and ignored; 3-digit shorthand is not accepted.
 *
 * @param color - The color hex string to validate, with or without '#'
 * @param fieldName - Name of the field (for error messages)
 * @throws {Error} If the value is not a string or not six hex digits
 */
export function validateColor(color: string, fieldName = 'color'): void {
  // Runtime guard for untyped callers.
  if (typeof color !== 'string') {
    throw new Error(`${fieldName} must be a string, got ${typeof color}`);
  }

  const digits = color.charAt(0) === '#' ? color.slice(1) : color;
  const isSixDigitHex = /^[0-9A-Fa-f]{6}$/.test(digits);

  if (isSixDigitHex) {
    return;
  }

  throw new Error(
    `${fieldName} must be a 6-digit hex color (e.g., 'FF0000' or '#FF0000'), got '${color}'`
  );
}
|
|
220
|
+
|
|
221
|
+
/**
|
|
222
|
+
* Alias for validateColor for backwards compatibility
|
|
223
|
+
*/
|
|
224
|
+
export const validateHexColor = validateColor; // same function object as validateColor, so both names apply identical checks
|
|
225
|
+
|
|
226
|
+
/**
 * Checks that a numbering ID is an integer from 0 to 2147483647 inclusive.
 *
 * @param numId - The numbering ID to validate
 * @param fieldName - Name of the field (for error messages)
 * @throws {Error} If the ID is not an integer, is negative, or exceeds the maximum
 */
export function validateNumberingId(numId: number, fieldName = 'numbering ID'): void {
  // Upper bound on numbering IDs.
  const MAX_NUM_ID = 2147483647;

  let failure: string | undefined;
  if (!Number.isInteger(numId)) {
    failure = `${fieldName} must be an integer, got ${numId}`;
  } else if (numId < 0) {
    failure = `${fieldName} must be non-negative, got ${numId}`;
  } else if (numId > MAX_NUM_ID) {
    failure = `${fieldName} exceeds maximum value ${MAX_NUM_ID}, got ${numId}`;
  }

  if (failure !== undefined) {
    throw new Error(failure);
  }
}
|
|
247
|
+
|
|
248
|
+
/**
 * Checks that a numbering level is an integer from 0 to `maxLevel` inclusive.
 *
 * @param level - The level to validate
 * @param fieldName - Name of the field (for error messages)
 * @param maxLevel - Maximum allowed level (default 8)
 * @throws {Error} If the level is not an integer or lies outside the range
 */
export function validateLevel(
  level: number,
  fieldName = 'level',
  maxLevel = 8
): void {
  const isWholeNumber = Number.isInteger(level);
  if (!isWholeNumber) {
    throw new Error(`${fieldName} must be an integer, got ${level}`);
  }

  const outOfRange = level < 0 || level > maxLevel;
  if (outOfRange) {
    throw new Error(`${fieldName} must be between 0 and ${maxLevel}, got ${level}`);
  }
}
|
|
268
|
+
|
|
269
|
+
/**
 * Checks that an alignment value is one of the permitted options.
 *
 * @param alignment - The alignment value to validate
 * @param allowed - The permitted alignment values
 * @param fieldName - Name of the field (for error messages)
 * @throws {Error} If the value is not a string or is not in `allowed`
 */
export function validateAlignment(
  alignment: string,
  allowed: readonly string[],
  fieldName = 'alignment'
): void {
  // Runtime guard for untyped callers.
  if (typeof alignment !== 'string') {
    throw new Error(`${fieldName} must be a string, got ${typeof alignment}`);
  }

  const isPermitted = allowed.some((option) => option === alignment);
  if (isPermitted) {
    return;
  }

  throw new Error(
    `Invalid ${fieldName}: '${alignment}' (allowed: ${allowed.join(', ')})`
  );
}
|
|
291
|
+
|
|
292
|
+
/**
 * Checks a font size expressed in half-points.
 *
 * Accepted values are integers from 2 to 1638 inclusive (1 to 819 points).
 *
 * @param size - The font size in half-points to validate
 * @param fieldName - Name of the field (for error messages)
 * @throws {Error} If the size is not finite, not an integer, or out of range
 */
export function validateFontSize(size: number, fieldName = 'font size'): void {
  // Bounds in half-points; the error message also reports them in points.
  const MIN_SIZE = 2;
  const MAX_SIZE = 1638;

  // Tested separately so NaN/Infinity get the "finite number" message and
  // fractional values get the "integer" message.
  if (!Number.isFinite(size)) {
    throw new Error(`${fieldName} must be a finite number, got ${size}`);
  }
  if (!Number.isInteger(size)) {
    throw new Error(`${fieldName} must be an integer (in half-points), got ${size}`);
  }

  const withinRange = size >= MIN_SIZE && size <= MAX_SIZE;
  if (withinRange) {
    return;
  }

  throw new Error(
    `${fieldName} out of range: ${size} half-points (allowed: ${MIN_SIZE}-${MAX_SIZE}, or ${MIN_SIZE / 2}-${MAX_SIZE / 2} points)`
  );
}
|
|
318
|
+
|
|
319
|
+
/**
 * Checks that a value is a string with at least one non-whitespace character
 * @param value - String to check
 * @param fieldName - Label used at the start of error messages
 * @throws {Error} If the value is not a string, or is empty after trimming
 */
export function validateNonEmptyString(value: string, fieldName = 'value'): void {
  // Runtime guard for untyped (plain JavaScript) callers.
  const actualType = typeof value;
  if (actualType !== 'string') {
    throw new Error(`${fieldName} must be a string, got ${actualType}`);
  }

  // Whitespace-only input counts as empty.
  const trimmed = value.trim();
  if (trimmed === '') {
    throw new Error(`${fieldName} cannot be empty`);
  }
}
|
|
334
|
+
|
|
335
|
+
/**
 * Checks that a number lies in the inclusive range 0-100
 * @param value - Percentage to check (fractional values are accepted)
 * @param fieldName - Label used at the start of error messages
 * @throws {Error} If the value is not finite or lies outside 0-100
 */
export function validatePercentage(value: number, fieldName = 'percentage'): void {
  const isRealNumber = Number.isFinite(value);
  if (!isRealNumber) {
    throw new Error(`${fieldName} must be a finite number, got ${value}`);
  }

  const belowMinimum = value < 0;
  const aboveMaximum = value > 100;
  if (belowMinimum || aboveMaximum) {
    throw new Error(`${fieldName} must be between 0 and 100, got ${value}`);
  }
}
|
|
350
|
+
|
|
351
|
+
/**
 * Checks a measurement given in EMUs (English Metric Units)
 * Used for image dimensions: 914400 EMUs = 1 inch
 * Accepted range: 0 to 50 million EMUs (about 55 inches), both ends inclusive
 * @param value - Measurement in EMUs
 * @param fieldName - Label used at the start of error messages
 * @throws {Error} If the value is not finite, not an integer, negative, or above the maximum
 */
export function validateEmus(value: number, fieldName = 'EMUs'): void {
  // Upper bound: 50 million EMUs (about 55 inches).
  const MAX_EMUS = 50000000;

  // Every failure message has the shape "<field> <problem>, got <value>".
  const reject = (problem: string): never => {
    throw new Error(`${fieldName} ${problem}, got ${value}`);
  };

  if (!Number.isFinite(value)) {
    reject('must be a finite number');
  }
  if (!Number.isInteger(value)) {
    reject('must be an integer');
  }
  if (value < 0) {
    reject('must be non-negative');
  }
  if (value > MAX_EMUS) {
    reject(`exceeds maximum ${MAX_EMUS} (about 55 inches)`);
  }
}
|
|
380
|
+
|
|
381
|
+
/**
 * Outcome of scanning a piece of text for XML-like content
 */
export interface TextValidationResult {
  /** Overall verdict for the text */
  isValid: boolean;
  /** Whether any XML-like pattern was found in the text */
  hasXmlPatterns: boolean;
  /** Human-readable warning messages; empty when nothing was found */
  warnings: string[];
  /** Cleaned version of the text, when one was produced */
  cleanedText?: string;
}
|
|
390
|
+
|
|
391
|
+
/**
 * Detects XML-like patterns in text that might cause display issues
 *
 * Three independent checks are run, each contributing at most one warning:
 *  1. raw WordprocessingML tags such as `<w:t>` or `</w:r>`;
 *  2. entity-escaped markup (`&lt;...&gt;`, `&quot;`, `&apos;`);
 *  3. a short list of specific `w:t` patterns that suggest the text was
 *     corrupted by previous processing.
 *
 * Ordinary punctuation (plain quotes, apostrophes) is not flagged; only the
 * entity-escaped forms are.
 *
 * @param text - The text to validate
 * @param context - Optional context for better warning messages (e.g., "hyperlink text")
 * @returns Validation result; `isValid` is always `true` because findings are
 *          reported as warnings only, and `cleanedText` is never set here
 */
export function detectXmlInText(text: string, context?: string): TextValidationResult {
  const warnings: string[] = [];
  let hasXmlPatterns = false;
  const contextStr = context ? ` in ${context}` : '';

  // Raw Word tags: opening, closing or self-closing (`[^>]+` also covers the
  // trailing `/`). No `g` flag: a global regex used with test() carries
  // lastIndex state between calls, which is never wanted for a yes/no check.
  const xmlElementPattern = /<\/?w:[^>]+>/;

  // Markup or quotes that arrive already entity-escaped. These must be the
  // entity forms, not the raw characters, otherwise any text containing an
  // apostrophe or quotation mark would be flagged.
  const escapedXmlPattern = /&lt;.*?&gt;|&quot;|&apos;/;

  // Specific patterns seen in corrupted text: raw and escaped `w:t` tags.
  const problematicPatterns = [
    /<w:t\s+xml:space=["']preserve["']>/,
    /<\/w:t>/,
    /&lt;w:t\s+xml:space=(?:&quot;|&apos;)preserve(?:&quot;|&apos;)&gt;/,
  ];

  if (xmlElementPattern.test(text)) {
    hasXmlPatterns = true;
    // Quote at most the first 100 characters of the offending text.
    const excerpt = `${text.substring(0, 100)}${text.length > 100 ? '...' : ''}`;
    warnings.push(
      `Text${contextStr} contains XML-like markup: "${excerpt}". ` +
        `This will be displayed as literal text in the document. ` +
        `If you intended to add formatting, use the appropriate API methods instead.`
    );
  }

  if (escapedXmlPattern.test(text)) {
    hasXmlPatterns = true;
    warnings.push(
      `Text${contextStr} contains escaped XML entities (e.g., &lt;, &gt;, &quot;). ` +
        `These will appear as literal characters in the document.`
    );
  }

  // One warning is enough no matter how many of the known patterns match.
  if (problematicPatterns.some((pattern) => pattern.test(text))) {
    hasXmlPatterns = true;
    warnings.push(
      `Text${contextStr} contains a known problematic XML pattern that suggests ` +
        `the text may have been corrupted by previous processing.`
    );
  }

  return {
    isValid: true, // Text is always "valid" - we just warn about potential issues
    hasXmlPatterns,
    warnings,
  };
}
|
|
458
|
+
|
|
459
|
+
/**
 * Cleans XML-like patterns from text
 *
 * Steps, in order:
 *  1. decode the predefined XML entities (`&lt;`, `&gt;`, `&quot;`,
 *     `&apos;`, `&amp;`) so that escaped markup becomes visible as markup;
 *  2. strip Word tags (`<w:...>` and `</w:...>`);
 *  3. in aggressive mode, strip every remaining `<...>` tag;
 *  4. collapse runs of whitespace to a single space and trim both ends.
 *
 * @param text - The text to clean
 * @param aggressive - If true, removes every `<...>` tag; if false, only Word (`w:`) tags
 * @returns Cleaned text with XML patterns removed
 */
export function cleanXmlFromText(text: string, aggressive = false): string {
  // `&amp;` is decoded last so that a double-escaped sequence such as
  // `&amp;lt;` is unescaped exactly once (to `&lt;`) rather than twice.
  let cleaned = text
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    .replace(/&quot;/g, '"')
    .replace(/&apos;/g, "'")
    .replace(/&amp;/g, '&');

  // Remove Word XML tags, e.g. <w:t xml:space="preserve"> and </w:t>.
  cleaned = cleaned.replace(/<w:[^>]+>/g, '');
  cleaned = cleaned.replace(/<\/w:[^>]+>/g, '');

  if (aggressive) {
    // Remove whatever other tag-shaped content is left.
    cleaned = cleaned.replace(/<[^>]+>/g, '');
  }

  // Removing tags can leave gaps; normalise all whitespace runs.
  return cleaned.replace(/\s+/g, ' ').trim();
}
|
|
496
|
+
|
|
497
|
+
/**
 * Validates text for use in Run or Hyperlink elements
 *
 * Runs XML-pattern detection on the text, optionally produces a cleaned
 * copy, and optionally reports every warning through the default logger.
 *
 * @param text - The text to validate
 * @param options - Validation options: `context` labels the warnings,
 *   `autoClean` (default false) fills in `cleanedText` when patterns are found,
 *   `aggressive` (default false) is forwarded to the cleaner,
 *   `warnToConsole` (default true) enables logging of warnings
 * @returns Validation result with warnings and optionally cleaned text
 */
export function validateRunText(
  text: string,
  options: {
    context?: string;
    autoClean?: boolean;
    aggressive?: boolean;
    warnToConsole?: boolean;
  } = {}
): TextValidationResult {
  const {
    context,
    autoClean = false,
    aggressive = false,
    warnToConsole = true,
  } = options;

  // Shorten a string to 50 characters for use inside a warning message.
  const preview = (value: string): string =>
    `${value.substring(0, 50)}${value.length > 50 ? '...' : ''}`;

  const result = detectXmlInText(text, context);

  // Cleaning happens only on request and only when something was detected.
  if (autoClean && result.hasXmlPatterns) {
    const cleanedText = cleanXmlFromText(text, aggressive);
    result.cleanedText = cleanedText;
    result.warnings.push(
      `Text has been automatically cleaned. ` +
        `Original: "${preview(text)}" ` +
        `Cleaned: "${preview(cleanedText)}"`
    );
  }

  // Emit a header line followed by one line per warning.
  const shouldLog =
    warnToConsole && result.warnings.length > 0 && typeof console !== 'undefined';
  if (shouldLog) {
    const contextStr = context ? ` [${context}]` : '';
    defaultLogger.warn(`DocXML Text Validation Warning${contextStr}:`);
    for (const warning of result.warnings) {
      defaultLogger.warn(` - ${warning}`);
    }
  }

  return result;
}
|