docxmlater 10.1.3 → 10.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +759 -754
- package/dist/constants/legacyCompatFlags.js +1 -1
- package/dist/constants/legacyCompatFlags.js.map +1 -1
- package/dist/constants/limits.js.map +1 -1
- package/dist/core/Document.d.ts +50 -50
- package/dist/core/Document.d.ts.map +1 -1
- package/dist/core/Document.js +483 -471
- package/dist/core/Document.js.map +1 -1
- package/dist/core/DocumentContent.d.ts +9 -9
- package/dist/core/DocumentContent.d.ts.map +1 -1
- package/dist/core/DocumentContent.js +1 -1
- package/dist/core/DocumentContent.js.map +1 -1
- package/dist/core/DocumentGenerator.d.ts +11 -11
- package/dist/core/DocumentGenerator.d.ts.map +1 -1
- package/dist/core/DocumentGenerator.js +251 -251
- package/dist/core/DocumentGenerator.js.map +1 -1
- package/dist/core/DocumentIdManager.js.map +1 -1
- package/dist/core/DocumentParser.d.ts +15 -15
- package/dist/core/DocumentParser.d.ts.map +1 -1
- package/dist/core/DocumentParser.js +2123 -2155
- package/dist/core/DocumentParser.js.map +1 -1
- package/dist/core/DocumentValidator.d.ts.map +1 -1
- package/dist/core/DocumentValidator.js +2 -5
- package/dist/core/DocumentValidator.js.map +1 -1
- package/dist/core/Relationship.js.map +1 -1
- package/dist/core/RelationshipManager.d.ts.map +1 -1
- package/dist/core/RelationshipManager.js +3 -3
- package/dist/core/RelationshipManager.js.map +1 -1
- package/dist/elements/AlternateContent.js.map +1 -1
- package/dist/elements/Bookmark.d.ts.map +1 -1
- package/dist/elements/Bookmark.js +3 -1
- package/dist/elements/Bookmark.js.map +1 -1
- package/dist/elements/BookmarkManager.d.ts.map +1 -1
- package/dist/elements/BookmarkManager.js.map +1 -1
- package/dist/elements/Comment.d.ts.map +1 -1
- package/dist/elements/Comment.js +9 -6
- package/dist/elements/Comment.js.map +1 -1
- package/dist/elements/CommentManager.d.ts.map +1 -1
- package/dist/elements/CommentManager.js +18 -17
- package/dist/elements/CommentManager.js.map +1 -1
- package/dist/elements/CommonTypes.d.ts +21 -21
- package/dist/elements/CommonTypes.d.ts.map +1 -1
- package/dist/elements/CommonTypes.js +56 -56
- package/dist/elements/CommonTypes.js.map +1 -1
- package/dist/elements/CustomXml.js.map +1 -1
- package/dist/elements/Endnote.d.ts.map +1 -1
- package/dist/elements/Endnote.js +6 -6
- package/dist/elements/Endnote.js.map +1 -1
- package/dist/elements/EndnoteManager.d.ts.map +1 -1
- package/dist/elements/EndnoteManager.js +6 -7
- package/dist/elements/EndnoteManager.js.map +1 -1
- package/dist/elements/Field.d.ts.map +1 -1
- package/dist/elements/Field.js +82 -25
- package/dist/elements/Field.js.map +1 -1
- package/dist/elements/FieldHelpers.d.ts.map +1 -1
- package/dist/elements/FieldHelpers.js.map +1 -1
- package/dist/elements/FontManager.d.ts.map +1 -1
- package/dist/elements/FontManager.js +1 -1
- package/dist/elements/FontManager.js.map +1 -1
- package/dist/elements/Footer.js +2 -2
- package/dist/elements/Footer.js.map +1 -1
- package/dist/elements/Footnote.d.ts.map +1 -1
- package/dist/elements/Footnote.js +6 -6
- package/dist/elements/Footnote.js.map +1 -1
- package/dist/elements/FootnoteManager.d.ts.map +1 -1
- package/dist/elements/FootnoteManager.js +6 -7
- package/dist/elements/FootnoteManager.js.map +1 -1
- package/dist/elements/Header.js +2 -2
- package/dist/elements/Header.js.map +1 -1
- package/dist/elements/HeaderFooterManager.js.map +1 -1
- package/dist/elements/Hyperlink.d.ts +5 -3
- package/dist/elements/Hyperlink.d.ts.map +1 -1
- package/dist/elements/Hyperlink.js +134 -76
- package/dist/elements/Hyperlink.js.map +1 -1
- package/dist/elements/Image.d.ts.map +1 -1
- package/dist/elements/Image.js +238 -106
- package/dist/elements/Image.js.map +1 -1
- package/dist/elements/ImageManager.d.ts.map +1 -1
- package/dist/elements/ImageManager.js +1 -1
- package/dist/elements/ImageManager.js.map +1 -1
- package/dist/elements/ImageRun.js +1 -1
- package/dist/elements/ImageRun.js.map +1 -1
- package/dist/elements/MathElement.js.map +1 -1
- package/dist/elements/Paragraph.d.ts +24 -24
- package/dist/elements/Paragraph.d.ts.map +1 -1
- package/dist/elements/Paragraph.js +181 -188
- package/dist/elements/Paragraph.js.map +1 -1
- package/dist/elements/PreservedElement.js.map +1 -1
- package/dist/elements/PropertyChangeTypes.d.ts.map +1 -1
- package/dist/elements/PropertyChangeTypes.js +6 -6
- package/dist/elements/PropertyChangeTypes.js.map +1 -1
- package/dist/elements/RangeMarker.d.ts.map +1 -1
- package/dist/elements/RangeMarker.js.map +1 -1
- package/dist/elements/Revision.d.ts.map +1 -1
- package/dist/elements/Revision.js +4 -5
- package/dist/elements/Revision.js.map +1 -1
- package/dist/elements/RevisionContent.js.map +1 -1
- package/dist/elements/RevisionManager.d.ts.map +1 -1
- package/dist/elements/RevisionManager.js +40 -48
- package/dist/elements/RevisionManager.js.map +1 -1
- package/dist/elements/Run.d.ts +16 -16
- package/dist/elements/Run.d.ts.map +1 -1
- package/dist/elements/Run.js +256 -238
- package/dist/elements/Run.js.map +1 -1
- package/dist/elements/Section.d.ts.map +1 -1
- package/dist/elements/Section.js +36 -11
- package/dist/elements/Section.js.map +1 -1
- package/dist/elements/Shape.d.ts.map +1 -1
- package/dist/elements/Shape.js.map +1 -1
- package/dist/elements/StructuredDocumentTag.d.ts +6 -6
- package/dist/elements/StructuredDocumentTag.d.ts.map +1 -1
- package/dist/elements/StructuredDocumentTag.js +99 -104
- package/dist/elements/StructuredDocumentTag.js.map +1 -1
- package/dist/elements/Table.d.ts +11 -11
- package/dist/elements/Table.d.ts.map +1 -1
- package/dist/elements/Table.js +102 -107
- package/dist/elements/Table.js.map +1 -1
- package/dist/elements/TableCell.d.ts +10 -10
- package/dist/elements/TableCell.d.ts.map +1 -1
- package/dist/elements/TableCell.js +105 -106
- package/dist/elements/TableCell.js.map +1 -1
- package/dist/elements/TableGridChange.d.ts.map +1 -1
- package/dist/elements/TableGridChange.js.map +1 -1
- package/dist/elements/TableOfContents.d.ts.map +1 -1
- package/dist/elements/TableOfContents.js +4 -4
- package/dist/elements/TableOfContents.js.map +1 -1
- package/dist/elements/TableOfContentsElement.js.map +1 -1
- package/dist/elements/TableRow.d.ts.map +1 -1
- package/dist/elements/TableRow.js +13 -6
- package/dist/elements/TableRow.js.map +1 -1
- package/dist/elements/TextBox.d.ts.map +1 -1
- package/dist/elements/TextBox.js +3 -5
- package/dist/elements/TextBox.js.map +1 -1
- package/dist/formatting/AbstractNumbering.d.ts +4 -4
- package/dist/formatting/AbstractNumbering.d.ts.map +1 -1
- package/dist/formatting/AbstractNumbering.js +54 -49
- package/dist/formatting/AbstractNumbering.js.map +1 -1
- package/dist/formatting/NumberingInstance.d.ts.map +1 -1
- package/dist/formatting/NumberingInstance.js +1 -3
- package/dist/formatting/NumberingInstance.js.map +1 -1
- package/dist/formatting/NumberingLevel.d.ts +5 -5
- package/dist/formatting/NumberingLevel.d.ts.map +1 -1
- package/dist/formatting/NumberingLevel.js +119 -125
- package/dist/formatting/NumberingLevel.js.map +1 -1
- package/dist/formatting/NumberingManager.d.ts.map +1 -1
- package/dist/formatting/NumberingManager.js +9 -9
- package/dist/formatting/NumberingManager.js.map +1 -1
- package/dist/formatting/Style.d.ts +11 -11
- package/dist/formatting/Style.d.ts.map +1 -1
- package/dist/formatting/Style.js +219 -247
- package/dist/formatting/Style.js.map +1 -1
- package/dist/formatting/StylesManager.d.ts +2 -2
- package/dist/formatting/StylesManager.d.ts.map +1 -1
- package/dist/formatting/StylesManager.js +96 -102
- package/dist/formatting/StylesManager.js.map +1 -1
- package/dist/helpers/CleanupHelper.d.ts +1 -1
- package/dist/helpers/CleanupHelper.d.ts.map +1 -1
- package/dist/helpers/CleanupHelper.js +6 -6
- package/dist/helpers/CleanupHelper.js.map +1 -1
- package/dist/images/ImageOptimizer.js +7 -7
- package/dist/images/ImageOptimizer.js.map +1 -1
- package/dist/index.d.ts +9 -9
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js.map +1 -1
- package/dist/managers/DrawingManager.js.map +1 -1
- package/dist/tracking/DocumentTrackingContext.d.ts.map +1 -1
- package/dist/tracking/DocumentTrackingContext.js +23 -7
- package/dist/tracking/DocumentTrackingContext.js.map +1 -1
- package/dist/tracking/TrackingContext.d.ts.map +1 -1
- package/dist/tracking/TrackingContext.js.map +1 -1
- package/dist/types/compatibility-types.js.map +1 -1
- package/dist/types/formatting.js.map +1 -1
- package/dist/types/list-types.d.ts +6 -6
- package/dist/types/list-types.js.map +1 -1
- package/dist/types/settings-types.js.map +1 -1
- package/dist/types/styleConfig.d.ts +2 -2
- package/dist/types/styleConfig.js.map +1 -1
- package/dist/utils/ChangelogGenerator.d.ts.map +1 -1
- package/dist/utils/ChangelogGenerator.js +97 -101
- package/dist/utils/ChangelogGenerator.js.map +1 -1
- package/dist/utils/CompatibilityUpgrader.d.ts.map +1 -1
- package/dist/utils/CompatibilityUpgrader.js +1 -1
- package/dist/utils/CompatibilityUpgrader.js.map +1 -1
- package/dist/utils/InMemoryRevisionAcceptor.d.ts.map +1 -1
- package/dist/utils/InMemoryRevisionAcceptor.js +1 -6
- package/dist/utils/InMemoryRevisionAcceptor.js.map +1 -1
- package/dist/utils/MoveOperationHelper.d.ts.map +1 -1
- package/dist/utils/MoveOperationHelper.js +1 -1
- package/dist/utils/MoveOperationHelper.js.map +1 -1
- package/dist/utils/RevisionAwareProcessor.d.ts.map +1 -1
- package/dist/utils/RevisionAwareProcessor.js +2 -4
- package/dist/utils/RevisionAwareProcessor.js.map +1 -1
- package/dist/utils/RevisionWalker.d.ts.map +1 -1
- package/dist/utils/RevisionWalker.js +4 -12
- package/dist/utils/RevisionWalker.js.map +1 -1
- package/dist/utils/SelectiveRevisionAcceptor.d.ts.map +1 -1
- package/dist/utils/SelectiveRevisionAcceptor.js +2 -6
- package/dist/utils/SelectiveRevisionAcceptor.js.map +1 -1
- package/dist/utils/ShadingResolver.d.ts.map +1 -1
- package/dist/utils/ShadingResolver.js +1 -1
- package/dist/utils/ShadingResolver.js.map +1 -1
- package/dist/utils/acceptRevisions.d.ts.map +1 -1
- package/dist/utils/acceptRevisions.js +23 -12
- package/dist/utils/acceptRevisions.js.map +1 -1
- package/dist/utils/cnfStyleDecoder.d.ts +1 -1
- package/dist/utils/cnfStyleDecoder.d.ts.map +1 -1
- package/dist/utils/cnfStyleDecoder.js +40 -40
- package/dist/utils/cnfStyleDecoder.js.map +1 -1
- package/dist/utils/corruptionDetection.d.ts.map +1 -1
- package/dist/utils/corruptionDetection.js.map +1 -1
- package/dist/utils/dateFormatting.js.map +1 -1
- package/dist/utils/deepClone.js +1 -1
- package/dist/utils/deepClone.js.map +1 -1
- package/dist/utils/diagnostics.d.ts.map +1 -1
- package/dist/utils/diagnostics.js +1 -1
- package/dist/utils/diagnostics.js.map +1 -1
- package/dist/utils/errorHandling.js.map +1 -1
- package/dist/utils/formatting.d.ts.map +1 -1
- package/dist/utils/formatting.js +10 -2
- package/dist/utils/formatting.js.map +1 -1
- package/dist/utils/list-detection.d.ts +2 -2
- package/dist/utils/list-detection.d.ts.map +1 -1
- package/dist/utils/list-detection.js +21 -23
- package/dist/utils/list-detection.js.map +1 -1
- package/dist/utils/logger.d.ts.map +1 -1
- package/dist/utils/logger.js +12 -7
- package/dist/utils/logger.js.map +1 -1
- package/dist/utils/parsingHelpers.js.map +1 -1
- package/dist/utils/stripTrackedChanges.d.ts.map +1 -1
- package/dist/utils/stripTrackedChanges.js +3 -3
- package/dist/utils/stripTrackedChanges.js.map +1 -1
- package/dist/utils/textDiff.d.ts +1 -1
- package/dist/utils/textDiff.js +8 -8
- package/dist/utils/textDiff.js.map +1 -1
- package/dist/utils/units.js.map +1 -1
- package/dist/utils/validation.d.ts.map +1 -1
- package/dist/utils/validation.js +24 -7
- package/dist/utils/validation.js.map +1 -1
- package/dist/utils/xmlSanitization.d.ts.map +1 -1
- package/dist/utils/xmlSanitization.js +3 -3
- package/dist/utils/xmlSanitization.js.map +1 -1
- package/dist/validation/RevisionAutoFixer.d.ts.map +1 -1
- package/dist/validation/RevisionAutoFixer.js +5 -5
- package/dist/validation/RevisionAutoFixer.js.map +1 -1
- package/dist/validation/RevisionValidator.d.ts.map +1 -1
- package/dist/validation/RevisionValidator.js +7 -9
- package/dist/validation/RevisionValidator.js.map +1 -1
- package/dist/validation/ValidationRules.js +3 -3
- package/dist/validation/ValidationRules.js.map +1 -1
- package/dist/validation/index.js.map +1 -1
- package/dist/xml/XMLBuilder.d.ts +1 -1
- package/dist/xml/XMLBuilder.d.ts.map +1 -1
- package/dist/xml/XMLBuilder.js +98 -100
- package/dist/xml/XMLBuilder.js.map +1 -1
- package/dist/xml/XMLParser.d.ts.map +1 -1
- package/dist/xml/XMLParser.js +61 -66
- package/dist/xml/XMLParser.js.map +1 -1
- package/dist/zip/ZipHandler.d.ts.map +1 -1
- package/dist/zip/ZipHandler.js.map +1 -1
- package/dist/zip/ZipReader.d.ts.map +1 -1
- package/dist/zip/ZipReader.js +1 -3
- package/dist/zip/ZipReader.js.map +1 -1
- package/dist/zip/ZipWriter.d.ts +1 -1
- package/dist/zip/ZipWriter.d.ts.map +1 -1
- package/dist/zip/ZipWriter.js +28 -36
- package/dist/zip/ZipWriter.js.map +1 -1
- package/dist/zip/types.js +1 -1
- package/dist/zip/types.js.map +1 -1
- package/package.json +92 -92
- package/src/__tests__/helper-methods.test.ts +512 -512
- package/src/constants/legacyCompatFlags.ts +138 -138
- package/src/constants/limits.ts +50 -50
- package/src/core/Document.ts +985 -1145
- package/src/core/DocumentContent.ts +461 -467
- package/src/core/DocumentGenerator.ts +1133 -1104
- package/src/core/DocumentIdManager.ts +158 -158
- package/src/core/DocumentParser.ts +2347 -2716
- package/src/core/DocumentValidator.ts +363 -372
- package/src/core/Relationship.ts +367 -367
- package/src/core/RelationshipManager.ts +429 -428
- package/src/elements/AlternateContent.ts +42 -42
- package/src/elements/Bookmark.ts +212 -210
- package/src/elements/BookmarkManager.ts +247 -250
- package/src/elements/Comment.ts +356 -359
- package/src/elements/CommentManager.ts +499 -502
- package/src/elements/CommonTypes.ts +524 -549
- package/src/elements/CustomXml.ts +36 -36
- package/src/elements/Endnote.ts +221 -217
- package/src/elements/EndnoteManager.ts +246 -249
- package/src/elements/Field.ts +1292 -1233
- package/src/elements/FieldHelpers.ts +329 -333
- package/src/elements/FontManager.ts +336 -339
- package/src/elements/Footer.ts +269 -269
- package/src/elements/Footnote.ts +221 -217
- package/src/elements/FootnoteManager.ts +246 -249
- package/src/elements/Header.ts +269 -269
- package/src/elements/HeaderFooterManager.ts +219 -219
- package/src/elements/Hyperlink.ts +1288 -1193
- package/src/elements/Image.ts +1982 -1756
- package/src/elements/ImageManager.ts +437 -432
- package/src/elements/ImageRun.ts +59 -59
- package/src/elements/MathElement.ts +65 -65
- package/src/elements/Paragraph.ts +4347 -4287
- package/src/elements/PreservedElement.ts +53 -53
- package/src/elements/PropertyChangeTypes.ts +458 -442
- package/src/elements/RangeMarker.ts +382 -400
- package/src/elements/Revision.ts +1198 -1217
- package/src/elements/RevisionContent.ts +73 -73
- package/src/elements/RevisionManager.ts +1070 -1070
- package/src/elements/Run.ts +3103 -3073
- package/src/elements/Section.ts +1521 -1421
- package/src/elements/Shape.ts +884 -873
- package/src/elements/StructuredDocumentTag.ts +1176 -1207
- package/src/elements/Table.ts +2468 -2524
- package/src/elements/TableCell.ts +1617 -1621
- package/src/elements/TableGridChange.ts +149 -151
- package/src/elements/TableOfContents.ts +701 -691
- package/src/elements/TableOfContentsElement.ts +89 -89
- package/src/elements/TableRow.ts +960 -929
- package/src/elements/TextBox.ts +766 -768
- package/src/formatting/AbstractNumbering.ts +580 -579
- package/src/formatting/NumberingInstance.ts +295 -299
- package/src/formatting/NumberingLevel.ts +981 -1040
- package/src/formatting/NumberingManager.ts +833 -827
- package/src/formatting/Style.ts +1785 -1879
- package/src/formatting/StylesManager.ts +1090 -1130
- package/src/helpers/CleanupHelper.ts +524 -524
- package/src/images/ImageOptimizer.ts +274 -274
- package/src/index.ts +559 -554
- package/src/managers/DrawingManager.ts +319 -319
- package/src/tracking/DocumentTrackingContext.ts +687 -674
- package/src/tracking/TrackingContext.ts +175 -173
- package/src/types/compatibility-types.ts +49 -49
- package/src/types/formatting.ts +210 -210
- package/src/types/list-types.ts +14 -14
- package/src/types/settings-types.ts +59 -59
- package/src/types/styleConfig.ts +189 -189
- package/src/utils/ChangelogGenerator.ts +1583 -1581
- package/src/utils/CompatibilityUpgrader.ts +235 -237
- package/src/utils/InMemoryRevisionAcceptor.ts +691 -696
- package/src/utils/MoveOperationHelper.ts +233 -238
- package/src/utils/RevisionAwareProcessor.ts +518 -526
- package/src/utils/RevisionWalker.ts +427 -457
- package/src/utils/SelectiveRevisionAcceptor.ts +662 -683
- package/src/utils/ShadingResolver.ts +105 -107
- package/src/utils/acceptRevisions.ts +723 -714
- package/src/utils/cnfStyleDecoder.ts +212 -217
- package/src/utils/corruptionDetection.ts +346 -345
- package/src/utils/dateFormatting.ts +20 -20
- package/src/utils/deepClone.ts +77 -78
- package/src/utils/diagnostics.ts +125 -129
- package/src/utils/errorHandling.ts +80 -80
- package/src/utils/formatting.ts +220 -213
- package/src/utils/list-detection.ts +32 -42
- package/src/utils/logger.ts +412 -404
- package/src/utils/parsingHelpers.ts +190 -190
- package/src/utils/stripTrackedChanges.ts +356 -353
- package/src/utils/textDiff.ts +100 -100
- package/src/utils/units.ts +421 -421
- package/src/utils/validation.ts +553 -542
- package/src/utils/xmlSanitization.ts +179 -182
- package/src/validation/RevisionAutoFixer.ts +541 -542
- package/src/validation/RevisionValidator.ts +470 -460
- package/src/validation/ValidationRules.ts +338 -338
- package/src/validation/index.ts +30 -30
- package/src/xml/XMLBuilder.ts +857 -871
- package/src/xml/XMLParser.ts +877 -919
- package/src/zip/ZipHandler.ts +629 -637
- package/src/zip/ZipReader.ts +295 -299
- package/src/zip/ZipWriter.ts +374 -390
- package/src/zip/types.ts +116 -116
|
@@ -1,345 +1,346 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Corruption Detection - Utilities to detect and diagnose XML corruption in documents
|
|
3
|
-
*
|
|
4
|
-
* This module helps users identify when they've accidentally passed XML-like strings
|
|
5
|
-
* to text methods instead of using the proper API. This is a common mistake that
|
|
6
|
-
* results in escaped XML tags being displayed as literal text in Word.
|
|
7
|
-
*/
|
|
8
|
-
|
|
9
|
-
import { Paragraph } from '../elements/Paragraph';
|
|
10
|
-
import { Run } from '../elements/Run';
|
|
11
|
-
|
|
12
|
-
/** Minimal interface for document corruption scanning (avoids circular import) */
|
|
13
|
-
interface DocumentLike {
|
|
14
|
-
getAllParagraphs?(): Paragraph[];
|
|
15
|
-
}
|
|
16
|
-
|
|
17
|
-
/**
|
|
18
|
-
* Types of corruption that can be detected
|
|
19
|
-
*/
|
|
20
|
-
export type CorruptionType =
|
|
21
|
-
| 'escaped-xml'
|
|
22
|
-
| 'xml-tags'
|
|
23
|
-
| 'entities'
|
|
24
|
-
| 'mixed';
|
|
25
|
-
|
|
26
|
-
/**
|
|
27
|
-
* Location of corruption within a document
|
|
28
|
-
*/
|
|
29
|
-
export interface CorruptionLocation {
|
|
30
|
-
/** Index of the paragraph containing corruption */
|
|
31
|
-
paragraphIndex: number;
|
|
32
|
-
/** Index of the run within the paragraph */
|
|
33
|
-
runIndex: number;
|
|
34
|
-
/** The corrupted text content */
|
|
35
|
-
text: string;
|
|
36
|
-
/** Type of corruption detected */
|
|
37
|
-
corruptionType: CorruptionType;
|
|
38
|
-
/** Suggested fix for the corruption */
|
|
39
|
-
suggestedFix: string;
|
|
40
|
-
/** Length of corrupted text */
|
|
41
|
-
length: number;
|
|
42
|
-
}
|
|
43
|
-
|
|
44
|
-
/**
|
|
45
|
-
* Comprehensive corruption report for a document
|
|
46
|
-
*/
|
|
47
|
-
export interface CorruptionReport {
|
|
48
|
-
/** Whether any corruption was found */
|
|
49
|
-
isCorrupted: boolean;
|
|
50
|
-
/** Total number of corrupted locations */
|
|
51
|
-
totalLocations: number;
|
|
52
|
-
/** Detailed list of corruption locations */
|
|
53
|
-
locations: CorruptionLocation[];
|
|
54
|
-
/** Human-readable summary */
|
|
55
|
-
summary: string;
|
|
56
|
-
/** Statistics about corruption types */
|
|
57
|
-
statistics: {
|
|
58
|
-
escapedXml: number;
|
|
59
|
-
xmlTags: number;
|
|
60
|
-
entities: number;
|
|
61
|
-
mixed: number;
|
|
62
|
-
};
|
|
63
|
-
}
|
|
64
|
-
|
|
65
|
-
/**
|
|
66
|
-
* Detects XML corruption in a document
|
|
67
|
-
*
|
|
68
|
-
* Scans all paragraphs and runs to find text that contains escaped XML
|
|
69
|
-
* or XML-like patterns that suggest the user passed XML strings to text methods.
|
|
70
|
-
*
|
|
71
|
-
* @param doc - The document to scan
|
|
72
|
-
* @returns Corruption report with locations and suggested fixes
|
|
73
|
-
*
|
|
74
|
-
* @example
|
|
75
|
-
* ```typescript
|
|
76
|
-
* const doc = await Document.load('corrupted.docx');
|
|
77
|
-
* const report = detectCorruptionInDocument(doc);
|
|
78
|
-
*
|
|
79
|
-
* if (report.isCorrupted) {
|
|
80
|
-
* console.log(report.summary);
|
|
81
|
-
* report.locations.forEach(loc => {
|
|
82
|
-
* console.log(`Paragraph ${loc.paragraphIndex}, Run ${loc.runIndex}: ${loc.suggestedFix}`);
|
|
83
|
-
* });
|
|
84
|
-
* }
|
|
85
|
-
* ```
|
|
86
|
-
*/
|
|
87
|
-
export function detectCorruptionInDocument(doc: DocumentLike): CorruptionReport {
|
|
88
|
-
const locations: CorruptionLocation[] = [];
|
|
89
|
-
const stats = {
|
|
90
|
-
escapedXml: 0,
|
|
91
|
-
xmlTags: 0,
|
|
92
|
-
entities: 0,
|
|
93
|
-
mixed: 0,
|
|
94
|
-
};
|
|
95
|
-
|
|
96
|
-
// Get all paragraphs from the document
|
|
97
|
-
const paragraphs = doc.getAllParagraphs ? doc.getAllParagraphs() : [];
|
|
98
|
-
|
|
99
|
-
// Scan each paragraph
|
|
100
|
-
for (let pIdx = 0; pIdx < paragraphs.length; pIdx++) {
|
|
101
|
-
const paragraph = paragraphs[pIdx];
|
|
102
|
-
if (!paragraph || !(paragraph instanceof Paragraph)) {
|
|
103
|
-
continue;
|
|
104
|
-
}
|
|
105
|
-
|
|
106
|
-
// Get runs from paragraph
|
|
107
|
-
const runs = paragraph.getRuns();
|
|
108
|
-
|
|
109
|
-
// Scan each run
|
|
110
|
-
for (let rIdx = 0; rIdx < runs.length; rIdx++) {
|
|
111
|
-
const run = runs[rIdx];
|
|
112
|
-
if (!run || !(run instanceof Run)) {
|
|
113
|
-
continue;
|
|
114
|
-
}
|
|
115
|
-
|
|
116
|
-
const text = run.getText();
|
|
117
|
-
if (!text || text.length === 0) {
|
|
118
|
-
continue;
|
|
119
|
-
}
|
|
120
|
-
|
|
121
|
-
// Check for corruption in this text
|
|
122
|
-
const corruption = detectCorruptionInText(text);
|
|
123
|
-
|
|
124
|
-
if (corruption.isCorrupted) {
|
|
125
|
-
locations.push({
|
|
126
|
-
paragraphIndex: pIdx,
|
|
127
|
-
runIndex: rIdx,
|
|
128
|
-
text: text,
|
|
129
|
-
corruptionType: corruption.type,
|
|
130
|
-
suggestedFix: corruption.suggestedFix,
|
|
131
|
-
length: text.length,
|
|
132
|
-
});
|
|
133
|
-
|
|
134
|
-
// Update statistics
|
|
135
|
-
if (corruption.type === 'escaped-xml') stats.escapedXml++;
|
|
136
|
-
else if (corruption.type === 'xml-tags') stats.xmlTags++;
|
|
137
|
-
else if (corruption.type === 'entities') stats.entities++;
|
|
138
|
-
else if (corruption.type === 'mixed') stats.mixed++;
|
|
139
|
-
}
|
|
140
|
-
}
|
|
141
|
-
}
|
|
142
|
-
|
|
143
|
-
// Generate summary
|
|
144
|
-
const summary = generateSummary(locations, stats);
|
|
145
|
-
|
|
146
|
-
return {
|
|
147
|
-
isCorrupted: locations.length > 0,
|
|
148
|
-
totalLocations: locations.length,
|
|
149
|
-
locations,
|
|
150
|
-
summary,
|
|
151
|
-
statistics: stats,
|
|
152
|
-
};
|
|
153
|
-
}
|
|
154
|
-
|
|
155
|
-
/**
|
|
156
|
-
* Internal result from text corruption detection
|
|
157
|
-
*/
|
|
158
|
-
interface TextCorruptionResult {
|
|
159
|
-
isCorrupted: boolean;
|
|
160
|
-
type: CorruptionType;
|
|
161
|
-
suggestedFix: string;
|
|
162
|
-
}
|
|
163
|
-
|
|
164
|
-
/**
|
|
165
|
-
* Detects XML corruption in a single text string
|
|
166
|
-
*
|
|
167
|
-
* Checks for common patterns that indicate the user passed XML strings
|
|
168
|
-
* instead of plain text.
|
|
169
|
-
*
|
|
170
|
-
* @param text - Text to check
|
|
171
|
-
* @returns True if corruption detected
|
|
172
|
-
*
|
|
173
|
-
* @example
|
|
174
|
-
* ```typescript
|
|
175
|
-
* const corrupted = detectCorruptionInText('Hello <w:t>World');
|
|
176
|
-
* // Returns: true
|
|
177
|
-
*
|
|
178
|
-
* const clean = detectCorruptionInText('Hello World');
|
|
179
|
-
* // Returns: false
|
|
180
|
-
* ```
|
|
181
|
-
*/
|
|
182
|
-
export function detectCorruptionInText(text: string): TextCorruptionResult {
|
|
183
|
-
if (!text || typeof text !== 'string') {
|
|
184
|
-
return { isCorrupted: false, type: 'mixed', suggestedFix: String(text || '') };
|
|
185
|
-
}
|
|
186
|
-
|
|
187
|
-
let hasEscapedXml = false;
|
|
188
|
-
let hasXmlTags = false;
|
|
189
|
-
let hasEntities = false;
|
|
190
|
-
|
|
191
|
-
// Pattern 1: Escaped XML tags (most common corruption)
|
|
192
|
-
// Matches: &lt;w:t&gt;, &lt;/w:t&gt;, &lt;w:r&gt;, etc.
|
|
193
|
-
const escapedXmlPattern = /<\/?w:[a-z]+[^&]*>/i;
|
|
194
|
-
if (escapedXmlPattern.test(text)) {
|
|
195
|
-
hasEscapedXml = true;
|
|
196
|
-
}
|
|
197
|
-
|
|
198
|
-
// Pattern 2: Raw XML tags (less common, but possible)
|
|
199
|
-
// Matches: <w:t>, </w:t>, <w:r>, etc.
|
|
200
|
-
const xmlTagPattern = /<\/?w:[a-z]+[^>]*>/i;
|
|
201
|
-
if (xmlTagPattern.test(text)) {
|
|
202
|
-
hasXmlTags = true;
|
|
203
|
-
}
|
|
204
|
-
|
|
205
|
-
// Pattern 3: Escaped entities combined with Word XML attributes
|
|
206
|
-
// ONLY flag if we see Word-specific patterns, not just any entities
|
|
207
|
-
// This avoids false positives from legitimate escaped characters
|
|
208
|
-
// Matches all OOXML namespaces: w: (word), a: (drawingML), pic: (picture), r: (relationships), wp: (word drawing)
|
|
209
|
-
const wordXmlAttributePattern =
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
}
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
*
|
|
246
|
-
*
|
|
247
|
-
*
|
|
248
|
-
*
|
|
249
|
-
*
|
|
250
|
-
* @
|
|
251
|
-
*
|
|
252
|
-
*
|
|
253
|
-
*
|
|
254
|
-
*
|
|
255
|
-
*
|
|
256
|
-
*
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
.replace(/&
|
|
269
|
-
.replace(/&
|
|
270
|
-
.replace(/&
|
|
271
|
-
.replace(/&
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
//
|
|
275
|
-
|
|
276
|
-
cleaned = cleaned.replace(
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
lines
|
|
300
|
-
lines.push(
|
|
301
|
-
lines.push('
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
}
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
}
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
}
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
}
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
lines.push('
|
|
318
|
-
lines.push('
|
|
319
|
-
lines.push('
|
|
320
|
-
lines.push('');
|
|
321
|
-
lines.push('
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
*
|
|
329
|
-
*
|
|
330
|
-
*
|
|
331
|
-
*
|
|
332
|
-
* @
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
1
|
+
/**
|
|
2
|
+
* Corruption Detection - Utilities to detect and diagnose XML corruption in documents
|
|
3
|
+
*
|
|
4
|
+
* This module helps users identify when they've accidentally passed XML-like strings
|
|
5
|
+
* to text methods instead of using the proper API. This is a common mistake that
|
|
6
|
+
* results in escaped XML tags being displayed as literal text in Word.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { Paragraph } from '../elements/Paragraph';
|
|
10
|
+
import { Run } from '../elements/Run';
|
|
11
|
+
|
|
12
|
+
/** Minimal interface for document corruption scanning (avoids circular import) */
|
|
13
|
+
interface DocumentLike {
|
|
14
|
+
getAllParagraphs?(): Paragraph[];
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
/**
|
|
18
|
+
* Types of corruption that can be detected
|
|
19
|
+
*/
|
|
20
|
+
export type CorruptionType =
|
|
21
|
+
| 'escaped-xml' // &lt;w:t&gt; style escaping
|
|
22
|
+
| 'xml-tags' // <w:t> tags in text
|
|
23
|
+
| 'entities' // &quot; &apos; etc.
|
|
24
|
+
| 'mixed'; // Multiple types
|
|
25
|
+
|
|
26
|
+
/**
|
|
27
|
+
* Location of corruption within a document
|
|
28
|
+
*/
|
|
29
|
+
export interface CorruptionLocation {
|
|
30
|
+
/** Index of the paragraph containing corruption */
|
|
31
|
+
paragraphIndex: number;
|
|
32
|
+
/** Index of the run within the paragraph */
|
|
33
|
+
runIndex: number;
|
|
34
|
+
/** The corrupted text content */
|
|
35
|
+
text: string;
|
|
36
|
+
/** Type of corruption detected */
|
|
37
|
+
corruptionType: CorruptionType;
|
|
38
|
+
/** Suggested fix for the corruption */
|
|
39
|
+
suggestedFix: string;
|
|
40
|
+
/** Length of corrupted text */
|
|
41
|
+
length: number;
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
/**
|
|
45
|
+
* Comprehensive corruption report for a document
|
|
46
|
+
*/
|
|
47
|
+
export interface CorruptionReport {
|
|
48
|
+
/** Whether any corruption was found */
|
|
49
|
+
isCorrupted: boolean;
|
|
50
|
+
/** Total number of corrupted locations */
|
|
51
|
+
totalLocations: number;
|
|
52
|
+
/** Detailed list of corruption locations */
|
|
53
|
+
locations: CorruptionLocation[];
|
|
54
|
+
/** Human-readable summary */
|
|
55
|
+
summary: string;
|
|
56
|
+
/** Statistics about corruption types */
|
|
57
|
+
statistics: {
|
|
58
|
+
escapedXml: number;
|
|
59
|
+
xmlTags: number;
|
|
60
|
+
entities: number;
|
|
61
|
+
mixed: number;
|
|
62
|
+
};
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
/**
 * Scans a document for text runs that contain Word XML markup.
 *
 * Walks the array returned by `doc.getAllParagraphs()` (an empty list is used
 * when the document does not provide that method) and, for every paragraph,
 * the array returned by `getRuns()`. Entries that are missing or are not
 * `Paragraph` / `Run` instances are skipped, as are runs whose text is empty.
 * Each remaining run's text is passed to `detectCorruptionInText`; every hit
 * is recorded together with its position and a cleaned-up suggestion.
 *
 * `paragraphIndex` and `runIndex` in the report are positions within those
 * two arrays, so skipped entries still consume an index.
 *
 * @param doc - The document to scan
 * @returns Corruption report with locations, per-type counts and a summary
 *
 * @example
 * ```typescript
 * const doc = await Document.load('corrupted.docx');
 * const report = detectCorruptionInDocument(doc);
 *
 * if (report.isCorrupted) {
 *   console.log(report.summary);
 *   report.locations.forEach(loc => {
 *     console.log(`Paragraph ${loc.paragraphIndex}, Run ${loc.runIndex}: ${loc.suggestedFix}`);
 *   });
 * }
 * ```
 */
export function detectCorruptionInDocument(doc: DocumentLike): CorruptionReport {
  const found: CorruptionLocation[] = [];
  const tally = { escapedXml: 0, xmlTags: 0, entities: 0, mixed: 0 };

  // Documents without getAllParagraphs() are treated as having no paragraphs.
  const allParagraphs = doc.getAllParagraphs ? doc.getAllParagraphs() : [];

  for (let p = 0; p < allParagraphs.length; p += 1) {
    const para = allParagraphs[p];
    if (!(para && para instanceof Paragraph)) {
      continue;
    }

    const paraRuns = para.getRuns();
    for (let r = 0; r < paraRuns.length; r += 1) {
      const candidate = paraRuns[r];
      if (!(candidate && candidate instanceof Run)) {
        continue;
      }

      // Nothing to inspect in an empty run.
      const content = candidate.getText();
      if (!(content && content.length !== 0)) {
        continue;
      }

      const verdict = detectCorruptionInText(content);
      if (!verdict.isCorrupted) {
        continue;
      }

      found.push({
        paragraphIndex: p,
        runIndex: r,
        text: content,
        corruptionType: verdict.type,
        suggestedFix: verdict.suggestedFix,
        length: content.length,
      });

      // Count the hit under its corruption type.
      switch (verdict.type) {
        case 'escaped-xml':
          tally.escapedXml++;
          break;
        case 'xml-tags':
          tally.xmlTags++;
          break;
        case 'entities':
          tally.entities++;
          break;
        case 'mixed':
          tally.mixed++;
          break;
      }
    }
  }

  return {
    isCorrupted: found.length > 0,
    totalLocations: found.length,
    locations: found,
    summary: generateSummary(found, tally),
    statistics: tally,
  };
}
|
|
154
|
+
|
|
155
|
+
/**
 * Outcome of checking a single text string for XML corruption.
 *
 * This is the value returned by `detectCorruptionInText`.
 */
interface TextCorruptionResult {
  /** True when at least one corruption pattern matched the text. */
  isCorrupted: boolean;
  /**
   * Classification of the corruption. `detectCorruptionInText` sets this to
   * `'mixed'` as a placeholder when `isCorrupted` is false.
   */
  type: CorruptionType;
  /**
   * Proposed replacement text. For text that is not corrupted,
   * `detectCorruptionInText` returns the input here.
   */
  suggestedFix: string;
}
|
|
163
|
+
|
|
164
|
+
/**
 * Detects XML corruption in a single text string
 *
 * Checks for three independent indicators that Word XML markup was passed to
 * a text method instead of plain text:
 *
 * 1. Entity-escaped WordprocessingML tags, e.g. `&lt;w:t&gt;` or `&lt;/w:t&gt;`
 * 2. Raw WordprocessingML tags, e.g. `<w:t>` or `</w:t>`
 * 3. An entity-escaped OOXML namespace prefix (`&lt;w:`, `&lt;a:`, `&lt;pic:`, ...)
 *    or an `xml:space="preserve"` attribute whose quotes are raw or escaped
 *    as `&quot;`
 *
 * When several indicators match, the reported type follows the precedence
 * `'escaped-xml'` > `'xml-tags'` > `'entities'`.
 *
 * @param text - Text to check
 * @returns Result object. When nothing is detected (or `text` is empty or not
 *   a string) `isCorrupted` is false, `type` is the placeholder `'mixed'` and
 *   `suggestedFix` is the input converted to a string. Otherwise
 *   `isCorrupted` is true and `suggestedFix` is the output of `suggestFix`.
 *
 * @example
 * ```typescript
 * const corrupted = detectCorruptionInText('Hello &lt;w:t&gt;World');
 * // Returns: { isCorrupted: true, type: 'escaped-xml', suggestedFix: 'Hello World' }
 *
 * const clean = detectCorruptionInText('Hello World');
 * // Returns: { isCorrupted: false, type: 'mixed', suggestedFix: 'Hello World' }
 * ```
 */
export function detectCorruptionInText(text: string): TextCorruptionResult {
  if (!text || typeof text !== 'string') {
    return { isCorrupted: false, type: 'mixed', suggestedFix: String(text || '') };
  }

  // Pattern 1: Escaped XML tags (most common corruption)
  // Matches: &lt;w:t&gt;, &lt;/w:t&gt;, &lt;w:r&gt;, etc.
  // [^&]* keeps the match inside one tag: it stops at the next entity, which
  // must then be the closing &gt;.
  const escapedXmlPattern = /&lt;\/?w:[a-z]+[^&]*&gt;/i;
  const hasEscapedXml = escapedXmlPattern.test(text);

  // Pattern 2: Raw XML tags (less common, but possible)
  // Matches: <w:t>, </w:t>, <w:r>, etc.
  const xmlTagPattern = /<\/?w:[a-z]+[^>]*>/i;
  const hasXmlTags = xmlTagPattern.test(text);

  // Pattern 3: Escaped entities combined with Word XML attributes
  // ONLY flag Word-specific patterns, not just any entity, to avoid false
  // positives from legitimately escaped characters such as "&amp;".
  // Covers the OOXML namespace prefixes w, a, r, pic, wp, m, mc, wpc, wps,
  // wpg, c, dgm, o and v, plus xml:space="preserve" with raw or escaped quotes.
  const wordXmlAttributePattern =
    /(&lt;(?:w|a|r|pic|wp|m|mc|wpc|wps|wpg|c|dgm|o|v):|xml:space=(?:"|&quot;)preserve(?:"|&quot;))/i;
  const hasEntities = wordXmlAttributePattern.test(text);

  if (!hasEscapedXml && !hasXmlTags && !hasEntities) {
    return { isCorrupted: false, type: 'mixed', suggestedFix: text };
  }

  // Primary indicator wins. At least one flag is set here, so the final
  // fallback can only be the entities-only case.
  let type: CorruptionType;
  if (hasEscapedXml) {
    type = 'escaped-xml';
  } else if (hasXmlTags) {
    type = 'xml-tags';
  } else {
    type = 'entities';
  }

  return { isCorrupted: true, type, suggestedFix: suggestFix(text) };
}

/**
 * Suggests a fix for corrupted text
 *
 * Attempts to clean XML patterns from text to restore the intended content:
 * the five predefined XML entities are unescaped, every `<...>` tag is
 * removed, and runs of whitespace are collapsed to a single space.
 * The original note says this mirrors cleanXmlFromText() from validation.ts;
 * that function is not visible here, so treat the equivalence as unverified.
 *
 * @param corruptedText - Text containing XML corruption
 * @returns Cleaned text with XML patterns removed. Empty or non-string input
 *   is returned unchanged.
 *
 * @example
 * ```typescript
 * const fixed = suggestFix('Hello &lt;w:t&gt;World&lt;/w:t&gt;');
 * // Returns: 'Hello World'
 * ```
 */
export function suggestFix(corruptedText: string): string {
  if (!corruptedText || typeof corruptedText !== 'string') {
    return corruptedText;
  }

  let cleaned = corruptedText;

  // Step 1: Unescape XML entities first so escaped tags become real tags.
  // &amp; is handled last so that "&amp;lt;" is unescaped exactly one level
  // (to "&lt;") instead of collapsing all the way to "<".
  cleaned = cleaned
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    .replace(/&quot;/g, '"')
    .replace(/&apos;/g, "'")
    .replace(/&amp;/g, '&');

  // Step 2: Remove Word XML tags
  // Matches: <w:t xml:space="preserve">, </w:t>, <w:r>, etc.
  cleaned = cleaned.replace(/<w:[^>]+>/g, '');
  cleaned = cleaned.replace(/<\/w:[^>]+>/g, '');

  // Step 3: Remove any remaining XML-like tags
  cleaned = cleaned.replace(/<[^>]+>/g, '');

  // Step 4: Clean up whitespace
  cleaned = cleaned.replace(/\s+/g, ' ').trim();

  return cleaned;
}
|
|
287
|
+
|
|
288
|
+
/**
 * Builds the multi-line, human-readable summary used in a corruption report.
 *
 * Returns a fixed "clean" sentence when `locations` is empty. Otherwise the
 * text contains the total count, one breakdown line per corruption type whose
 * counter is above zero, and usage advice. Lines are joined with `\n`.
 *
 * @param locations - Corruption locations found; only its length is used
 * @param stats - Per-type counters shown in the breakdown
 * @returns Summary text
 */
function generateSummary(
  locations: CorruptionLocation[],
  stats: { escapedXml: number; xmlTags: number; entities: number; mixed: number }
): string {
  const total = locations.length;
  if (total === 0) {
    return 'No corruption detected. Document is clean.';
  }

  // Label/count pairs in display order; zero counts are left out below.
  const breakdown: Array<[string, number]> = [
    ['Escaped XML', stats.escapedXml],
    ['XML Tags', stats.xmlTags],
    ['XML Entities', stats.entities],
    ['Mixed', stats.mixed],
  ];
  const breakdownLines = breakdown
    .filter(([, count]) => count > 0)
    .map(([label, count]) => ` - ${label}: ${count} location(s)`);

  const report: string[] = [
    `Found ${total} corrupted text location(s) in the document.`,
    '',
    'Corruption breakdown:',
    ...breakdownLines,
    '',
    'This corruption typically occurs when XML strings are passed to text methods.',
    'Instead of: paragraph.addText("Text<w:t>1</w:t>")',
    'Use: paragraph.addText("Text"); paragraph.addText("1");',
    '',
    'To automatically clean text, use: new Run(text, { cleanXmlFromText: true })',
  ];

  return report.join('\n');
}
|
|
326
|
+
|
|
327
|
+
/**
 * Checks if text looks like it might be corrupted (less strict check)
 *
 * This is a quick check that can be used for warnings without full analysis.
 * It reports true when the text contains any of:
 *
 * - an entity-escaped opening or closing tag start with a `w:`, `r:` or `p:`
 *   prefix, e.g. `&lt;w:t` or `&lt;/w:t`
 * - the same tag start written with a raw `<`, e.g. `<w:t` or `</w:t`
 * - an `xml:space=` attribute followed by a raw or `&quot;`-escaped quote
 *
 * @param text - Text to check
 * @returns True if text might be corrupted; false for empty or non-string input
 */
export function looksCorrupted(text: string): boolean {
  if (!text || typeof text !== 'string') {
    return false;
  }

  // Quick regex checks for common corruption patterns
  return (
    /&lt;\/?(w|r|p):[a-z]+/i.test(text) || // escaped tag start
    /<\/?(w|r|p):[a-z]+/i.test(text) || // raw tag start
    /xml:space=(?:"|&quot;)/i.test(text) // attribute, quote raw or escaped
  );
}
|