documentation-hub 5.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +43 -0
- package/.github/workflows/build.yml +64 -0
- package/.github/workflows/ci.yml +39 -0
- package/.vscode/extensions.json +3 -0
- package/Current.md +97 -0
- package/DocHub_Image.png +0 -0
- package/README.md +666 -0
- package/USER_GUIDE.md +1173 -0
- package/Updater.md +311 -0
- package/build/256x256.png +0 -0
- package/build/512x512.png +0 -0
- package/build/app-update.yml +4 -0
- package/build/create-icon.js +208 -0
- package/build/icon.ico +0 -0
- package/build/icon.png +0 -0
- package/build/icon_1024x1024.png +0 -0
- package/dist/assets/Analytics-BpsG9895.js +1 -0
- package/dist/assets/Card-IAZin8kp.js +1 -0
- package/dist/assets/CurrentSession-B-rFkHvf.js +12 -0
- package/dist/assets/Dashboard-C_5gMb0q.js +1 -0
- package/dist/assets/Documents-CqZ25axS.js +1 -0
- package/dist/assets/Input-l89xwXBi.js +1 -0
- package/dist/assets/Reporting-DqdHJY_a.js +1 -0
- package/dist/assets/Search-XNbu5z_3.js +1 -0
- package/dist/assets/SessionManager-lH9hZfzH.js +1 -0
- package/dist/assets/Sessions-ClZOPYNc.js +1 -0
- package/dist/assets/Settings-DUEHGURa.js +11 -0
- package/dist/assets/index-8xUe8ptc.js +24 -0
- package/dist/assets/index-RYyJqF7O.css +1 -0
- package/dist/assets/path-BkOl0AGO.js +1 -0
- package/dist/assets/promises-ID_B9S-h.js +1 -0
- package/dist/assets/urlHelpers-TvgahX0r.js +1 -0
- package/dist/assets/useToast-yRSO1dkm.js +1 -0
- package/dist/assets/vendor-charts-RkGK5ROP.js +36 -0
- package/dist/assets/vendor-db-l0sNRNKZ.js +1 -0
- package/dist/assets/vendor-react-BVZ_anCF.js +4 -0
- package/dist/assets/vendor-search-Dw8P0qyA.js +1 -0
- package/dist/assets/vendor-ui-BU7NfluV.js +53 -0
- package/dist/electron/PowerAutomateApiService-LfW09ZGr.js +147 -0
- package/dist/electron/main-CXkNtyv-.js +19789 -0
- package/dist/electron/main.js +5 -0
- package/dist/electron/preload.js +1 -0
- package/dist/icon.png +0 -0
- package/dist/index.html +27 -0
- package/docs/CODEBASE_ANALYSIS_REPORT.md +309 -0
- package/docs/DEBUG_LOGGING_GUIDE.md +244 -0
- package/docs/README.md +115 -0
- package/docs/TOC_WIRING_GUIDE.md +344 -0
- package/docs/analysis/Bullet_Symbol_Bug_Analysis.md +136 -0
- package/docs/analysis/DOCXMLATER_ANALYSIS_SUMMARY.txt +169 -0
- package/docs/analysis/Document_Processing_Issues_Analysis.md +704 -0
- package/docs/analysis/FIELD_PRESERVATION_ANALYSIS.md +1200 -0
- package/docs/analysis/INDENTATION_PRESERVE_ANALYSIS.md +181 -0
- package/docs/analysis/INDENTATION_PRESERVE_IMPLEMENTATION.md +207 -0
- package/docs/analysis/List_Implementation.md +206 -0
- package/docs/analysis/List_Implementation_Accuracy_Report.md +366 -0
- package/docs/analysis/PROCESSING_OPTIONS_UI_UPDATES.md +220 -0
- package/docs/analysis/RefactorStyles.md +852 -0
- package/docs/analysis/STYLE_PARAMETER_ENHANCEMENT.md +143 -0
- package/docs/analysis/docxmlater-comparison-todo-2025-11-13.md +636 -0
- package/docs/analysis/docxmlater-implementation-analysis-2025-11-13.md +340 -0
- package/docs/analysis/docxmlater-template_ui-integration-analysis.md +263 -0
- package/docs/analysis/github-issues-to-create.md +237 -0
- package/docs/api/API_README.md +538 -0
- package/docs/api/API_REFERENCE.md +751 -0
- package/docs/api/TYPE_DEFINITIONS.md +869 -0
- package/docs/architecture/FONT_EMBEDDING_GUIDE.md +318 -0
- package/docs/architecture/docxmlater-functions-and-structure.md +726 -0
- package/docs/docxmlater-readme.md +1341 -0
- package/docs/fixes/EXECUTION_LOG_TEST_BASE.md +573 -0
- package/docs/fixes/HYPERLINK_TEXT_SANITIZATION.md +253 -0
- package/docs/fixes/README.md +37 -0
- package/docs/github-issues/issue-1-body.md +125 -0
- package/docs/github-issues/issue-10-body.md +850 -0
- package/docs/github-issues/issue-2-body.md +200 -0
- package/docs/github-issues/issue-3-body.md +270 -0
- package/docs/github-issues/issue-4-body.md +169 -0
- package/docs/github-issues/issue-5-body.md +173 -0
- package/docs/github-issues/issue-6-body.md +158 -0
- package/docs/github-issues/issue-7-body.md +171 -0
- package/docs/github-issues/issue-8-body.md +407 -0
- package/docs/github-issues/issue-9-body.md +515 -0
- package/docs/github-issues/issue-tracker.md +274 -0
- package/docs/github-issues/predictive-analysis-2025-10-18.md +2131 -0
- package/docs/implementation/List_Framework_Refactor_Plan.md +336 -0
- package/docs/implementation/PRIMARY_TEXT_COLOR_FEATURE.md +217 -0
- package/docs/implementation/RELEASE_PLAN_v2.1.0.md +362 -0
- package/docs/implementation/RefactorStyles.md +588 -0
- package/docs/implementation/implement-plan.md +489 -0
- package/docs/implementation/missing-helpers-implementation.md +391 -0
- package/docs/implementation/refactor-plan.md +520 -0
- package/docs/implementation/session-implementation-complete.md +233 -0
- package/docs/implementation/session-management-plan.md +250 -0
- package/docs/setup-checklist.md +77 -0
- package/docs/versions/changelog.md +345 -0
- package/electron/customUpdater.ts +656 -0
- package/electron/main.ts +2441 -0
- package/electron/memoryConfig.ts +187 -0
- package/electron/preload.ts +394 -0
- package/electron/proxyConfig.ts +340 -0
- package/electron/services/BackupService.ts +452 -0
- package/electron/services/DictionaryService.ts +402 -0
- package/electron/services/LocalDictionaryLookupService.ts +147 -0
- package/electron/services/PowerAutomateApiService.ts +231 -0
- package/electron/services/SharePointSyncService.ts +474 -0
- package/electron/windowsCertStore.ts +427 -0
- package/electron/zscalerConfig.ts +381 -0
- package/eslint.config.js +92 -0
- package/jest.config.js +52 -0
- package/package.json +214 -0
- package/postcss.config.mjs +6 -0
- package/public/icon.png +0 -0
- package/publish-release.ps1 +5 -0
- package/renovate.json +30 -0
- package/src/App.tsx +216 -0
- package/src/__mocks__/p-limit.js +12 -0
- package/src/__mocks__/styleMock.js +1 -0
- package/src/components/common/BugReportButton.tsx +44 -0
- package/src/components/common/BugReportDialog.tsx +193 -0
- package/src/components/common/Button.tsx +153 -0
- package/src/components/common/Card.tsx +86 -0
- package/src/components/common/ColorPickerDialog.tsx +177 -0
- package/src/components/common/ConfirmDialog.tsx +96 -0
- package/src/components/common/DebugConsole.tsx +275 -0
- package/src/components/common/EmptyState.tsx +183 -0
- package/src/components/common/ErrorBoundary.tsx +98 -0
- package/src/components/common/ErrorDetailsDialog.tsx +153 -0
- package/src/components/common/ErrorFallback.tsx +218 -0
- package/src/components/common/Input.tsx +109 -0
- package/src/components/common/Skeleton.tsx +184 -0
- package/src/components/common/SplashScreen.tsx +81 -0
- package/src/components/common/Toast.tsx +155 -0
- package/src/components/common/Tooltip.tsx +79 -0
- package/src/components/common/UpdateNotification.tsx +320 -0
- package/src/components/comparison/ComparisonWindow.tsx +374 -0
- package/src/components/comparison/SideBySideDiff.tsx +486 -0
- package/src/components/comparison/index.ts +8 -0
- package/src/components/document/DocumentUploader.tsx +288 -0
- package/src/components/document/HyperlinkPreview.tsx +430 -0
- package/src/components/document/HyperlinkService.md +1484 -0
- package/src/components/document/Hyperlink_Technical_Documentation.md +496 -0
- package/src/components/document/InlineChangesView.tsx +707 -0
- package/src/components/document/ProcessingProgress.tsx +303 -0
- package/src/components/document/ProcessingResults.tsx +256 -0
- package/src/components/document/TrackedChangesDetail.tsx +530 -0
- package/src/components/document/TrackedChangesPanel.tsx +546 -0
- package/src/components/document/VirtualDocumentList.tsx +240 -0
- package/src/components/editor/DocumentEditor.tsx +723 -0
- package/src/components/editor/DocumentEditorModal.tsx +640 -0
- package/src/components/editor/EditorQuickActions.tsx +502 -0
- package/src/components/editor/EditorToolbar.tsx +312 -0
- package/src/components/editor/TableEditor.tsx +926 -0
- package/src/components/editor/index.ts +18 -0
- package/src/components/layout/Header.tsx +190 -0
- package/src/components/layout/Sidebar.tsx +313 -0
- package/src/components/layout/TitleBar.tsx +190 -0
- package/src/components/navigation/CommandPalette.tsx +233 -0
- package/src/components/navigation/KeyboardShortcutsModal.tsx +173 -0
- package/src/components/sessions/ChangeItem.tsx +408 -0
- package/src/components/sessions/ChangeViewer.tsx +1155 -0
- package/src/components/sessions/DocumentComparisonModal.tsx +314 -0
- package/src/components/sessions/ProcessingOptions.tsx +297 -0
- package/src/components/sessions/ReplacementsTab.tsx +438 -0
- package/src/components/sessions/RevisionHandlingOptions.tsx +87 -0
- package/src/components/sessions/SessionManager.tsx +188 -0
- package/src/components/sessions/StylesEditor.tsx +1335 -0
- package/src/components/sessions/TabContainer.tsx +151 -0
- package/src/components/sessions/VirtualSessionList.tsx +157 -0
- package/src/components/sessions/sessionToProcessorManager.tsx +420 -0
- package/src/components/settings/CertificateManager.tsx +410 -0
- package/src/components/settings/SegmentedControl.tsx +88 -0
- package/src/components/settings/SettingRow.tsx +52 -0
- package/src/contexts/GlobalStatsContext.tsx +396 -0
- package/src/contexts/SessionContext.tsx +2129 -0
- package/src/contexts/ThemeContext.tsx +428 -0
- package/src/contexts/UserSettingsContext.tsx +290 -0
- package/src/contexts/__tests__/GlobalStatsContext.test.tsx +390 -0
- package/src/global.d.ts +273 -0
- package/src/hooks/useDocumentQueue.tsx +210 -0
- package/src/hooks/useToast.tsx +55 -0
- package/src/main.tsx +10 -0
- package/src/pages/Analytics.tsx +386 -0
- package/src/pages/CurrentSession.tsx +1174 -0
- package/src/pages/Dashboard.tsx +319 -0
- package/src/pages/Documents.tsx +317 -0
- package/src/pages/Projects.tsx +250 -0
- package/src/pages/Reporting.tsx +386 -0
- package/src/pages/Search.tsx +349 -0
- package/src/pages/Sessions.tsx +285 -0
- package/src/pages/Settings.tsx +2662 -0
- package/src/services/HyperlinkService.ts +1085 -0
- package/src/services/document/DocXMLaterProcessor.ts +617 -0
- package/src/services/document/DocumentProcessingComparison.ts +856 -0
- package/src/services/document/DocumentSnapshotService.ts +575 -0
- package/src/services/document/WordDocumentProcessor.ts +10509 -0
- package/src/services/document/__tests__/DocXMLaterProcessor.hyperlinks.test.md +311 -0
- package/src/services/document/__tests__/WordDocumentProcessor.integration.test.ts +515 -0
- package/src/services/document/__tests__/WordDocumentProcessor.test.ts +812 -0
- package/src/services/document/blanklines/BlankLineManager.ts +658 -0
- package/src/services/document/blanklines/__tests__/paragraphChecks.test.ts +281 -0
- package/src/services/document/blanklines/helpers/blankLineInsertion.ts +87 -0
- package/src/services/document/blanklines/helpers/blankLineSnapshot.ts +251 -0
- package/src/services/document/blanklines/helpers/clearCustom.ts +121 -0
- package/src/services/document/blanklines/helpers/contextChecks.ts +117 -0
- package/src/services/document/blanklines/helpers/imageChecks.ts +51 -0
- package/src/services/document/blanklines/helpers/paragraphChecks.ts +236 -0
- package/src/services/document/blanklines/helpers/removeBlanksBetweenListItems.ts +91 -0
- package/src/services/document/blanklines/helpers/removeTrailingBlanks.ts +35 -0
- package/src/services/document/blanklines/helpers/tableGuards.ts +21 -0
- package/src/services/document/blanklines/index.ts +67 -0
- package/src/services/document/blanklines/rules/additionRules.ts +337 -0
- package/src/services/document/blanklines/rules/indentationRules.ts +317 -0
- package/src/services/document/blanklines/rules/removalRules.ts +362 -0
- package/src/services/document/blanklines/rules/ruleTypes.ts +92 -0
- package/src/services/document/blanklines/types.ts +29 -0
- package/src/services/document/helpers/ImageBorderCropper.ts +377 -0
- package/src/services/document/helpers/__tests__/whitespace.test.ts +272 -0
- package/src/services/document/helpers/whitespace.ts +117 -0
- package/src/services/document/list/ListNormalizer.ts +947 -0
- package/src/services/document/list/index.ts +45 -0
- package/src/services/document/list/list-detection.ts +275 -0
- package/src/services/document/list/list-types.ts +162 -0
- package/src/services/document/processors/HyperlinkProcessor.ts +370 -0
- package/src/services/document/processors/ListProcessor.ts +257 -0
- package/src/services/document/processors/StructureProcessor.ts +176 -0
- package/src/services/document/processors/StyleProcessor.ts +389 -0
- package/src/services/document/processors/TableProcessor.ts +2238 -0
- package/src/services/document/processors/__tests__/HyperlinkProcessor.test.ts +314 -0
- package/src/services/document/processors/__tests__/ListProcessor.test.ts +291 -0
- package/src/services/document/processors/__tests__/StructureProcessor.test.ts +257 -0
- package/src/services/document/processors/__tests__/TableProcessor.hlp-tips-bullets.test.ts +459 -0
- package/src/services/document/processors/__tests__/TableProcessor.test.ts +1604 -0
- package/src/services/document/processors/index.ts +28 -0
- package/src/services/document/types/docx-processing.ts +310 -0
- package/src/services/editor/EditorActionHandlers.ts +901 -0
- package/src/services/editor/index.ts +13 -0
- package/src/setupTests.ts +47 -0
- package/src/styles/global.css +782 -0
- package/src/types/backup.ts +132 -0
- package/src/types/dictionary.ts +125 -0
- package/src/types/document-processing.ts +331 -0
- package/src/types/docxmlater-augments.d.ts +142 -0
- package/src/types/editor.ts +280 -0
- package/src/types/electron.ts +340 -0
- package/src/types/globalStats.ts +155 -0
- package/src/types/hyperlink.ts +471 -0
- package/src/types/operations.ts +354 -0
- package/src/types/session.ts +427 -0
- package/src/types/settings.ts +112 -0
- package/src/utils/MemoryMonitor.ts +248 -0
- package/src/utils/cn.ts +6 -0
- package/src/utils/colorConvert.ts +306 -0
- package/src/utils/diffUtils.ts +347 -0
- package/src/utils/documentUtils.ts +202 -0
- package/src/utils/electronGuard.ts +62 -0
- package/src/utils/indexedDB.ts +915 -0
- package/src/utils/logger.ts +717 -0
- package/src/utils/pathSecurity.ts +232 -0
- package/src/utils/pathValidator.ts +236 -0
- package/src/utils/processingTimeEstimator.ts +153 -0
- package/src/utils/safeJsonParse.ts +62 -0
- package/src/utils/textSanitizer.ts +162 -0
- package/src/utils/urlHelpers.ts +304 -0
- package/src/utils/urlPatterns.ts +198 -0
- package/src/utils/urlSanitizer.ts +152 -0
- package/src/vite-env.d.ts +11 -0
- package/tsconfig.electron.json +19 -0
- package/tsconfig.json +36 -0
- package/tsconfig.node.json +12 -0
- package/typedoc.json +45 -0
- package/vite.config.ts +152 -0
|
@@ -0,0 +1,407 @@
|
|
|
1
|
+
## Problem Description
|
|
2
|
+
|
|
3
|
+
**Type:** Bug / Data Integrity
|
|
4
|
+
**Priority:** Critical
|
|
5
|
+
**Impact:** DOCX documents become corrupted and unreadable after processing
|
|
6
|
+
**Root Cause:** Violations of the Office Open XML (OOXML) specification during programmatic editing
|
|
7
|
+
|
|
8
|
+
DOCX files are ZIP archives containing multiple XML files with strict structural requirements. When code modifies these files without adhering to the OOXML specification, it can result in corrupted documents that won't open in Microsoft Word or other applications.
|
|
9
|
+
|
|
10
|
+
## Background: OOXML Architecture
|
|
11
|
+
|
|
12
|
+
Based on project documentation ([`OOXML_HYPERLINK_ARCHITECTURE.md`](../OOXML_HYPERLINK_ARCHITECTURE.md) and [`docxmlater-functions-and-structure.md`](../architecture/docxmlater-functions-and-structure.md)):
|
|
13
|
+
|
|
14
|
+
### Document Structure
|
|
15
|
+
|
|
16
|
+
A `.docx` file is a ZIP archive containing:
|
|
17
|
+
|
|
18
|
+
```
|
|
19
|
+
document.docx
|
|
20
|
+
├── [Content_Types].xml
|
|
21
|
+
├── _rels/.rels
|
|
22
|
+
├── word/
|
|
23
|
+
│ ├── document.xml # Main content
|
|
24
|
+
│ ├── _rels/
|
|
25
|
+
│ │ └── document.xml.rels # Relationships (hyperlinks, images, etc.)
|
|
26
|
+
│ ├── styles.xml
|
|
27
|
+
│ ├── numbering.xml
|
|
28
|
+
│ └── fontTable.xml
|
|
29
|
+
└── docProps/
|
|
30
|
+
├── core.xml
|
|
31
|
+
└── app.xml
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
### Critical Rules for OOXML Compliance
|
|
35
|
+
|
|
36
|
+
**1. XML Namespace Requirements**
|
|
37
|
+
|
|
38
|
+
| Prefix | Namespace | Purpose | Example |
|
|
39
|
+
| ------ | --------------------------------------------------------------------- | ---------------- | --------------------------- |
|
|
40
|
+
| `w:` | `http://schemas.openxmlformats.org/wordprocessingml/2006/main` | Document content | `<w:p>`, `<w:r>`, `<w:t>` |
|
|
41
|
+
| `r:` | `http://schemas.openxmlformats.org/officeDocument/2006/relationships` | Relationships | `<w:hyperlink r:id="rId5">` |
|
|
42
|
+
|
|
43
|
+
**Violation Example:**
|
|
44
|
+
|
|
45
|
+
```xml
|
|
46
|
+
<!-- WRONG - Missing namespace prefix -->
|
|
47
|
+
<hyperlink r:id="rId5">
|
|
48
|
+
<t>Click here</t>
|
|
49
|
+
</hyperlink>
|
|
50
|
+
|
|
51
|
+
<!-- CORRECT -->
|
|
52
|
+
<w:hyperlink r:id="rId5">
|
|
53
|
+
<w:r>
|
|
54
|
+
<w:t>Click here</w:t>
|
|
55
|
+
</w:r>
|
|
56
|
+
</w:hyperlink>
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
**2. Two-Part Hyperlink System**
|
|
60
|
+
|
|
61
|
+
Every hyperlink requires TWO parts that must stay synchronized:
|
|
62
|
+
|
|
63
|
+
**Part 1: Content in `word/document.xml`**
|
|
64
|
+
|
|
65
|
+
```xml
|
|
66
|
+
<w:hyperlink r:id="rId5">
|
|
67
|
+
<w:r>
|
|
68
|
+
<w:rPr>
|
|
69
|
+
<w:rStyle w:val="Hyperlink"/>
|
|
70
|
+
</w:rPr>
|
|
71
|
+
<w:t>Link Text</w:t>
|
|
72
|
+
</w:r>
|
|
73
|
+
</w:hyperlink>
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
**Part 2: Relationship in `word/_rels/document.xml.rels`**
|
|
77
|
+
|
|
78
|
+
```xml
|
|
79
|
+
<Relationship Id="rId5"
|
|
80
|
+
Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"
|
|
81
|
+
Target="https://example.com"
|
|
82
|
+
TargetMode="External" />
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
**Common Corruption Causes:**
|
|
86
|
+
|
|
87
|
+
- **Orphaned Hyperlinks:** A hyperlink's `r:id` in document.xml has no matching relationship in document.xml.rels
|
|
88
|
+
- **Missing Relationships:** A hyperlink was added or changed, but the relationships file was not updated to match
|
|
89
|
+
- **Duplicate IDs:** Two relationships in the same `.rels` file share one `Id`, or two hyperlinks with different targets use the same `r:id`
|
|
90
|
+
- **Wrong Relationship Type:** Using wrong `Type` attribute
|
|
91
|
+
|
|
92
|
+
**3. XML Structure Integrity**
|
|
93
|
+
|
|
94
|
+
Documents must maintain proper nesting:
|
|
95
|
+
|
|
96
|
+
```xml
|
|
97
|
+
<!-- CORRECT Structure -->
|
|
98
|
+
<w:p> <!-- Paragraph -->
|
|
99
|
+
<w:pPr> <!-- Paragraph properties -->
|
|
100
|
+
<w:pStyle w:val="Normal"/>
|
|
101
|
+
</w:pPr>
|
|
102
|
+
<w:r> <!-- Run (text container) -->
|
|
103
|
+
<w:rPr> <!-- Run properties -->
|
|
104
|
+
<w:b/> <!-- Bold -->
|
|
105
|
+
</w:rPr>
|
|
106
|
+
<w:t>Text</w:t> <!-- Text -->
|
|
107
|
+
</w:r>
|
|
108
|
+
</w:p>
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
**Violation Example:**
|
|
112
|
+
|
|
113
|
+
```xml
|
|
114
|
+
<!-- WRONG - Run properties outside run -->
|
|
115
|
+
<w:p>
|
|
116
|
+
<w:rPr>
|
|
117
|
+
<w:b/>
|
|
118
|
+
</w:rPr>
|
|
119
|
+
<w:t>Text</w:t> <!-- Text not wrapped in run -->
|
|
120
|
+
</w:p>
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
## Required Investigation
|
|
124
|
+
|
|
125
|
+
### 1. Review All DOCX Manipulation Code
|
|
126
|
+
|
|
127
|
+
Examine all files that modify DOCX/XML structures:
|
|
128
|
+
|
|
129
|
+
**Identified Files (from project structure):**
|
|
130
|
+
|
|
131
|
+
- `src/services/document/DocumentProcessingComparison.ts`
|
|
132
|
+
- `src/services/HyperlinkService.ts`
|
|
133
|
+
- `src/services/document/processors/HyperlinkProcessor.ts`
|
|
134
|
+
- Any code using JSZip or xml2js to modify document.xml
|
|
135
|
+
|
|
136
|
+
**Check For:**
|
|
137
|
+
|
|
138
|
+
- [ ] Proper XML namespace usage (`w:`, `r:`, etc.)
|
|
139
|
+
- [ ] Synchronization between document.xml and document.xml.rels
|
|
140
|
+
- [ ] Correct element nesting (p → r → t)
|
|
141
|
+
- [ ] Relationship ID uniqueness
|
|
142
|
+
- [ ] Proper attribute escaping (XML special characters)
|
|
143
|
+
|
|
144
|
+
### 2. Validate Against OOXML Specification
|
|
145
|
+
|
|
146
|
+
**Key Documentation References:**
|
|
147
|
+
|
|
148
|
+
- Project: [`OOXML_HYPERLINK_ARCHITECTURE.md`](../OOXML_HYPERLINK_ARCHITECTURE.md)
|
|
149
|
+
- Project: [`docxmlater-functions-and-structure.md`](../architecture/docxmlater-functions-and-structure.md)
|
|
150
|
+
- External: [ECMA-376 Office Open XML Specification](https://ecma-international.org/publications-and-standards/standards/ecma-376/)
|
|
151
|
+
|
|
152
|
+
**Validation Checklist:**
|
|
153
|
+
|
|
154
|
+
- [ ] All hyperlinks have matching relationships
|
|
155
|
+
- [ ] No orphaned relationship IDs
|
|
156
|
+
- [ ] XML namespaces declared in root element
|
|
157
|
+
- [ ] Content-Type entries exist for all parts
|
|
158
|
+
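- [ ] No duplicate relationship IDs within any single .rels file (IDs are scoped per relationships part, so the same ID may legitimately appear in different .rels files)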
|
|
159
|
+
|
|
160
|
+
### 3. Implement Corruption Detection
|
|
161
|
+
|
|
162
|
+
Add validation before saving documents:
|
|
163
|
+
|
|
164
|
+
```typescript
|
|
165
|
+
// Proposed validation function
|
|
166
|
+
async function validateDocumentIntegrity(zipFile: JSZip): Promise<ValidationResult> {
|
|
167
|
+
const errors: string[] = [];
|
|
168
|
+
const warnings: string[] = [];
|
|
169
|
+
|
|
170
|
+
// Load main document and relationships
|
|
171
|
+
const documentXml = await zipFile.file('word/document.xml')?.async('text');
|
|
172
|
+
const relsXml = await zipFile.file('word/_rels/document.xml.rels')?.async('text');
|
|
173
|
+
|
|
174
|
+
if (!documentXml || !relsXml) {
|
|
175
|
+
errors.push('Missing required files: document.xml or document.xml.rels');
|
|
176
|
+
return { valid: false, errors, warnings };
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
// Parse XML
|
|
180
|
+
const doc = parseXML(documentXml);
|
|
181
|
+
const rels = parseXML(relsXml);
|
|
182
|
+
|
|
183
|
+
// Check 1: Validate all hyperlink r:id references exist in relationships
|
|
184
|
+
const hyperlinkIds = extractHyperlinkIds(doc);
|
|
185
|
+
const relationshipIds = extractRelationshipIds(rels);
|
|
186
|
+
|
|
187
|
+
for (const id of hyperlinkIds) {
|
|
188
|
+
if (!relationshipIds.has(id)) {
|
|
189
|
+
errors.push(`Orphaned hyperlink: r:id="${id}" has no matching relationship`);
|
|
190
|
+
}
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
// Check 2: Validate XML namespace declarations
|
|
194
|
+
const requiredNamespaces = [
|
|
195
|
+
'http://schemas.openxmlformats.org/wordprocessingml/2006/main',
|
|
196
|
+
'http://schemas.openxmlformats.org/officeDocument/2006/relationships',
|
|
197
|
+
];
|
|
198
|
+
|
|
199
|
+
for (const ns of requiredNamespaces) {
|
|
200
|
+
if (!documentXml.includes(ns)) {
|
|
201
|
+
warnings.push(`Missing namespace declaration: ${ns}`);
|
|
202
|
+
}
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
// Check 3: Validate element structure
|
|
206
|
+
const structureErrors = validateElementNesting(doc);
|
|
207
|
+
errors.push(...structureErrors);
|
|
208
|
+
|
|
209
|
+
return {
|
|
210
|
+
valid: errors.length === 0,
|
|
211
|
+
errors,
|
|
212
|
+
warnings,
|
|
213
|
+
};
|
|
214
|
+
}
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
### 4. Add Pre-Save Backup
|
|
218
|
+
|
|
219
|
+
Implement automatic backup before modifications:
|
|
220
|
+
|
|
221
|
+
```typescript
|
|
222
|
+
// Proposed safety mechanism
|
|
223
|
+
async function processDocumentSafely(filePath: string, operations: Operation[]): Promise<void> {
|
|
224
|
+
// Create backup
|
|
225
|
+
const backupPath = `${filePath}.backup`;
|
|
226
|
+
await fs.copyFile(filePath, backupPath);
|
|
227
|
+
|
|
228
|
+
try {
|
|
229
|
+
// Perform modifications
|
|
230
|
+
await modifyDocument(filePath, operations);
|
|
231
|
+
|
|
232
|
+
// Validate result
|
|
233
|
+
const zip = await JSZip.loadAsync(fs.readFileSync(filePath));
|
|
234
|
+
const validation = await validateDocumentIntegrity(zip);
|
|
235
|
+
|
|
236
|
+
if (!validation.valid) {
|
|
237
|
+
// Restore from backup
|
|
238
|
+
await fs.copyFile(backupPath, filePath);
|
|
239
|
+
throw new Error(`Document corruption detected: ${validation.errors.join(', ')}`);
|
|
240
|
+
}
|
|
241
|
+
|
|
242
|
+
// Success - remove backup
|
|
243
|
+
await fs.unlink(backupPath);
|
|
244
|
+
} catch (error) {
|
|
245
|
+
// Restore from backup on any error
|
|
246
|
+
await fs.copyFile(backupPath, filePath);
|
|
247
|
+
throw error;
|
|
248
|
+
}
|
|
249
|
+
}
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
## Common Corruption Scenarios
|
|
253
|
+
|
|
254
|
+
Based on OOXML documentation, these are the most frequent causes:
|
|
255
|
+
|
|
256
|
+
### Scenario 1: Hyperlink Content ID Appending
|
|
257
|
+
|
|
258
|
+
**Problem:** Adding `_content` to hyperlink IDs without updating relationships
|
|
259
|
+
|
|
260
|
+
**Bad Code:**
|
|
261
|
+
|
|
262
|
+
```typescript
|
|
263
|
+
// Modifies document.xml hyperlink
|
|
264
|
+
hyperlink.setAttribute('r:id', `${originalId}_content`);
|
|
265
|
+
// But doesn't create new relationship in document.xml.rels!
|
|
266
|
+
```
|
|
267
|
+
|
|
268
|
+
**Fix:**
|
|
269
|
+
|
|
270
|
+
```typescript
|
|
271
|
+
// 1. Create new relationship
|
|
272
|
+
const newRelId = await addRelationship(relsXml, {
|
|
273
|
+
type: 'hyperlink',
|
|
274
|
+
target: originalTarget + '#_content',
|
|
275
|
+
targetMode: 'External',
|
|
276
|
+
});
|
|
277
|
+
|
|
278
|
+
// 2. Update hyperlink reference
|
|
279
|
+
hyperlink.setAttribute('r:id', newRelId);
|
|
280
|
+
```
|
|
281
|
+
|
|
282
|
+
### Scenario 2: Text Replacement Breaking Structure
|
|
283
|
+
|
|
284
|
+
**Problem:** Replacing text without maintaining run structure
|
|
285
|
+
|
|
286
|
+
**Bad Code:**
|
|
287
|
+
|
|
288
|
+
```typescript
|
|
289
|
+
textNode.textContent = newText; // Loses formatting
|
|
290
|
+
```
|
|
291
|
+
|
|
292
|
+
**Fix:**
|
|
293
|
+
|
|
294
|
+
```typescript
|
|
295
|
+
// Preserve run structure
|
|
296
|
+
const run = textNode.closest('w:r');
|
|
297
|
+
const newRun = createRun(newText, {
|
|
298
|
+
bold: run.querySelector('w:b') !== null,
|
|
299
|
+
italic: run.querySelector('w:i') !== null,
|
|
300
|
+
// ... copy all formatting
|
|
301
|
+
});
|
|
302
|
+
run.replaceWith(newRun);
|
|
303
|
+
```
|
|
304
|
+
|
|
305
|
+
### Scenario 3: Namespace Loss During Parsing
|
|
306
|
+
|
|
307
|
+
**Problem:** XML parser strips namespaces
|
|
308
|
+
|
|
309
|
+
**Bad Code:**
|
|
310
|
+
|
|
311
|
+
```typescript
|
|
312
|
+
const xml2js = require('xml2js');
|
|
313
|
+
const parser = new xml2js.Parser(); // Default settings strip namespaces!
|
|
314
|
+
```
|
|
315
|
+
|
|
316
|
+
**Fix:**
|
|
317
|
+
|
|
318
|
+
```typescript
|
|
319
|
+
const parser = new xml2js.Parser({
|
|
320
|
+
xmlns: true, // Preserve namespaces
|
|
321
|
+
explicitArray: false,
|
|
322
|
+
preserveChildrenOrder: true,
|
|
323
|
+
});
|
|
324
|
+
```
|
|
325
|
+
|
|
326
|
+
## Acceptance Criteria
|
|
327
|
+
|
|
328
|
+
- [ ] All DOCX manipulation code reviewed for OOXML compliance
|
|
329
|
+
- [ ] Document validation added before every save operation
|
|
330
|
+
- [ ] Automatic backup/restore implemented
|
|
331
|
+
- [ ] Test suite includes corrupted document detection
|
|
332
|
+
- [ ] Corruption errors logged with specific violation details
|
|
333
|
+
- [ ] All hyperlink modifications maintain two-part system integrity
|
|
334
|
+
- [ ] XML namespaces properly preserved during parsing/serialization
|
|
335
|
+
- [ ] Element nesting validated (p → r → t structure)
|
|
336
|
+
- [ ] No orphaned relationships remain after processing
|
|
337
|
+
- [ ] Processed documents open successfully in Microsoft Word
|
|
338
|
+
|
|
339
|
+
## Testing Strategy
|
|
340
|
+
|
|
341
|
+
**1. Corruption Detection Tests**
|
|
342
|
+
|
|
343
|
+
```typescript
|
|
344
|
+
describe('Document Integrity', () => {
|
|
345
|
+
it('should detect orphaned hyperlink IDs', async () => {
|
|
346
|
+
const doc = createDocumentWithOrphanedHyperlink();
|
|
347
|
+
const validation = await validateDocumentIntegrity(doc);
|
|
348
|
+
expect(validation.valid).toBe(false);
|
|
349
|
+
expect(validation.errors).toContain('Orphaned hyperlink');
|
|
350
|
+
});
|
|
351
|
+
|
|
352
|
+
it('should detect missing namespaces', async () => {
|
|
353
|
+
const doc = createDocumentWithoutNamespaces();
|
|
354
|
+
const validation = await validateDocumentIntegrity(doc);
|
|
355
|
+
expect(validation.warnings).toContain('Missing namespace declaration');
|
|
356
|
+
});
|
|
357
|
+
});
|
|
358
|
+
```
|
|
359
|
+
|
|
360
|
+
**2. Round-Trip Tests**
|
|
361
|
+
|
|
362
|
+
```typescript
|
|
363
|
+
it('should maintain document integrity after processing', async () => {
|
|
364
|
+
const original = await loadDocument('test.docx');
|
|
365
|
+
await processDocument(original, operations);
|
|
366
|
+
const processed = await loadDocument('test.docx');
|
|
367
|
+
|
|
368
|
+
// Document should still open in Word
|
|
369
|
+
const validation = await validateDocumentIntegrity(processed);
|
|
370
|
+
expect(validation.valid).toBe(true);
|
|
371
|
+
});
|
|
372
|
+
```
|
|
373
|
+
|
|
374
|
+
**3. Real-World Document Tests**
|
|
375
|
+
|
|
376
|
+
- Test with documents containing complex structures (tables, images, TOC)
|
|
377
|
+
- Test with large documents (100+ pages)
|
|
378
|
+
- Test with documents from different Word versions (2007, 2013, 2016, 2019, 365)
|
|
379
|
+
|
|
380
|
+
## Estimated Effort
|
|
381
|
+
|
|
382
|
+
**Phase 1: Investigation** (4 hours)
|
|
383
|
+
|
|
384
|
+
- Audit all DOCX manipulation code
|
|
385
|
+
- Identify current violation patterns
|
|
386
|
+
- Document specific corruption scenarios
|
|
387
|
+
|
|
388
|
+
**Phase 2: Implementation** (8 hours)
|
|
389
|
+
|
|
390
|
+
- Add document validation function
|
|
391
|
+
- Implement backup/restore mechanism
|
|
392
|
+
- Fix identified OOXML violations
|
|
393
|
+
|
|
394
|
+
**Phase 3: Testing** (4 hours)
|
|
395
|
+
|
|
396
|
+
- Create corruption detection test suite
|
|
397
|
+
- Run round-trip tests on sample documents
|
|
398
|
+
- Validate against Word compatibility
|
|
399
|
+
|
|
400
|
+
**Total: 16 hours**
|
|
401
|
+
|
|
402
|
+
## References
|
|
403
|
+
|
|
404
|
+
- [`OOXML_HYPERLINK_ARCHITECTURE.md`](../OOXML_HYPERLINK_ARCHITECTURE.md) - Project's OOXML technical documentation
|
|
405
|
+
- [`docxmlater-functions-and-structure.md`](../architecture/docxmlater-functions-and-structure.md) - DOCX manipulation API reference
|
|
406
|
+
- [ECMA-376 Standard](https://ecma-international.org/publications-and-standards/standards/ecma-376/) - Official Office Open XML specification
|
|
407
|
+
- [Microsoft Office Dev Center](https://docs.microsoft.com/en-us/office/open-xml/structure-of-a-wordprocessingml-document) - OOXML structure guide
|