documentation-hub 5.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +43 -0
- package/.github/workflows/build.yml +64 -0
- package/.github/workflows/ci.yml +39 -0
- package/.vscode/extensions.json +3 -0
- package/Current.md +97 -0
- package/DocHub_Image.png +0 -0
- package/README.md +666 -0
- package/USER_GUIDE.md +1173 -0
- package/Updater.md +311 -0
- package/build/256x256.png +0 -0
- package/build/512x512.png +0 -0
- package/build/app-update.yml +4 -0
- package/build/create-icon.js +208 -0
- package/build/icon.ico +0 -0
- package/build/icon.png +0 -0
- package/build/icon_1024x1024.png +0 -0
- package/dist/assets/Analytics-BpsG9895.js +1 -0
- package/dist/assets/Card-IAZin8kp.js +1 -0
- package/dist/assets/CurrentSession-B-rFkHvf.js +12 -0
- package/dist/assets/Dashboard-C_5gMb0q.js +1 -0
- package/dist/assets/Documents-CqZ25axS.js +1 -0
- package/dist/assets/Input-l89xwXBi.js +1 -0
- package/dist/assets/Reporting-DqdHJY_a.js +1 -0
- package/dist/assets/Search-XNbu5z_3.js +1 -0
- package/dist/assets/SessionManager-lH9hZfzH.js +1 -0
- package/dist/assets/Sessions-ClZOPYNc.js +1 -0
- package/dist/assets/Settings-DUEHGURa.js +11 -0
- package/dist/assets/index-8xUe8ptc.js +24 -0
- package/dist/assets/index-RYyJqF7O.css +1 -0
- package/dist/assets/path-BkOl0AGO.js +1 -0
- package/dist/assets/promises-ID_B9S-h.js +1 -0
- package/dist/assets/urlHelpers-TvgahX0r.js +1 -0
- package/dist/assets/useToast-yRSO1dkm.js +1 -0
- package/dist/assets/vendor-charts-RkGK5ROP.js +36 -0
- package/dist/assets/vendor-db-l0sNRNKZ.js +1 -0
- package/dist/assets/vendor-react-BVZ_anCF.js +4 -0
- package/dist/assets/vendor-search-Dw8P0qyA.js +1 -0
- package/dist/assets/vendor-ui-BU7NfluV.js +53 -0
- package/dist/electron/PowerAutomateApiService-LfW09ZGr.js +147 -0
- package/dist/electron/main-CXkNtyv-.js +19789 -0
- package/dist/electron/main.js +5 -0
- package/dist/electron/preload.js +1 -0
- package/dist/icon.png +0 -0
- package/dist/index.html +27 -0
- package/docs/CODEBASE_ANALYSIS_REPORT.md +309 -0
- package/docs/DEBUG_LOGGING_GUIDE.md +244 -0
- package/docs/README.md +115 -0
- package/docs/TOC_WIRING_GUIDE.md +344 -0
- package/docs/analysis/Bullet_Symbol_Bug_Analysis.md +136 -0
- package/docs/analysis/DOCXMLATER_ANALYSIS_SUMMARY.txt +169 -0
- package/docs/analysis/Document_Processing_Issues_Analysis.md +704 -0
- package/docs/analysis/FIELD_PRESERVATION_ANALYSIS.md +1200 -0
- package/docs/analysis/INDENTATION_PRESERVE_ANALYSIS.md +181 -0
- package/docs/analysis/INDENTATION_PRESERVE_IMPLEMENTATION.md +207 -0
- package/docs/analysis/List_Implementation.md +206 -0
- package/docs/analysis/List_Implementation_Accuracy_Report.md +366 -0
- package/docs/analysis/PROCESSING_OPTIONS_UI_UPDATES.md +220 -0
- package/docs/analysis/RefactorStyles.md +852 -0
- package/docs/analysis/STYLE_PARAMETER_ENHANCEMENT.md +143 -0
- package/docs/analysis/docxmlater-comparison-todo-2025-11-13.md +636 -0
- package/docs/analysis/docxmlater-implementation-analysis-2025-11-13.md +340 -0
- package/docs/analysis/docxmlater-template_ui-integration-analysis.md +263 -0
- package/docs/analysis/github-issues-to-create.md +237 -0
- package/docs/api/API_README.md +538 -0
- package/docs/api/API_REFERENCE.md +751 -0
- package/docs/api/TYPE_DEFINITIONS.md +869 -0
- package/docs/architecture/FONT_EMBEDDING_GUIDE.md +318 -0
- package/docs/architecture/docxmlater-functions-and-structure.md +726 -0
- package/docs/docxmlater-readme.md +1341 -0
- package/docs/fixes/EXECUTION_LOG_TEST_BASE.md +573 -0
- package/docs/fixes/HYPERLINK_TEXT_SANITIZATION.md +253 -0
- package/docs/fixes/README.md +37 -0
- package/docs/github-issues/issue-1-body.md +125 -0
- package/docs/github-issues/issue-10-body.md +850 -0
- package/docs/github-issues/issue-2-body.md +200 -0
- package/docs/github-issues/issue-3-body.md +270 -0
- package/docs/github-issues/issue-4-body.md +169 -0
- package/docs/github-issues/issue-5-body.md +173 -0
- package/docs/github-issues/issue-6-body.md +158 -0
- package/docs/github-issues/issue-7-body.md +171 -0
- package/docs/github-issues/issue-8-body.md +407 -0
- package/docs/github-issues/issue-9-body.md +515 -0
- package/docs/github-issues/issue-tracker.md +274 -0
- package/docs/github-issues/predictive-analysis-2025-10-18.md +2131 -0
- package/docs/implementation/List_Framework_Refactor_Plan.md +336 -0
- package/docs/implementation/PRIMARY_TEXT_COLOR_FEATURE.md +217 -0
- package/docs/implementation/RELEASE_PLAN_v2.1.0.md +362 -0
- package/docs/implementation/RefactorStyles.md +588 -0
- package/docs/implementation/implement-plan.md +489 -0
- package/docs/implementation/missing-helpers-implementation.md +391 -0
- package/docs/implementation/refactor-plan.md +520 -0
- package/docs/implementation/session-implementation-complete.md +233 -0
- package/docs/implementation/session-management-plan.md +250 -0
- package/docs/setup-checklist.md +77 -0
- package/docs/versions/changelog.md +345 -0
- package/electron/customUpdater.ts +656 -0
- package/electron/main.ts +2441 -0
- package/electron/memoryConfig.ts +187 -0
- package/electron/preload.ts +394 -0
- package/electron/proxyConfig.ts +340 -0
- package/electron/services/BackupService.ts +452 -0
- package/electron/services/DictionaryService.ts +402 -0
- package/electron/services/LocalDictionaryLookupService.ts +147 -0
- package/electron/services/PowerAutomateApiService.ts +231 -0
- package/electron/services/SharePointSyncService.ts +474 -0
- package/electron/windowsCertStore.ts +427 -0
- package/electron/zscalerConfig.ts +381 -0
- package/eslint.config.js +92 -0
- package/jest.config.js +52 -0
- package/package.json +214 -0
- package/postcss.config.mjs +6 -0
- package/public/icon.png +0 -0
- package/publish-release.ps1 +5 -0
- package/renovate.json +30 -0
- package/src/App.tsx +216 -0
- package/src/__mocks__/p-limit.js +12 -0
- package/src/__mocks__/styleMock.js +1 -0
- package/src/components/common/BugReportButton.tsx +44 -0
- package/src/components/common/BugReportDialog.tsx +193 -0
- package/src/components/common/Button.tsx +153 -0
- package/src/components/common/Card.tsx +86 -0
- package/src/components/common/ColorPickerDialog.tsx +177 -0
- package/src/components/common/ConfirmDialog.tsx +96 -0
- package/src/components/common/DebugConsole.tsx +275 -0
- package/src/components/common/EmptyState.tsx +183 -0
- package/src/components/common/ErrorBoundary.tsx +98 -0
- package/src/components/common/ErrorDetailsDialog.tsx +153 -0
- package/src/components/common/ErrorFallback.tsx +218 -0
- package/src/components/common/Input.tsx +109 -0
- package/src/components/common/Skeleton.tsx +184 -0
- package/src/components/common/SplashScreen.tsx +81 -0
- package/src/components/common/Toast.tsx +155 -0
- package/src/components/common/Tooltip.tsx +79 -0
- package/src/components/common/UpdateNotification.tsx +320 -0
- package/src/components/comparison/ComparisonWindow.tsx +374 -0
- package/src/components/comparison/SideBySideDiff.tsx +486 -0
- package/src/components/comparison/index.ts +8 -0
- package/src/components/document/DocumentUploader.tsx +288 -0
- package/src/components/document/HyperlinkPreview.tsx +430 -0
- package/src/components/document/HyperlinkService.md +1484 -0
- package/src/components/document/Hyperlink_Technical_Documentation.md +496 -0
- package/src/components/document/InlineChangesView.tsx +707 -0
- package/src/components/document/ProcessingProgress.tsx +303 -0
- package/src/components/document/ProcessingResults.tsx +256 -0
- package/src/components/document/TrackedChangesDetail.tsx +530 -0
- package/src/components/document/TrackedChangesPanel.tsx +546 -0
- package/src/components/document/VirtualDocumentList.tsx +240 -0
- package/src/components/editor/DocumentEditor.tsx +723 -0
- package/src/components/editor/DocumentEditorModal.tsx +640 -0
- package/src/components/editor/EditorQuickActions.tsx +502 -0
- package/src/components/editor/EditorToolbar.tsx +312 -0
- package/src/components/editor/TableEditor.tsx +926 -0
- package/src/components/editor/index.ts +18 -0
- package/src/components/layout/Header.tsx +190 -0
- package/src/components/layout/Sidebar.tsx +313 -0
- package/src/components/layout/TitleBar.tsx +190 -0
- package/src/components/navigation/CommandPalette.tsx +233 -0
- package/src/components/navigation/KeyboardShortcutsModal.tsx +173 -0
- package/src/components/sessions/ChangeItem.tsx +408 -0
- package/src/components/sessions/ChangeViewer.tsx +1155 -0
- package/src/components/sessions/DocumentComparisonModal.tsx +314 -0
- package/src/components/sessions/ProcessingOptions.tsx +297 -0
- package/src/components/sessions/ReplacementsTab.tsx +438 -0
- package/src/components/sessions/RevisionHandlingOptions.tsx +87 -0
- package/src/components/sessions/SessionManager.tsx +188 -0
- package/src/components/sessions/StylesEditor.tsx +1335 -0
- package/src/components/sessions/TabContainer.tsx +151 -0
- package/src/components/sessions/VirtualSessionList.tsx +157 -0
- package/src/components/sessions/sessionToProcessorManager.tsx +420 -0
- package/src/components/settings/CertificateManager.tsx +410 -0
- package/src/components/settings/SegmentedControl.tsx +88 -0
- package/src/components/settings/SettingRow.tsx +52 -0
- package/src/contexts/GlobalStatsContext.tsx +396 -0
- package/src/contexts/SessionContext.tsx +2129 -0
- package/src/contexts/ThemeContext.tsx +428 -0
- package/src/contexts/UserSettingsContext.tsx +290 -0
- package/src/contexts/__tests__/GlobalStatsContext.test.tsx +390 -0
- package/src/global.d.ts +273 -0
- package/src/hooks/useDocumentQueue.tsx +210 -0
- package/src/hooks/useToast.tsx +55 -0
- package/src/main.tsx +10 -0
- package/src/pages/Analytics.tsx +386 -0
- package/src/pages/CurrentSession.tsx +1174 -0
- package/src/pages/Dashboard.tsx +319 -0
- package/src/pages/Documents.tsx +317 -0
- package/src/pages/Projects.tsx +250 -0
- package/src/pages/Reporting.tsx +386 -0
- package/src/pages/Search.tsx +349 -0
- package/src/pages/Sessions.tsx +285 -0
- package/src/pages/Settings.tsx +2662 -0
- package/src/services/HyperlinkService.ts +1085 -0
- package/src/services/document/DocXMLaterProcessor.ts +617 -0
- package/src/services/document/DocumentProcessingComparison.ts +856 -0
- package/src/services/document/DocumentSnapshotService.ts +575 -0
- package/src/services/document/WordDocumentProcessor.ts +10509 -0
- package/src/services/document/__tests__/DocXMLaterProcessor.hyperlinks.test.md +311 -0
- package/src/services/document/__tests__/WordDocumentProcessor.integration.test.ts +515 -0
- package/src/services/document/__tests__/WordDocumentProcessor.test.ts +812 -0
- package/src/services/document/blanklines/BlankLineManager.ts +658 -0
- package/src/services/document/blanklines/__tests__/paragraphChecks.test.ts +281 -0
- package/src/services/document/blanklines/helpers/blankLineInsertion.ts +87 -0
- package/src/services/document/blanklines/helpers/blankLineSnapshot.ts +251 -0
- package/src/services/document/blanklines/helpers/clearCustom.ts +121 -0
- package/src/services/document/blanklines/helpers/contextChecks.ts +117 -0
- package/src/services/document/blanklines/helpers/imageChecks.ts +51 -0
- package/src/services/document/blanklines/helpers/paragraphChecks.ts +236 -0
- package/src/services/document/blanklines/helpers/removeBlanksBetweenListItems.ts +91 -0
- package/src/services/document/blanklines/helpers/removeTrailingBlanks.ts +35 -0
- package/src/services/document/blanklines/helpers/tableGuards.ts +21 -0
- package/src/services/document/blanklines/index.ts +67 -0
- package/src/services/document/blanklines/rules/additionRules.ts +337 -0
- package/src/services/document/blanklines/rules/indentationRules.ts +317 -0
- package/src/services/document/blanklines/rules/removalRules.ts +362 -0
- package/src/services/document/blanklines/rules/ruleTypes.ts +92 -0
- package/src/services/document/blanklines/types.ts +29 -0
- package/src/services/document/helpers/ImageBorderCropper.ts +377 -0
- package/src/services/document/helpers/__tests__/whitespace.test.ts +272 -0
- package/src/services/document/helpers/whitespace.ts +117 -0
- package/src/services/document/list/ListNormalizer.ts +947 -0
- package/src/services/document/list/index.ts +45 -0
- package/src/services/document/list/list-detection.ts +275 -0
- package/src/services/document/list/list-types.ts +162 -0
- package/src/services/document/processors/HyperlinkProcessor.ts +370 -0
- package/src/services/document/processors/ListProcessor.ts +257 -0
- package/src/services/document/processors/StructureProcessor.ts +176 -0
- package/src/services/document/processors/StyleProcessor.ts +389 -0
- package/src/services/document/processors/TableProcessor.ts +2238 -0
- package/src/services/document/processors/__tests__/HyperlinkProcessor.test.ts +314 -0
- package/src/services/document/processors/__tests__/ListProcessor.test.ts +291 -0
- package/src/services/document/processors/__tests__/StructureProcessor.test.ts +257 -0
- package/src/services/document/processors/__tests__/TableProcessor.hlp-tips-bullets.test.ts +459 -0
- package/src/services/document/processors/__tests__/TableProcessor.test.ts +1604 -0
- package/src/services/document/processors/index.ts +28 -0
- package/src/services/document/types/docx-processing.ts +310 -0
- package/src/services/editor/EditorActionHandlers.ts +901 -0
- package/src/services/editor/index.ts +13 -0
- package/src/setupTests.ts +47 -0
- package/src/styles/global.css +782 -0
- package/src/types/backup.ts +132 -0
- package/src/types/dictionary.ts +125 -0
- package/src/types/document-processing.ts +331 -0
- package/src/types/docxmlater-augments.d.ts +142 -0
- package/src/types/editor.ts +280 -0
- package/src/types/electron.ts +340 -0
- package/src/types/globalStats.ts +155 -0
- package/src/types/hyperlink.ts +471 -0
- package/src/types/operations.ts +354 -0
- package/src/types/session.ts +427 -0
- package/src/types/settings.ts +112 -0
- package/src/utils/MemoryMonitor.ts +248 -0
- package/src/utils/cn.ts +6 -0
- package/src/utils/colorConvert.ts +306 -0
- package/src/utils/diffUtils.ts +347 -0
- package/src/utils/documentUtils.ts +202 -0
- package/src/utils/electronGuard.ts +62 -0
- package/src/utils/indexedDB.ts +915 -0
- package/src/utils/logger.ts +717 -0
- package/src/utils/pathSecurity.ts +232 -0
- package/src/utils/pathValidator.ts +236 -0
- package/src/utils/processingTimeEstimator.ts +153 -0
- package/src/utils/safeJsonParse.ts +62 -0
- package/src/utils/textSanitizer.ts +162 -0
- package/src/utils/urlHelpers.ts +304 -0
- package/src/utils/urlPatterns.ts +198 -0
- package/src/utils/urlSanitizer.ts +152 -0
- package/src/vite-env.d.ts +11 -0
- package/tsconfig.electron.json +19 -0
- package/tsconfig.json +36 -0
- package/tsconfig.node.json +12 -0
- package/typedoc.json +45 -0
- package/vite.config.ts +152 -0
|
@@ -0,0 +1,617 @@
|
|
|
1
|
+
import {
|
|
2
|
+
Document,
|
|
3
|
+
isHyperlink,
|
|
4
|
+
isRevision,
|
|
5
|
+
isHyperlinkContent,
|
|
6
|
+
} from 'docxmlater';
|
|
7
|
+
import type { Hyperlink, Paragraph, Revision } from 'docxmlater';
|
|
8
|
+
import { ProcessorResult } from './types/docx-processing';
|
|
9
|
+
import { logger } from '@/utils/logger';
|
|
10
|
+
|
|
11
|
+
// Create namespaced logger for document processing operations
|
|
12
|
+
const log = logger.namespace('DocXMLater');
|
|
13
|
+
|
|
14
|
+
/**
|
|
15
|
+
* Configuration options for the DocXMLaterProcessor
|
|
16
|
+
*
|
|
17
|
+
* @interface DocXMLaterOptions
|
|
18
|
+
* @property {boolean} [preserveFormatting=true] - Preserve existing formatting when applying styles
|
|
19
|
+
* @property {boolean} [validateOutput=false] - Validate document structure before saving
|
|
20
|
+
*/
|
|
21
|
+
export interface DocXMLaterOptions {
  /** Validate document structure before saving (processor default: false). */
  validateOutput?: boolean;
  /** Preserve existing formatting when applying styles (processor default: true). */
  preserveFormatting?: boolean;
}
|
|
25
|
+
|
|
26
|
+
/**
|
|
27
|
+
* Main document processor class using the docxmlater library
|
|
28
|
+
*
|
|
29
|
+
* Provides a high-level API for DOCX document manipulation with comprehensive
|
|
30
|
+
* error handling, type safety, and performance optimizations.
|
|
31
|
+
*
|
|
32
|
+
* @class DocXMLaterProcessor
|
|
33
|
+
* @example
|
|
34
|
+
* ```typescript
|
|
35
|
+
* // Create processor with options
|
|
36
|
+
* const processor = new DocXMLaterProcessor({
|
|
37
|
+
* preserveFormatting: true,
|
|
38
|
+
* validateOutput: false
|
|
39
|
+
* });
|
|
40
|
+
*
|
|
41
|
+
* // Load and modify document
|
|
42
|
+
* const result = await processor.loadFromFile('input.docx');
|
|
43
|
+
* if (result.success) {
|
|
44
|
+
* // Work with document
|
|
45
|
+
* await processor.saveToFile(result.data, 'output.docx');
|
|
46
|
+
* }
|
|
47
|
+
* ```
|
|
48
|
+
*/
|
|
49
|
+
export class DocXMLaterProcessor {
|
|
50
|
+
// Baseline option values; the constructor overlays caller-supplied options on top of these.
private defaultOptions: DocXMLaterOptions = { preserveFormatting: true, validateOutput: false };
|
|
54
|
+
|
|
55
|
+
/**
|
|
56
|
+
* Creates a new DocXMLaterProcessor instance
|
|
57
|
+
*
|
|
58
|
+
* @param {DocXMLaterOptions} [options={}] - Configuration options for the processor
|
|
59
|
+
* @param {boolean} [options.preserveFormatting=true] - Preserve existing formatting when applying styles
|
|
60
|
+
* @param {boolean} [options.validateOutput=false] - Validate document structure before saving
|
|
61
|
+
*
|
|
62
|
+
* @example
|
|
63
|
+
* ```typescript
|
|
64
|
+
* // Default options
|
|
65
|
+
* const processor = new DocXMLaterProcessor();
|
|
66
|
+
*
|
|
67
|
+
* // Custom options
|
|
68
|
+
* const strictProcessor = new DocXMLaterProcessor({
|
|
69
|
+
* preserveFormatting: false,
|
|
70
|
+
* validateOutput: true
|
|
71
|
+
* });
|
|
72
|
+
* ```
|
|
73
|
+
*/
|
|
74
|
+
constructor(options: DocXMLaterOptions = {}) {
  // Overlay the caller's options on the built-in defaults; later sources win.
  const merged: DocXMLaterOptions = Object.assign({}, this.defaultOptions, options);
  this.defaultOptions = merged;
}
|
|
77
|
+
|
|
78
|
+
// ========== Document I/O Operations ==========
|
|
79
|
+
|
|
80
|
+
/**
|
|
81
|
+
* Load a DOCX document from a file path
|
|
82
|
+
*
|
|
83
|
+
* Uses the docxmlater framework defaults to ensure no corruption during loading.
|
|
84
|
+
* Supports both absolute and relative file paths.
|
|
85
|
+
*
|
|
86
|
+
* **IMPORTANT: Memory Management**
|
|
87
|
+
*
|
|
88
|
+
* Always call `dispose()` on the returned Document when finished to free resources
|
|
89
|
+
* and prevent memory leaks, especially in long-running applications or when processing
|
|
90
|
+
* multiple documents.
|
|
91
|
+
*
|
|
92
|
+
* @async
|
|
93
|
+
* @param {string} filePath - Absolute or relative path to the DOCX file
|
|
94
|
+
* @returns {Promise<ProcessorResult<Document>>} Result containing the loaded Document or error
|
|
95
|
+
*
|
|
96
|
+
* @group Document I/O
|
|
97
|
+
*
|
|
98
|
+
* @example
|
|
99
|
+
* ```typescript
|
|
100
|
+
* const processor = new DocXMLaterProcessor();
|
|
101
|
+
*
|
|
102
|
+
* // Load document
|
|
103
|
+
* const result = await processor.loadFromFile('./documents/report.docx');
|
|
104
|
+
*
|
|
105
|
+
* if (result.success) {
|
|
106
|
+
* console.log('Document loaded successfully');
|
|
107
|
+
* const doc = result.data;
|
|
108
|
+
* // Work with document...
|
|
109
|
+
* doc.dispose(); // Clean up when done
|
|
110
|
+
* } else {
|
|
111
|
+
* console.error('Failed to load:', result.error);
|
|
112
|
+
* }
|
|
113
|
+
* ```
|
|
114
|
+
*
|
|
115
|
+
* @see {@link Document} for document manipulation methods
|
|
116
|
+
* @see {@link ProcessorResult} for result handling
|
|
117
|
+
*/
|
|
118
|
+
async loadFromFile(filePath: string): Promise<ProcessorResult<Document>> {
  log.debug('Loading document from file', { filePath });
  try {
    // Lenient parsing is requested explicitly (strictParsing: false); all other
    // load options are left at the docxmlater defaults.
    const doc = await Document.load(filePath, { strictParsing: false });
    log.info('Document loaded successfully', { filePath });
    return {
      success: true,
      data: doc,
    };
  } catch (error: unknown) {
    // Any value can be thrown (including null or a string), so read the
    // message defensively instead of assuming an Error instance.
    const message =
      typeof error === 'object' && error !== null && 'message' in error
        ? String((error as { message: unknown }).message)
        : String(error);
    log.error('Failed to load document', { filePath, error: message });
    return {
      success: false,
      error: `Failed to load document: ${message}`,
    };
  }
}
|
|
136
|
+
|
|
137
|
+
/**
|
|
138
|
+
* Load a DOCX document from a Buffer object
|
|
139
|
+
*
|
|
140
|
+
* Useful for processing documents from memory, HTTP responses, or other sources
|
|
141
|
+
* that provide data as Buffer objects. Uses docxmlater framework defaults to
|
|
142
|
+
* ensure no corruption during loading.
|
|
143
|
+
*
|
|
144
|
+
* **IMPORTANT: Memory Management**
|
|
145
|
+
*
|
|
146
|
+
* Always call `dispose()` on the returned Document when finished to free resources
|
|
147
|
+
* and prevent memory leaks, especially in long-running applications or when processing
|
|
148
|
+
* multiple documents.
|
|
149
|
+
*
|
|
150
|
+
* @async
|
|
151
|
+
* @param {Buffer} buffer - Buffer containing the DOCX file data
|
|
152
|
+
* @returns {Promise<ProcessorResult<Document>>} Result containing the loaded Document or error
|
|
153
|
+
*
|
|
154
|
+
* @group Document I/O
|
|
155
|
+
*
|
|
156
|
+
* @example
|
|
157
|
+
* ```typescript
|
|
158
|
+
* const processor = new DocXMLaterProcessor();
|
|
159
|
+
*
|
|
160
|
+
* // Load from HTTP response
|
|
161
|
+
* const response = await fetch('https://example.com/document.docx');
|
|
162
|
+
* const arrayBuffer = await response.arrayBuffer();
|
|
163
|
+
* const buffer = Buffer.from(arrayBuffer);
|
|
164
|
+
*
|
|
165
|
+
* const result = await processor.loadFromBuffer(buffer);
|
|
166
|
+
* if (result.success) {
|
|
167
|
+
* const doc = result.data;
|
|
168
|
+
* // Work with document...
|
|
169
|
+
* doc.dispose(); // Clean up when done
|
|
170
|
+
* }
|
|
171
|
+
* ```
|
|
172
|
+
*
|
|
173
|
+
* @see {@link loadFromFile} for loading from file paths
|
|
174
|
+
* @see {@link Document} for document manipulation methods
|
|
175
|
+
*/
|
|
176
|
+
async loadFromBuffer(buffer: Buffer): Promise<ProcessorResult<Document>> {
  log.debug('Loading document from buffer', { bufferSize: buffer.length });
  try {
    // No load options are passed, so the docxmlater defaults apply.
    const doc = await Document.loadFromBuffer(buffer);
    log.info('Document loaded from buffer successfully', { bufferSize: buffer.length });
    return {
      success: true,
      data: doc,
    };
  } catch (error: unknown) {
    // Any value can be thrown (including null or a string), so read the
    // message defensively instead of assuming an Error instance.
    const message =
      typeof error === 'object' && error !== null && 'message' in error
        ? String((error as { message: unknown }).message)
        : String(error);
    log.error('Failed to load document from buffer', { bufferSize: buffer.length, error: message });
    return {
      success: false,
      error: `Failed to load document from buffer: ${message}`,
    };
  }
}
|
|
194
|
+
|
|
195
|
+
/**
|
|
196
|
+
* Load a document with revision handling based on auto-accept setting.
|
|
197
|
+
*
|
|
198
|
+
* This method simplifies the common pattern of loading documents for processing
|
|
199
|
+
* where you need to handle tracked changes differently based on user preferences.
|
|
200
|
+
*
|
|
201
|
+
* **When acceptRevisions is TRUE:**
|
|
202
|
+
* - Document is loaded with revisions preserved for inspection
|
|
203
|
+
* - Revisions are then accepted using in-memory transformation
|
|
204
|
+
* - Document is clean and ready for modifications
|
|
205
|
+
* - Track changes is enabled for the specified author
|
|
206
|
+
*
|
|
207
|
+
* **When acceptRevisions is FALSE:**
|
|
208
|
+
* - Document is loaded with revisions preserved
|
|
209
|
+
* - All pre-existing tracked changes remain in the document
|
|
210
|
+
* - Track changes is enabled for the specified author
|
|
211
|
+
* - Both pre-existing AND new changes will be visible in Word
|
|
212
|
+
*
|
|
213
|
+
* @async
|
|
214
|
+
* @param filePath - Path to the DOCX file
|
|
215
|
+
* @param options - Revision handling options
|
|
216
|
+
* @param options.acceptRevisions - Whether to accept pre-existing revisions (default: false)
|
|
217
|
+
* @param options.author - Author name for tracked changes (default: 'Doc Hub')
|
|
218
|
+
* @param options.trackFormatting - Whether to track formatting changes (default: true)
|
|
219
|
+
* @returns ProcessorResult containing the loaded Document ready for processing
|
|
220
|
+
*
|
|
221
|
+
* @example
|
|
222
|
+
* ```typescript
|
|
223
|
+
* // Auto-Accept ON: Clean document, track DocHub changes
|
|
224
|
+
* const result = await processor.loadWithRevisionHandling('input.docx', {
|
|
225
|
+
* acceptRevisions: true,
|
|
226
|
+
* author: 'Doc Hub'
|
|
227
|
+
* });
|
|
228
|
+
*
|
|
229
|
+
* // Auto-Accept OFF: Preserve all revisions
|
|
230
|
+
* const result = await processor.loadWithRevisionHandling('input.docx', {
|
|
231
|
+
* acceptRevisions: false,
|
|
232
|
+
* author: 'Doc Hub'
|
|
233
|
+
* });
|
|
234
|
+
* ```
|
|
235
|
+
*/
|
|
236
|
+
async loadWithRevisionHandling(
  filePath: string,
  options: {
    acceptRevisions?: boolean;
    author?: string;
    trackFormatting?: boolean;
  } = {}
): Promise<ProcessorResult<Document>> {
  const { acceptRevisions = false, author = 'Doc Hub', trackFormatting = true } = options;

  log.debug('Loading document with revision handling', {
    filePath,
    acceptRevisions,
    author,
    trackFormatting,
  });

  try {
    // When accepting, pass acceptRevisions and leave revisionHandling unset;
    // otherwise ask the loader to preserve the existing revisions.
    const doc = await Document.load(filePath, {
      strictParsing: false,
      acceptRevisions: acceptRevisions,
      revisionHandling: acceptRevisions ? undefined : 'preserve',
    });

    // Enable track changes so subsequent modifications are attributed to `author`
    doc.enableTrackChanges({
      author,
      trackFormatting,
    });

    log.info('Document loaded with revision handling', {
      filePath,
      acceptRevisions,
      author,
    });

    return {
      success: true,
      data: doc,
    };
  } catch (error: unknown) {
    // Any value can be thrown (including null or a string), so read the
    // message defensively instead of assuming an Error instance.
    const message =
      typeof error === 'object' && error !== null && 'message' in error
        ? String((error as { message: unknown }).message)
        : String(error);
    log.error('Failed to load document with revision handling', {
      filePath,
      error: message,
    });
    return {
      success: false,
      error: `Failed to load document: ${message}`,
    };
  }
}
|
|
287
|
+
|
|
288
|
+
/**
 * Save a Document to a file path
 *
 * Delegates to `doc.save(filePath)` and reports the outcome as a
 * ProcessorResult instead of throwing: failures are logged and returned
 * with `success: false` and an error message.
 *
 * @param doc - Document instance to save
 * @param filePath - Path where the DOCX file will be written
 * @returns Result with `success: true` when the save completed, otherwise the error message
 */
async saveToFile(doc: Document, filePath: string): Promise<ProcessorResult<void>> {
  log.debug('Saving document to file', { filePath });
  try {
    await doc.save(filePath);
    log.info('Document saved successfully', { filePath });
    return {
      success: true,
    };
  } catch (error: unknown) {
    // Any value can be thrown (including null or a string), so read the
    // message defensively instead of assuming an Error instance.
    const message =
      typeof error === 'object' && error !== null && 'message' in error
        ? String((error as { message: unknown }).message)
        : String(error);
    log.error('Failed to save document', { filePath, error: message });
    return {
      success: false,
      error: `Failed to save document: ${message}`,
    };
  }
}
|
|
304
|
+
|
|
305
|
+
/**
|
|
306
|
+
* Save a Document to a file path with validation
|
|
307
|
+
*
|
|
308
|
+
* Saves the document after a pre-save size validation that guards against
|
|
309
|
+
* oversized files. Estimates document size and blocks saves exceeding thresholds.
|
|
310
|
+
* If the file already exists, it will be overwritten. The directory path must exist.
|
|
311
|
+
*
|
|
312
|
+
* @async
|
|
313
|
+
* @param {Document} doc - Document instance to save
|
|
314
|
+
* @param {string} filePath - Absolute or relative path where the DOCX file will be saved
|
|
315
|
+
* @param {Object} [options] - Validation options
|
|
316
|
+
* @param {number} [options.maxSizeMB=50] - Maximum allowed file size in MB (default: 50MB)
|
|
317
|
+
* @param {boolean} [options.warnOnLarge=true] - Add a warning to the result for files >10MB (default: true)
|
|
318
|
+
* @returns {Promise<ProcessorResult<{sizeMB: number, warnings?: string[]}>>} Result with save confirmation and validation data
|
|
319
|
+
*
|
|
320
|
+
* @group Document I/O
|
|
321
|
+
*
|
|
322
|
+
* @example
|
|
323
|
+
* ```typescript
|
|
324
|
+
* const processor = new DocXMLaterProcessor();
|
|
325
|
+
*
|
|
326
|
+
* // Load and modify document
|
|
327
|
+
* const loadResult = await processor.loadFromFile('input.docx');
|
|
328
|
+
* if (loadResult.success) {
|
|
329
|
+
* const doc = loadResult.data;
|
|
330
|
+
*
|
|
331
|
+
* // Make modifications...
|
|
332
|
+
* doc.replaceText('old', 'new');
|
|
333
|
+
*
|
|
334
|
+
* // Save with validation
|
|
335
|
+
* const saveResult = await processor.saveToFileWithValidation(doc, 'output.docx');
|
|
336
|
+
* if (saveResult.success) {
|
|
337
|
+
* console.log(`Document saved (${saveResult.data.sizeMB.toFixed(2)}MB)`);
|
|
338
|
+
* if (saveResult.data.warnings?.length) {
|
|
339
|
+
* console.warn('Warnings:', saveResult.data.warnings);
|
|
340
|
+
* }
|
|
341
|
+
* } else {
|
|
342
|
+
* console.error('Save failed:', saveResult.error);
|
|
343
|
+
* }
|
|
344
|
+
*
|
|
345
|
+
* doc.dispose();
|
|
346
|
+
* }
|
|
347
|
+
* ```
|
|
348
|
+
*/
|
|
349
|
+
async saveToFileWithValidation(
  doc: Document,
  filePath: string,
  options?: {
    maxSizeMB?: number;
    warnOnLarge?: boolean;
  }
): Promise<
  ProcessorResult<{
    sizeMB: number;
    warnings?: string[];
  }>
> {
  try {
    // Note: `||` means a maxSizeMB of 0 (or NaN) also falls back to 50, so an
    // explicit 0 does not reject every document.
    const maxSizeMB = options?.maxSizeMB || 50;
    // Warnings are on unless the caller passes exactly `false`
    const warnOnLarge = options?.warnOnLarge !== false;

    // Estimate the size first; abort without saving if the estimate is unavailable
    const sizeResult = await this.estimateSize(doc);
    if (!sizeResult.success || !sizeResult.data) {
      return {
        success: false,
        error: `Size validation failed: ${sizeResult.error || 'No size data returned'}`,
      };
    }

    const sizeData = sizeResult.data;
    const sizeMB = sizeData.totalEstimatedMB;
    const warnings: string[] = [];

    // Hard limit: refuse to save documents above the maximum
    if (sizeMB > maxSizeMB) {
      return {
        success: false,
        error: `Document size (${sizeMB.toFixed(2)}MB) exceeds maximum allowed size (${maxSizeMB}MB)`,
      };
    }

    // Soft limit: the save proceeds, but the result carries a warning
    if (warnOnLarge && sizeMB > 10) {
      warnings.push(`Large document size: ${sizeMB.toFixed(2)}MB (recommended <10MB)`);
    }

    // Forward any warning produced by the size estimate itself
    if (sizeData.warning) {
      warnings.push(sizeData.warning);
    }

    // Perform the save
    const saveResult = await this.saveToFile(doc, filePath);
    if (!saveResult.success) {
      return {
        success: false,
        error: saveResult.error || 'Save operation failed',
      };
    }

    return {
      success: true,
      data: {
        sizeMB,
        // Omit the field entirely when there is nothing to report
        warnings: warnings.length > 0 ? warnings : undefined,
      },
    };
  } catch (error: unknown) {
    // Any value can be thrown (including null or a string), so read the
    // message defensively instead of assuming an Error instance.
    const message =
      typeof error === 'object' && error !== null && 'message' in error
        ? String((error as { message: unknown }).message)
        : String(error);
    return {
      success: false,
      error: `Validation save failed: ${message}`,
    };
  }
}
|
|
419
|
+
|
|
420
|
+
// ========== Document Statistics ==========
|
|
421
|
+
|
|
422
|
+
/**
 * Estimate document size before saving
 *
 * Delegates to `doc.estimateSize()` and wraps the outcome in a ProcessorResult,
 * so callers can check size limits by inspecting `success` rather than by
 * handling exceptions.
 *
 * Anything thrown by `doc.estimateSize()` is caught and reported as
 * `{ success: false, error }` instead of propagating to the caller.
 *
 * @async
 * @param {Document} doc - Document to estimate
 * @returns {Promise<ProcessorResult<{totalEstimatedMB: number, warning?: string}>>} Result with size estimate or error
 *
 * @group Document Statistics
 */
async estimateSize(doc: Document): Promise<
  ProcessorResult<{
    totalEstimatedMB: number;
    warning?: string;
  }>
> {
  try {
    const sizeEstimate = doc.estimateSize();

    return {
      success: true,
      data: sizeEstimate,
    };
  } catch (error: unknown) {
    // A thrown value is not guaranteed to be an Error (it may even be null or
    // undefined). Reading `.message` blindly would throw inside this handler and
    // reject the promise, so narrow first. Error-like objects that expose a
    // `message` property keep reporting that message, exactly as before.
    let reason: string;
    if (error instanceof Error) {
      reason = error.message;
    } else if (typeof error === 'object' && error !== null && 'message' in error) {
      reason = String((error as { message: unknown }).message);
    } else {
      reason = String(error);
    }

    return {
      success: false,
      error: `Failed to estimate size: ${reason}`,
    };
  }
}
|
|
454
|
+
|
|
455
|
+
/**
 * Create a new blank document
 *
 * Thin wrapper around `Document.create()`; the instance it produces is handed
 * back unchanged. Callers are expected to call dispose() on the returned
 * Document once they are finished with it.
 *
 * @returns {Document} New blank Document instance
 *
 * @group Utilities
 */
createNewDocument(): Document {
  const blankDocument = Document.create();
  return blankDocument;
}
|
|
468
|
+
|
|
469
|
+
// ========== Hyperlink Operations ==========
|
|
470
|
+
|
|
471
|
+
/**
 * Extract all hyperlinks from a document
 *
 * **CRITICAL METHOD - DO NOT REMOVE**
 * This method is required by WordDocumentProcessor for hyperlink processing operations.
 *
 * Walks every paragraph returned by `doc.getAllParagraphs()` and inspects each item of
 * the paragraph's content. Two kinds of hyperlink are collected:
 * - hyperlinks that appear directly in the paragraph content, and
 * - hyperlinks nested one level down inside revision (tracked-change) items.
 *
 * The display text of each hyperlink is passed through sanitizeHyperlinkText() before
 * it is returned; a hyperlink with no text yields an empty string.
 *
 * The URL is taken from `getUrl()` as-is. When that is empty but the hyperlink has a
 * relationship ID, a debug message is logged and the URL is still returned empty -
 * the relationship is NOT resolved here.
 *
 * @async
 * @param {Document} doc - Document to extract hyperlinks from
 * @returns {Promise<Array>} Array of hyperlink objects, in document order, with structure:
 *   - hyperlink: The Hyperlink instance from docxmlater
 *   - paragraph: The Paragraph containing this hyperlink
 *   - paragraphIndex: Index of the paragraph in the document
 *   - hyperlinkIndexInParagraph: Zero-based position of this hyperlink among the
 *     hyperlinks collected from the same paragraph (direct and revision-nested alike)
 *   - url: The hyperlink URL as returned by getUrl() (may be undefined)
 *   - text: Sanitized display text of the hyperlink
 *
 * @group Hyperlink Operations
 *
 * @example
 * ```typescript
 * const processor = new DocXMLaterProcessor();
 * const doc = await Document.load('document.docx');
 *
 * const hyperlinks = await processor.extractHyperlinks(doc);
 * console.log(`Found ${hyperlinks.length} hyperlinks`);
 *
 * for (const link of hyperlinks) {
 *   console.log(`Text: "${link.text}", URL: ${link.url}`);
 *   console.log(`Located in paragraph ${link.paragraphIndex}`);
 * }
 * ```
 *
 * @see {@link WordDocumentProcessor} - Uses this method for document processing
 */
async extractHyperlinks(doc: Document): Promise<
  Array<{
    hyperlink: Hyperlink;
    paragraph: Paragraph;
    paragraphIndex: number;
    hyperlinkIndexInParagraph: number; // Index of this hyperlink within its paragraph
    url?: string;
    text: string;
  }>
> {
  log.debug('Extracting hyperlinks from document');
  // Loaded with a dynamic import to avoid formatter issues with unused imports
  const { sanitizeHyperlinkText } = await import('@/utils/textSanitizer');

  // Shape of one collected entry (mirrors the element type of the return value)
  type ExtractedHyperlink = {
    hyperlink: Hyperlink;
    paragraph: Paragraph;
    paragraphIndex: number;
    hyperlinkIndexInParagraph: number;
    url?: string;
    text: string;
  };

  const collected: ExtractedHyperlink[] = [];

  // Reads the URL straight from the hyperlink. When it is missing but a relationship
  // ID exists, only a debug note is emitted - resolving the relationship is left to
  // the caller.
  const readUrl = (link: Hyperlink): string | undefined => {
    const directUrl = link.getUrl();
    if (directUrl) {
      return directUrl;
    }

    const relationshipId = link.getRelationshipId?.();
    if (relationshipId) {
      log.debug('URL not available via getUrl(), relationship lookup may be needed', {
        relationshipId,
      });
    }

    return directUrl;
  };

  const allParagraphs = doc.getAllParagraphs();
  log.debug('Scanning paragraphs for hyperlinks', { paragraphCount: allParagraphs.length });

  for (const [paragraphIndex, paragraph] of allParagraphs.entries()) {
    // Paragraph content can hold runs, hyperlinks, revisions and other item kinds
    const paragraphItems = paragraph.getContent();

    // Counter restarts for every paragraph
    let nextIndexInParagraph = 0;

    // Appends one entry for `link` and advances the per-paragraph counter
    const record = (link: Hyperlink, insideRevision: boolean): void => {
      const url = readUrl(link);
      const text = sanitizeHyperlinkText(link.getText() || '');

      collected.push({
        hyperlink: link,
        paragraph,
        paragraphIndex,
        hyperlinkIndexInParagraph: nextIndexInParagraph,
        url,
        text,
      });
      nextIndexInParagraph += 1;

      if (insideRevision) {
        log.debug('Found hyperlink inside revision element', { text: text.substring(0, 50) });
      }
    };

    for (const item of paragraphItems) {
      // Direct hyperlink in the paragraph content
      if (isHyperlink(item)) {
        record(item, false);
        continue;
      }

      // Anything that is neither a hyperlink nor a revision is irrelevant here
      if (!isRevision(item)) {
        continue;
      }

      // Revision (tracked change): look one level down for hyperlinks
      for (const nested of item.getContent()) {
        if (isHyperlinkContent(nested)) {
          record(nested, true);
        }
      }
    }
  }

  // Summary: entries with a truthy URL count as external, all others as internal
  let externalCount = 0;
  for (const entry of collected) {
    if (entry.url) {
      externalCount += 1;
    }
  }
  log.info('Hyperlinks extracted', {
    total: collected.length,
    external: externalCount,
    internal: collected.length - externalCount,
  });

  return collected;
}
|
|
617
|
+
}
|