documentation-hub 5.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (271) hide show
  1. package/.eslintrc.json +43 -0
  2. package/.github/workflows/build.yml +64 -0
  3. package/.github/workflows/ci.yml +39 -0
  4. package/.vscode/extensions.json +3 -0
  5. package/Current.md +97 -0
  6. package/DocHub_Image.png +0 -0
  7. package/README.md +666 -0
  8. package/USER_GUIDE.md +1173 -0
  9. package/Updater.md +311 -0
  10. package/build/256x256.png +0 -0
  11. package/build/512x512.png +0 -0
  12. package/build/app-update.yml +4 -0
  13. package/build/create-icon.js +208 -0
  14. package/build/icon.ico +0 -0
  15. package/build/icon.png +0 -0
  16. package/build/icon_1024x1024.png +0 -0
  17. package/dist/assets/Analytics-BpsG9895.js +1 -0
  18. package/dist/assets/Card-IAZin8kp.js +1 -0
  19. package/dist/assets/CurrentSession-B-rFkHvf.js +12 -0
  20. package/dist/assets/Dashboard-C_5gMb0q.js +1 -0
  21. package/dist/assets/Documents-CqZ25axS.js +1 -0
  22. package/dist/assets/Input-l89xwXBi.js +1 -0
  23. package/dist/assets/Reporting-DqdHJY_a.js +1 -0
  24. package/dist/assets/Search-XNbu5z_3.js +1 -0
  25. package/dist/assets/SessionManager-lH9hZfzH.js +1 -0
  26. package/dist/assets/Sessions-ClZOPYNc.js +1 -0
  27. package/dist/assets/Settings-DUEHGURa.js +11 -0
  28. package/dist/assets/index-8xUe8ptc.js +24 -0
  29. package/dist/assets/index-RYyJqF7O.css +1 -0
  30. package/dist/assets/path-BkOl0AGO.js +1 -0
  31. package/dist/assets/promises-ID_B9S-h.js +1 -0
  32. package/dist/assets/urlHelpers-TvgahX0r.js +1 -0
  33. package/dist/assets/useToast-yRSO1dkm.js +1 -0
  34. package/dist/assets/vendor-charts-RkGK5ROP.js +36 -0
  35. package/dist/assets/vendor-db-l0sNRNKZ.js +1 -0
  36. package/dist/assets/vendor-react-BVZ_anCF.js +4 -0
  37. package/dist/assets/vendor-search-Dw8P0qyA.js +1 -0
  38. package/dist/assets/vendor-ui-BU7NfluV.js +53 -0
  39. package/dist/electron/PowerAutomateApiService-LfW09ZGr.js +147 -0
  40. package/dist/electron/main-CXkNtyv-.js +19789 -0
  41. package/dist/electron/main.js +5 -0
  42. package/dist/electron/preload.js +1 -0
  43. package/dist/icon.png +0 -0
  44. package/dist/index.html +27 -0
  45. package/docs/CODEBASE_ANALYSIS_REPORT.md +309 -0
  46. package/docs/DEBUG_LOGGING_GUIDE.md +244 -0
  47. package/docs/README.md +115 -0
  48. package/docs/TOC_WIRING_GUIDE.md +344 -0
  49. package/docs/analysis/Bullet_Symbol_Bug_Analysis.md +136 -0
  50. package/docs/analysis/DOCXMLATER_ANALYSIS_SUMMARY.txt +169 -0
  51. package/docs/analysis/Document_Processing_Issues_Analysis.md +704 -0
  52. package/docs/analysis/FIELD_PRESERVATION_ANALYSIS.md +1200 -0
  53. package/docs/analysis/INDENTATION_PRESERVE_ANALYSIS.md +181 -0
  54. package/docs/analysis/INDENTATION_PRESERVE_IMPLEMENTATION.md +207 -0
  55. package/docs/analysis/List_Implementation.md +206 -0
  56. package/docs/analysis/List_Implementation_Accuracy_Report.md +366 -0
  57. package/docs/analysis/PROCESSING_OPTIONS_UI_UPDATES.md +220 -0
  58. package/docs/analysis/RefactorStyles.md +852 -0
  59. package/docs/analysis/STYLE_PARAMETER_ENHANCEMENT.md +143 -0
  60. package/docs/analysis/docxmlater-comparison-todo-2025-11-13.md +636 -0
  61. package/docs/analysis/docxmlater-implementation-analysis-2025-11-13.md +340 -0
  62. package/docs/analysis/docxmlater-template_ui-integration-analysis.md +263 -0
  63. package/docs/analysis/github-issues-to-create.md +237 -0
  64. package/docs/api/API_README.md +538 -0
  65. package/docs/api/API_REFERENCE.md +751 -0
  66. package/docs/api/TYPE_DEFINITIONS.md +869 -0
  67. package/docs/architecture/FONT_EMBEDDING_GUIDE.md +318 -0
  68. package/docs/architecture/docxmlater-functions-and-structure.md +726 -0
  69. package/docs/docxmlater-readme.md +1341 -0
  70. package/docs/fixes/EXECUTION_LOG_TEST_BASE.md +573 -0
  71. package/docs/fixes/HYPERLINK_TEXT_SANITIZATION.md +253 -0
  72. package/docs/fixes/README.md +37 -0
  73. package/docs/github-issues/issue-1-body.md +125 -0
  74. package/docs/github-issues/issue-10-body.md +850 -0
  75. package/docs/github-issues/issue-2-body.md +200 -0
  76. package/docs/github-issues/issue-3-body.md +270 -0
  77. package/docs/github-issues/issue-4-body.md +169 -0
  78. package/docs/github-issues/issue-5-body.md +173 -0
  79. package/docs/github-issues/issue-6-body.md +158 -0
  80. package/docs/github-issues/issue-7-body.md +171 -0
  81. package/docs/github-issues/issue-8-body.md +407 -0
  82. package/docs/github-issues/issue-9-body.md +515 -0
  83. package/docs/github-issues/issue-tracker.md +274 -0
  84. package/docs/github-issues/predictive-analysis-2025-10-18.md +2131 -0
  85. package/docs/implementation/List_Framework_Refactor_Plan.md +336 -0
  86. package/docs/implementation/PRIMARY_TEXT_COLOR_FEATURE.md +217 -0
  87. package/docs/implementation/RELEASE_PLAN_v2.1.0.md +362 -0
  88. package/docs/implementation/RefactorStyles.md +588 -0
  89. package/docs/implementation/implement-plan.md +489 -0
  90. package/docs/implementation/missing-helpers-implementation.md +391 -0
  91. package/docs/implementation/refactor-plan.md +520 -0
  92. package/docs/implementation/session-implementation-complete.md +233 -0
  93. package/docs/implementation/session-management-plan.md +250 -0
  94. package/docs/setup-checklist.md +77 -0
  95. package/docs/versions/changelog.md +345 -0
  96. package/electron/customUpdater.ts +656 -0
  97. package/electron/main.ts +2441 -0
  98. package/electron/memoryConfig.ts +187 -0
  99. package/electron/preload.ts +394 -0
  100. package/electron/proxyConfig.ts +340 -0
  101. package/electron/services/BackupService.ts +452 -0
  102. package/electron/services/DictionaryService.ts +402 -0
  103. package/electron/services/LocalDictionaryLookupService.ts +147 -0
  104. package/electron/services/PowerAutomateApiService.ts +231 -0
  105. package/electron/services/SharePointSyncService.ts +474 -0
  106. package/electron/windowsCertStore.ts +427 -0
  107. package/electron/zscalerConfig.ts +381 -0
  108. package/eslint.config.js +92 -0
  109. package/jest.config.js +52 -0
  110. package/package.json +214 -0
  111. package/postcss.config.mjs +6 -0
  112. package/public/icon.png +0 -0
  113. package/publish-release.ps1 +5 -0
  114. package/renovate.json +30 -0
  115. package/src/App.tsx +216 -0
  116. package/src/__mocks__/p-limit.js +12 -0
  117. package/src/__mocks__/styleMock.js +1 -0
  118. package/src/components/common/BugReportButton.tsx +44 -0
  119. package/src/components/common/BugReportDialog.tsx +193 -0
  120. package/src/components/common/Button.tsx +153 -0
  121. package/src/components/common/Card.tsx +86 -0
  122. package/src/components/common/ColorPickerDialog.tsx +177 -0
  123. package/src/components/common/ConfirmDialog.tsx +96 -0
  124. package/src/components/common/DebugConsole.tsx +275 -0
  125. package/src/components/common/EmptyState.tsx +183 -0
  126. package/src/components/common/ErrorBoundary.tsx +98 -0
  127. package/src/components/common/ErrorDetailsDialog.tsx +153 -0
  128. package/src/components/common/ErrorFallback.tsx +218 -0
  129. package/src/components/common/Input.tsx +109 -0
  130. package/src/components/common/Skeleton.tsx +184 -0
  131. package/src/components/common/SplashScreen.tsx +81 -0
  132. package/src/components/common/Toast.tsx +155 -0
  133. package/src/components/common/Tooltip.tsx +79 -0
  134. package/src/components/common/UpdateNotification.tsx +320 -0
  135. package/src/components/comparison/ComparisonWindow.tsx +374 -0
  136. package/src/components/comparison/SideBySideDiff.tsx +486 -0
  137. package/src/components/comparison/index.ts +8 -0
  138. package/src/components/document/DocumentUploader.tsx +288 -0
  139. package/src/components/document/HyperlinkPreview.tsx +430 -0
  140. package/src/components/document/HyperlinkService.md +1484 -0
  141. package/src/components/document/Hyperlink_Technical_Documentation.md +496 -0
  142. package/src/components/document/InlineChangesView.tsx +707 -0
  143. package/src/components/document/ProcessingProgress.tsx +303 -0
  144. package/src/components/document/ProcessingResults.tsx +256 -0
  145. package/src/components/document/TrackedChangesDetail.tsx +530 -0
  146. package/src/components/document/TrackedChangesPanel.tsx +546 -0
  147. package/src/components/document/VirtualDocumentList.tsx +240 -0
  148. package/src/components/editor/DocumentEditor.tsx +723 -0
  149. package/src/components/editor/DocumentEditorModal.tsx +640 -0
  150. package/src/components/editor/EditorQuickActions.tsx +502 -0
  151. package/src/components/editor/EditorToolbar.tsx +312 -0
  152. package/src/components/editor/TableEditor.tsx +926 -0
  153. package/src/components/editor/index.ts +18 -0
  154. package/src/components/layout/Header.tsx +190 -0
  155. package/src/components/layout/Sidebar.tsx +313 -0
  156. package/src/components/layout/TitleBar.tsx +190 -0
  157. package/src/components/navigation/CommandPalette.tsx +233 -0
  158. package/src/components/navigation/KeyboardShortcutsModal.tsx +173 -0
  159. package/src/components/sessions/ChangeItem.tsx +408 -0
  160. package/src/components/sessions/ChangeViewer.tsx +1155 -0
  161. package/src/components/sessions/DocumentComparisonModal.tsx +314 -0
  162. package/src/components/sessions/ProcessingOptions.tsx +297 -0
  163. package/src/components/sessions/ReplacementsTab.tsx +438 -0
  164. package/src/components/sessions/RevisionHandlingOptions.tsx +87 -0
  165. package/src/components/sessions/SessionManager.tsx +188 -0
  166. package/src/components/sessions/StylesEditor.tsx +1335 -0
  167. package/src/components/sessions/TabContainer.tsx +151 -0
  168. package/src/components/sessions/VirtualSessionList.tsx +157 -0
  169. package/src/components/sessions/sessionToProcessorManager.tsx +420 -0
  170. package/src/components/settings/CertificateManager.tsx +410 -0
  171. package/src/components/settings/SegmentedControl.tsx +88 -0
  172. package/src/components/settings/SettingRow.tsx +52 -0
  173. package/src/contexts/GlobalStatsContext.tsx +396 -0
  174. package/src/contexts/SessionContext.tsx +2129 -0
  175. package/src/contexts/ThemeContext.tsx +428 -0
  176. package/src/contexts/UserSettingsContext.tsx +290 -0
  177. package/src/contexts/__tests__/GlobalStatsContext.test.tsx +390 -0
  178. package/src/global.d.ts +273 -0
  179. package/src/hooks/useDocumentQueue.tsx +210 -0
  180. package/src/hooks/useToast.tsx +55 -0
  181. package/src/main.tsx +10 -0
  182. package/src/pages/Analytics.tsx +386 -0
  183. package/src/pages/CurrentSession.tsx +1174 -0
  184. package/src/pages/Dashboard.tsx +319 -0
  185. package/src/pages/Documents.tsx +317 -0
  186. package/src/pages/Projects.tsx +250 -0
  187. package/src/pages/Reporting.tsx +386 -0
  188. package/src/pages/Search.tsx +349 -0
  189. package/src/pages/Sessions.tsx +285 -0
  190. package/src/pages/Settings.tsx +2662 -0
  191. package/src/services/HyperlinkService.ts +1085 -0
  192. package/src/services/document/DocXMLaterProcessor.ts +617 -0
  193. package/src/services/document/DocumentProcessingComparison.ts +856 -0
  194. package/src/services/document/DocumentSnapshotService.ts +575 -0
  195. package/src/services/document/WordDocumentProcessor.ts +10509 -0
  196. package/src/services/document/__tests__/DocXMLaterProcessor.hyperlinks.test.md +311 -0
  197. package/src/services/document/__tests__/WordDocumentProcessor.integration.test.ts +515 -0
  198. package/src/services/document/__tests__/WordDocumentProcessor.test.ts +812 -0
  199. package/src/services/document/blanklines/BlankLineManager.ts +658 -0
  200. package/src/services/document/blanklines/__tests__/paragraphChecks.test.ts +281 -0
  201. package/src/services/document/blanklines/helpers/blankLineInsertion.ts +87 -0
  202. package/src/services/document/blanklines/helpers/blankLineSnapshot.ts +251 -0
  203. package/src/services/document/blanklines/helpers/clearCustom.ts +121 -0
  204. package/src/services/document/blanklines/helpers/contextChecks.ts +117 -0
  205. package/src/services/document/blanklines/helpers/imageChecks.ts +51 -0
  206. package/src/services/document/blanklines/helpers/paragraphChecks.ts +236 -0
  207. package/src/services/document/blanklines/helpers/removeBlanksBetweenListItems.ts +91 -0
  208. package/src/services/document/blanklines/helpers/removeTrailingBlanks.ts +35 -0
  209. package/src/services/document/blanklines/helpers/tableGuards.ts +21 -0
  210. package/src/services/document/blanklines/index.ts +67 -0
  211. package/src/services/document/blanklines/rules/additionRules.ts +337 -0
  212. package/src/services/document/blanklines/rules/indentationRules.ts +317 -0
  213. package/src/services/document/blanklines/rules/removalRules.ts +362 -0
  214. package/src/services/document/blanklines/rules/ruleTypes.ts +92 -0
  215. package/src/services/document/blanklines/types.ts +29 -0
  216. package/src/services/document/helpers/ImageBorderCropper.ts +377 -0
  217. package/src/services/document/helpers/__tests__/whitespace.test.ts +272 -0
  218. package/src/services/document/helpers/whitespace.ts +117 -0
  219. package/src/services/document/list/ListNormalizer.ts +947 -0
  220. package/src/services/document/list/index.ts +45 -0
  221. package/src/services/document/list/list-detection.ts +275 -0
  222. package/src/services/document/list/list-types.ts +162 -0
  223. package/src/services/document/processors/HyperlinkProcessor.ts +370 -0
  224. package/src/services/document/processors/ListProcessor.ts +257 -0
  225. package/src/services/document/processors/StructureProcessor.ts +176 -0
  226. package/src/services/document/processors/StyleProcessor.ts +389 -0
  227. package/src/services/document/processors/TableProcessor.ts +2238 -0
  228. package/src/services/document/processors/__tests__/HyperlinkProcessor.test.ts +314 -0
  229. package/src/services/document/processors/__tests__/ListProcessor.test.ts +291 -0
  230. package/src/services/document/processors/__tests__/StructureProcessor.test.ts +257 -0
  231. package/src/services/document/processors/__tests__/TableProcessor.hlp-tips-bullets.test.ts +459 -0
  232. package/src/services/document/processors/__tests__/TableProcessor.test.ts +1604 -0
  233. package/src/services/document/processors/index.ts +28 -0
  234. package/src/services/document/types/docx-processing.ts +310 -0
  235. package/src/services/editor/EditorActionHandlers.ts +901 -0
  236. package/src/services/editor/index.ts +13 -0
  237. package/src/setupTests.ts +47 -0
  238. package/src/styles/global.css +782 -0
  239. package/src/types/backup.ts +132 -0
  240. package/src/types/dictionary.ts +125 -0
  241. package/src/types/document-processing.ts +331 -0
  242. package/src/types/docxmlater-augments.d.ts +142 -0
  243. package/src/types/editor.ts +280 -0
  244. package/src/types/electron.ts +340 -0
  245. package/src/types/globalStats.ts +155 -0
  246. package/src/types/hyperlink.ts +471 -0
  247. package/src/types/operations.ts +354 -0
  248. package/src/types/session.ts +427 -0
  249. package/src/types/settings.ts +112 -0
  250. package/src/utils/MemoryMonitor.ts +248 -0
  251. package/src/utils/cn.ts +6 -0
  252. package/src/utils/colorConvert.ts +306 -0
  253. package/src/utils/diffUtils.ts +347 -0
  254. package/src/utils/documentUtils.ts +202 -0
  255. package/src/utils/electronGuard.ts +62 -0
  256. package/src/utils/indexedDB.ts +915 -0
  257. package/src/utils/logger.ts +717 -0
  258. package/src/utils/pathSecurity.ts +232 -0
  259. package/src/utils/pathValidator.ts +236 -0
  260. package/src/utils/processingTimeEstimator.ts +153 -0
  261. package/src/utils/safeJsonParse.ts +62 -0
  262. package/src/utils/textSanitizer.ts +162 -0
  263. package/src/utils/urlHelpers.ts +304 -0
  264. package/src/utils/urlPatterns.ts +198 -0
  265. package/src/utils/urlSanitizer.ts +152 -0
  266. package/src/vite-env.d.ts +11 -0
  267. package/tsconfig.electron.json +19 -0
  268. package/tsconfig.json +36 -0
  269. package/tsconfig.node.json +12 -0
  270. package/typedoc.json +45 -0
  271. package/vite.config.ts +152 -0
@@ -0,0 +1,617 @@
1
+ import {
2
+ Document,
3
+ isHyperlink,
4
+ isRevision,
5
+ isHyperlinkContent,
6
+ } from 'docxmlater';
7
+ import type { Hyperlink, Paragraph, Revision } from 'docxmlater';
8
+ import { ProcessorResult } from './types/docx-processing';
9
+ import { logger } from '@/utils/logger';
10
+
11
+ // Create namespaced logger for document processing operations
12
+ const log = logger.namespace('DocXMLater');
13
+
/**
 * Options accepted by the {@link DocXMLaterProcessor} constructor.
 *
 * Every field is optional. A field that is left out keeps the processor's
 * built-in default (`preserveFormatting: true`, `validateOutput: false`).
 *
 * @interface DocXMLaterOptions
 */
export interface DocXMLaterOptions {
  /** Preserve existing formatting when applying styles (default: `true`). */
  preserveFormatting?: boolean;

  /** Validate document structure before saving (default: `false`). */
  validateOutput?: boolean;
}
25
+
26
+ /**
27
+ * Main document processor class using the docxmlater library
28
+ *
29
+ * Provides a high-level API for DOCX document manipulation with comprehensive
30
+ * error handling, type safety, and performance optimizations.
31
+ *
32
+ * @class DocXMLaterProcessor
33
+ * @example
34
+ * ```typescript
35
+ * // Create processor with options
36
+ * const processor = new DocXMLaterProcessor({
37
+ * preserveFormatting: true,
38
+ * validateOutput: false
39
+ * });
40
+ *
41
+ * // Load and modify document
42
+ * const result = await processor.loadFromFile('input.docx');
43
+ * if (result.success) {
44
+ * // Work with document
45
+ * await processor.saveToFile(result.data, 'output.docx');
46
+ * }
47
+ * ```
48
+ */
49
+ export class DocXMLaterProcessor {
50
+ private defaultOptions: DocXMLaterOptions = {
51
+ preserveFormatting: true,
52
+ validateOutput: false,
53
+ };
54
+
/**
 * Builds a processor, layering caller-supplied options over the built-in defaults.
 *
 * @param {DocXMLaterOptions} [options={}] - Overrides for the default configuration
 * @param {boolean} [options.preserveFormatting=true] - Preserve existing formatting when applying styles
 * @param {boolean} [options.validateOutput=false] - Validate document structure before saving
 *
 * @example
 * ```typescript
 * // Built-in defaults
 * const processor = new DocXMLaterProcessor();
 *
 * // Explicit overrides
 * const strictProcessor = new DocXMLaterProcessor({
 *   preserveFormatting: false,
 *   validateOutput: true
 * });
 * ```
 */
constructor(options: DocXMLaterOptions = {}) {
  // Sources are applied left to right, so any key present on `options`
  // replaces the corresponding default.
  this.defaultOptions = Object.assign({}, this.defaultOptions, options);
}
77
+
78
+ // ========== Document I/O Operations ==========
79
+
/**
 * Load a DOCX document from a file path.
 *
 * Delegates to `Document.load` with `strictParsing: false`. This method does not
 * throw: any failure is logged and reported through the returned
 * {@link ProcessorResult} (`success: false` plus an `error` message).
 *
 * **IMPORTANT: Memory Management**
 *
 * Always call `dispose()` on the returned Document when finished to free resources
 * and prevent memory leaks, especially in long-running applications or when processing
 * multiple documents.
 *
 * @async
 * @param {string} filePath - Absolute or relative path to the DOCX file
 * @returns {Promise<ProcessorResult<Document>>} Result containing the loaded Document or error
 *
 * @group Document I/O
 *
 * @example
 * ```typescript
 * const processor = new DocXMLaterProcessor();
 *
 * // Load document
 * const result = await processor.loadFromFile('./documents/report.docx');
 *
 * if (result.success) {
 *   console.log('Document loaded successfully');
 *   const doc = result.data;
 *   // Work with document...
 *   doc.dispose(); // Clean up when done
 * } else {
 *   console.error('Failed to load:', result.error);
 * }
 * ```
 *
 * @see {@link Document} for document manipulation methods
 * @see {@link ProcessorResult} for result handling
 */
async loadFromFile(filePath: string): Promise<ProcessorResult<Document>> {
  log.debug('Loading document from file', { filePath });
  try {
    // Lenient parsing is requested explicitly; every other load option is left
    // at the library's default.
    const doc = await Document.load(filePath, { strictParsing: false });
    log.info('Document loaded successfully', { filePath });
    return {
      success: true,
      data: doc,
    };
  } catch (error: unknown) {
    // A thrown value is not guaranteed to be an Error (it may even be null), so
    // read `.message` only when it exists and stringify anything else.
    const message =
      typeof error === 'object' && error !== null && 'message' in error
        ? String((error as { message: unknown }).message)
        : String(error);
    log.error('Failed to load document', { filePath, error: message });
    return {
      success: false,
      error: `Failed to load document: ${message}`,
    };
  }
}
136
+
/**
 * Load a DOCX document from a Buffer object.
 *
 * Useful for processing documents from memory, HTTP responses, or other sources
 * that provide data as Buffer objects. Delegates to `Document.loadFromBuffer`
 * with no extra options. This method does not throw: any failure is logged and
 * reported through the returned {@link ProcessorResult}.
 *
 * **IMPORTANT: Memory Management**
 *
 * Always call `dispose()` on the returned Document when finished to free resources
 * and prevent memory leaks, especially in long-running applications or when processing
 * multiple documents.
 *
 * @async
 * @param {Buffer} buffer - Buffer containing the DOCX file data
 * @returns {Promise<ProcessorResult<Document>>} Result containing the loaded Document or error
 *
 * @group Document I/O
 *
 * @example
 * ```typescript
 * const processor = new DocXMLaterProcessor();
 *
 * // Load from HTTP response
 * const response = await fetch('https://example.com/document.docx');
 * const arrayBuffer = await response.arrayBuffer();
 * const buffer = Buffer.from(arrayBuffer);
 *
 * const result = await processor.loadFromBuffer(buffer);
 * if (result.success) {
 *   const doc = result.data;
 *   // Work with document...
 *   doc.dispose(); // Clean up when done
 * }
 * ```
 *
 * @see {@link loadFromFile} for loading from file paths
 * @see {@link Document} for document manipulation methods
 */
async loadFromBuffer(buffer: Buffer): Promise<ProcessorResult<Document>> {
  // The byte length is the only identifying detail available for log entries.
  const bufferSize = buffer.length;
  log.debug('Loading document from buffer', { bufferSize });
  try {
    const doc = await Document.loadFromBuffer(buffer);
    log.info('Document loaded from buffer successfully', { bufferSize });
    return {
      success: true,
      data: doc,
    };
  } catch (error: unknown) {
    // A thrown value is not guaranteed to be an Error (it may even be null), so
    // read `.message` only when it exists and stringify anything else.
    const message =
      typeof error === 'object' && error !== null && 'message' in error
        ? String((error as { message: unknown }).message)
        : String(error);
    log.error('Failed to load document from buffer', { bufferSize, error: message });
    return {
      success: false,
      error: `Failed to load document from buffer: ${message}`,
    };
  }
}
194
+
/**
 * Load a document with revision handling based on the auto-accept setting, then
 * enable track changes for the given author.
 *
 * This method simplifies the common pattern of loading documents for processing
 * where tracked changes must be handled differently based on user preferences.
 *
 * **When acceptRevisions is TRUE:**
 * - `Document.load` is called with `acceptRevisions: true` and `revisionHandling`
 *   left undefined
 * - Per the original author's notes, the library then accepts pre-existing
 *   revisions in memory, leaving a clean document (library behavior; not
 *   verifiable from this file)
 * - Track changes is enabled for the specified author
 *
 * **When acceptRevisions is FALSE:**
 * - `Document.load` is called with `acceptRevisions: false` and
 *   `revisionHandling: 'preserve'`
 * - Pre-existing tracked changes are intended to remain in the document
 * - Track changes is enabled for the specified author, so both pre-existing AND
 *   new changes are intended to be visible in Word
 *
 * This method does not throw: any failure is logged and reported through the
 * returned ProcessorResult.
 *
 * @async
 * @param filePath - Path to the DOCX file
 * @param options - Revision handling options
 * @param options.acceptRevisions - Whether to accept pre-existing revisions (default: false)
 * @param options.author - Author name for tracked changes (default: 'Doc Hub')
 * @param options.trackFormatting - Whether to track formatting changes (default: true)
 * @returns ProcessorResult containing the loaded Document ready for processing
 *
 * @example
 * ```typescript
 * // Auto-Accept ON: Clean document, track DocHub changes
 * const result = await processor.loadWithRevisionHandling('input.docx', {
 *   acceptRevisions: true,
 *   author: 'Doc Hub'
 * });
 *
 * // Auto-Accept OFF: Preserve all revisions
 * const result = await processor.loadWithRevisionHandling('input.docx', {
 *   acceptRevisions: false,
 *   author: 'Doc Hub'
 * });
 * ```
 */
async loadWithRevisionHandling(
  filePath: string,
  options: {
    acceptRevisions?: boolean;
    author?: string;
    trackFormatting?: boolean;
  } = {}
): Promise<ProcessorResult<Document>> {
  const { acceptRevisions = false, author = 'Doc Hub', trackFormatting = true } = options;

  log.debug('Loading document with revision handling', {
    filePath,
    acceptRevisions,
    author,
  });

  try {
    const doc = await Document.load(filePath, {
      strictParsing: false,
      acceptRevisions,
      // Explicit preservation is requested only when revisions are NOT being
      // accepted; otherwise the library's own handling applies.
      revisionHandling: acceptRevisions ? undefined : 'preserve',
    });

    // Enable track changes for subsequent modifications
    doc.enableTrackChanges({
      author,
      trackFormatting,
    });

    log.info('Document loaded with revision handling', {
      filePath,
      acceptRevisions,
      author,
    });

    return {
      success: true,
      data: doc,
    };
  } catch (error: unknown) {
    // A thrown value is not guaranteed to be an Error (it may even be null), so
    // read `.message` only when it exists and stringify anything else.
    const message =
      typeof error === 'object' && error !== null && 'message' in error
        ? String((error as { message: unknown }).message)
        : String(error);
    log.error('Failed to load document with revision handling', {
      filePath,
      error: message,
    });
    return {
      success: false,
      error: `Failed to load document: ${message}`,
    };
  }
}
287
+
/**
 * Save a Document to a file path.
 *
 * Delegates to `doc.save(filePath)` without any pre-save checks. This method
 * does not throw: any failure is logged and reported through the returned
 * {@link ProcessorResult}.
 *
 * @async
 * @param {Document} doc - Document instance to save
 * @param {string} filePath - Destination path for the DOCX file
 * @returns {Promise<ProcessorResult<void>>} `success: true` on completion, otherwise
 * `success: false` with an `error` message
 *
 * @group Document I/O
 *
 * @see {@link saveToFileWithValidation} for a size-checked save
 */
async saveToFile(doc: Document, filePath: string): Promise<ProcessorResult<void>> {
  log.debug('Saving document to file', { filePath });
  try {
    await doc.save(filePath);
    log.info('Document saved successfully', { filePath });
    return {
      success: true,
    };
  } catch (error: unknown) {
    // A thrown value is not guaranteed to be an Error (it may even be null), so
    // read `.message` only when it exists and stringify anything else.
    const message =
      typeof error === 'object' && error !== null && 'message' in error
        ? String((error as { message: unknown }).message)
        : String(error);
    log.error('Failed to save document', { filePath, error: message });
    return {
      success: false,
      error: `Failed to save document: ${message}`,
    };
  }
}
304
+
/**
 * Save a Document to a file path after a pre-save size check.
 *
 * Steps, in order:
 * 1. Estimate the document size via {@link estimateSize}; a failed or empty
 *    estimate aborts the save.
 * 2. Reject the save if the estimate exceeds `maxSizeMB`.
 * 3. Collect warnings (estimate above 10MB when `warnOnLarge` is on, plus any
 *    warning carried by the estimate itself).
 * 4. Delegate the actual write to {@link saveToFile}.
 *
 * No file is written unless steps 1-2 pass. Overwrite and directory semantics
 * are whatever `doc.save` provides; they are not established by this method.
 * This method does not throw: failures are reported through the returned result.
 *
 * @async
 * @param {Document} doc - Document instance to save
 * @param {string} filePath - Absolute or relative path where the DOCX file will be saved
 * @param {Object} [options] - Validation options
 * @param {number} [options.maxSizeMB=50] - Maximum allowed estimated size in MB (default: 50MB)
 * @param {boolean} [options.warnOnLarge=true] - Add a warning for estimates >10MB (default: true)
 * @returns {Promise<ProcessorResult<{sizeMB: number, warnings?: string[]}>>} Result with the
 * estimated size and any warnings, or an error
 *
 * @group Document I/O
 *
 * @example
 * ```typescript
 * const processor = new DocXMLaterProcessor();
 *
 * // Load and modify document
 * const loadResult = await processor.loadFromFile('input.docx');
 * if (loadResult.success) {
 *   const doc = loadResult.data;
 *
 *   // Make modifications...
 *   doc.replaceText('old', 'new');
 *
 *   // Save with validation
 *   const saveResult = await processor.saveToFileWithValidation(doc, 'output.docx');
 *   if (saveResult.success) {
 *     console.log(`Document saved (${saveResult.data.sizeMB.toFixed(2)}MB)`);
 *     if (saveResult.data.warnings?.length) {
 *       console.warn('Warnings:', saveResult.data.warnings);
 *     }
 *   } else {
 *     console.error('Save failed:', saveResult.error);
 *   }
 *
 *   doc.dispose();
 * }
 * ```
 */
async saveToFileWithValidation(
  doc: Document,
  filePath: string,
  options?: {
    maxSizeMB?: number;
    warnOnLarge?: boolean;
  }
): Promise<
  ProcessorResult<{
    sizeMB: number;
    warnings?: string[];
  }>
> {
  try {
    // NOTE(review): `||` makes an explicit 0 (or NaN) fall back to 50 as well.
    // Kept for backward compatibility; confirm whether 0 should be honored.
    const maxSizeMB = options?.maxSizeMB || 50;
    // Warnings are on unless the caller passes exactly `false`.
    const warnOnLarge = options?.warnOnLarge !== false;

    // Step 1: estimate the size before touching the file system.
    const sizeResult = await this.estimateSize(doc);
    if (!sizeResult.success || !sizeResult.data) {
      return {
        success: false,
        error: `Size validation failed: ${sizeResult.error || 'No size data returned'}`,
      };
    }

    const sizeData = sizeResult.data;
    const sizeMB = sizeData.totalEstimatedMB;

    // Step 2: hard limit - refuse to save oversized documents.
    if (sizeMB > maxSizeMB) {
      return {
        success: false,
        error: `Document size (${sizeMB.toFixed(2)}MB) exceeds maximum allowed size (${maxSizeMB}MB)`,
      };
    }

    // Step 3: soft limits - collect warnings but continue.
    const warnings: string[] = [];
    if (warnOnLarge && sizeMB > 10) {
      warnings.push(`Large document size: ${sizeMB.toFixed(2)}MB (recommended <10MB)`);
    }
    if (sizeData.warning) {
      warnings.push(sizeData.warning);
    }

    // Step 4: perform the save; saveToFile reports failure via its result.
    const saveResult = await this.saveToFile(doc, filePath);
    if (!saveResult.success) {
      return {
        success: false,
        error: saveResult.error || 'Save operation failed',
      };
    }

    return {
      success: true,
      data: {
        sizeMB,
        // Omit the field entirely when there is nothing to report.
        warnings: warnings.length > 0 ? warnings : undefined,
      },
    };
  } catch (error: unknown) {
    // A thrown value is not guaranteed to be an Error (it may even be null), so
    // read `.message` only when it exists and stringify anything else.
    const message =
      typeof error === 'object' && error !== null && 'message' in error
        ? String((error as { message: unknown }).message)
        : String(error);
    return {
      success: false,
      error: `Validation save failed: ${message}`,
    };
  }
}
419
+
420
+ // ========== Document Statistics ==========
421
+
/**
 * Estimate document size before saving.
 *
 * Wraps `doc.estimateSize()` and returns its value as the result's `data`.
 * Nothing is written to disk by this method. It does not throw: a failure
 * inside the estimate is reported through the returned {@link ProcessorResult}.
 *
 * @async
 * @param {Document} doc - Document to estimate
 * @returns {Promise<ProcessorResult<{totalEstimatedMB: number, warning?: string}>>} Result with size estimate or error
 *
 * @group Document Statistics
 */
async estimateSize(doc: Document): Promise<
  ProcessorResult<{
    totalEstimatedMB: number;
    warning?: string;
  }>
> {
  try {
    const sizeEstimate = doc.estimateSize();

    return {
      success: true,
      data: sizeEstimate,
    };
  } catch (error: unknown) {
    // A thrown value is not guaranteed to be an Error (it may even be null), so
    // read `.message` only when it exists and stringify anything else.
    const message =
      typeof error === 'object' && error !== null && 'message' in error
        ? String((error as { message: unknown }).message)
        : String(error);
    return {
      success: false,
      error: `Failed to estimate size: ${message}`,
    };
  }
}
454
+
/**
 * Create a new blank document.
 *
 * Thin wrapper around `Document.create()`, called with no arguments.
 * Always call dispose() on the returned Document when finished.
 *
 * @returns {Document} New blank Document instance
 *
 * @group Utilities
 */
createNewDocument(): Document {
  const blankDocument = Document.create();
  return blankDocument;
}
468
+
469
+ // ========== Hyperlink Operations ==========
470
+
471
/**
 * Extract all hyperlinks from a document
 *
 * **CRITICAL METHOD - DO NOT REMOVE**
 * This method is required by WordDocumentProcessor for hyperlink processing operations.
 * It extracts all hyperlinks from the document along with their context (paragraph, URL, text).
 *
 * Hyperlinks are collected from two places in each paragraph's content:
 * items that are hyperlinks themselves, and hyperlinks found directly inside
 * revision items (tracked changes).
 *
 * The returned text is automatically sanitized using sanitizeHyperlinkText() to prevent
 * XML parsing issues and ensure consistent formatting.
 *
 * @async
 * @param {Document} doc - Document to extract hyperlinks from
 * @returns {Promise<Array>} Array of hyperlink objects with structure:
 *   - hyperlink: The Hyperlink instance from docxmlater
 *   - paragraph: The Paragraph containing this hyperlink
 *   - paragraphIndex: Index of the paragraph in the document
 *   - hyperlinkIndexInParagraph: Zero-based position of this hyperlink among the
 *     hyperlinks collected from the same paragraph (direct and revision-nested
 *     hyperlinks share one counter, in encounter order)
 *   - url: The hyperlink URL (or undefined if internal/anchor)
 *   - text: Sanitized display text of the hyperlink
 *
 * @group Hyperlink Operations
 *
 * @example
 * ```typescript
 * const processor = new DocXMLaterProcessor();
 * const doc = await Document.load('document.docx');
 *
 * const hyperlinks = await processor.extractHyperlinks(doc);
 * console.log(`Found ${hyperlinks.length} hyperlinks`);
 *
 * for (const link of hyperlinks) {
 *   console.log(`Text: "${link.text}", URL: ${link.url}`);
 *   console.log(`Located in paragraph ${link.paragraphIndex}`);
 * }
 * ```
 *
 * @see {@link WordDocumentProcessor} - Uses this method for document processing
 */
async extractHyperlinks(doc: Document): Promise<
  Array<{
    hyperlink: Hyperlink;
    paragraph: Paragraph;
    paragraphIndex: number;
    hyperlinkIndexInParagraph: number; // Index of this hyperlink within its paragraph
    url?: string;
    text: string;
  }>
> {
  // Local alias so the element shape is written once inside the body;
  // it is structurally identical to the declared return element type.
  type ExtractedHyperlink = {
    hyperlink: Hyperlink;
    paragraph: Paragraph;
    paragraphIndex: number;
    hyperlinkIndexInParagraph: number;
    url?: string;
    text: string;
  };

  log.debug('Extracting hyperlinks from document');
  // Dynamic import to avoid formatter issues with unused imports
  const { sanitizeHyperlinkText } = await import('@/utils/textSanitizer');

  const hyperlinks: ExtractedHyperlink[] = [];

  // Reads the URL from a hyperlink. When getUrl() yields nothing but a
  // relationship ID is present, the URL is NOT resolved here - the situation is
  // only logged so the caller can resolve the relationship externally.
  // Defined once (outside the paragraph loop) because it captures no per-paragraph state.
  const extractUrlFromHyperlink = (hyperlinkItem: Hyperlink): string | undefined => {
    const url = hyperlinkItem.getUrl();

    if (!url) {
      // getRelationshipId may be absent on the hyperlink object, hence the optional call
      const relationshipId = hyperlinkItem.getRelationshipId?.();
      if (relationshipId) {
        log.debug('URL not available via getUrl(), relationship lookup may be needed', {
          relationshipId,
        });
      }
    }

    return url;
  };

  // Get all paragraphs from the document
  const paragraphs = doc.getAllParagraphs();
  log.debug('Scanning paragraphs for hyperlinks', { paragraphCount: paragraphs.length });

  for (let i = 0; i < paragraphs.length; i++) {
    const para = paragraphs[i];

    // Paragraph content can include Runs, Hyperlinks, Images, Revisions, etc.
    const content = para.getContent();

    // Position of the next hyperlink within this paragraph; restarts per paragraph
    let hyperlinkIndexInParagraph = 0;

    // Appends one result entry for the current paragraph and advances the counter
    const addHyperlink = (hyperlinkItem: Hyperlink, isInsideRevision: boolean): void => {
      const url = extractUrlFromHyperlink(hyperlinkItem);
      const sanitizedText = sanitizeHyperlinkText(hyperlinkItem.getText() || '');

      hyperlinks.push({
        hyperlink: hyperlinkItem,
        paragraph: para,
        paragraphIndex: i,
        hyperlinkIndexInParagraph,
        url,
        text: sanitizedText,
      });

      hyperlinkIndexInParagraph++;

      if (isInsideRevision) {
        log.debug('Found hyperlink inside revision element', { text: sanitizedText.substring(0, 50) });
      }
    };

    // Check each content item for hyperlinks using proper type guards
    for (const item of content) {
      if (isHyperlink(item)) {
        // Case 1: Direct Hyperlink instances
        addHyperlink(item, false);
      } else if (isRevision(item)) {
        // Case 2: Hyperlinks inside Revision elements (w:ins, w:del tracked changes).
        // Only the revision's immediate content is inspected.
        for (const innerItem of item.getContent()) {
          if (isHyperlinkContent(innerItem)) {
            addHyperlink(innerItem, true);
          }
        }
      }
    }
  }

  // Log summary with type breakdown: entries with a truthy url count as
  // external, everything else as internal. Counted in a single pass.
  let externalLinks = 0;
  for (const entry of hyperlinks) {
    if (entry.url) {
      externalLinks++;
    }
  }
  const internalLinks = hyperlinks.length - externalLinks;

  log.info('Hyperlinks extracted', {
    total: hyperlinks.length,
    external: externalLinks,
    internal: internalLinks,
  });

  return hyperlinks;
}
617
+ }