documentation-hub 5.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (271) hide show
  1. package/.eslintrc.json +43 -0
  2. package/.github/workflows/build.yml +64 -0
  3. package/.github/workflows/ci.yml +39 -0
  4. package/.vscode/extensions.json +3 -0
  5. package/Current.md +97 -0
  6. package/DocHub_Image.png +0 -0
  7. package/README.md +666 -0
  8. package/USER_GUIDE.md +1173 -0
  9. package/Updater.md +311 -0
  10. package/build/256x256.png +0 -0
  11. package/build/512x512.png +0 -0
  12. package/build/app-update.yml +4 -0
  13. package/build/create-icon.js +208 -0
  14. package/build/icon.ico +0 -0
  15. package/build/icon.png +0 -0
  16. package/build/icon_1024x1024.png +0 -0
  17. package/dist/assets/Analytics-BpsG9895.js +1 -0
  18. package/dist/assets/Card-IAZin8kp.js +1 -0
  19. package/dist/assets/CurrentSession-B-rFkHvf.js +12 -0
  20. package/dist/assets/Dashboard-C_5gMb0q.js +1 -0
  21. package/dist/assets/Documents-CqZ25axS.js +1 -0
  22. package/dist/assets/Input-l89xwXBi.js +1 -0
  23. package/dist/assets/Reporting-DqdHJY_a.js +1 -0
  24. package/dist/assets/Search-XNbu5z_3.js +1 -0
  25. package/dist/assets/SessionManager-lH9hZfzH.js +1 -0
  26. package/dist/assets/Sessions-ClZOPYNc.js +1 -0
  27. package/dist/assets/Settings-DUEHGURa.js +11 -0
  28. package/dist/assets/index-8xUe8ptc.js +24 -0
  29. package/dist/assets/index-RYyJqF7O.css +1 -0
  30. package/dist/assets/path-BkOl0AGO.js +1 -0
  31. package/dist/assets/promises-ID_B9S-h.js +1 -0
  32. package/dist/assets/urlHelpers-TvgahX0r.js +1 -0
  33. package/dist/assets/useToast-yRSO1dkm.js +1 -0
  34. package/dist/assets/vendor-charts-RkGK5ROP.js +36 -0
  35. package/dist/assets/vendor-db-l0sNRNKZ.js +1 -0
  36. package/dist/assets/vendor-react-BVZ_anCF.js +4 -0
  37. package/dist/assets/vendor-search-Dw8P0qyA.js +1 -0
  38. package/dist/assets/vendor-ui-BU7NfluV.js +53 -0
  39. package/dist/electron/PowerAutomateApiService-LfW09ZGr.js +147 -0
  40. package/dist/electron/main-CXkNtyv-.js +19789 -0
  41. package/dist/electron/main.js +5 -0
  42. package/dist/electron/preload.js +1 -0
  43. package/dist/icon.png +0 -0
  44. package/dist/index.html +27 -0
  45. package/docs/CODEBASE_ANALYSIS_REPORT.md +309 -0
  46. package/docs/DEBUG_LOGGING_GUIDE.md +244 -0
  47. package/docs/README.md +115 -0
  48. package/docs/TOC_WIRING_GUIDE.md +344 -0
  49. package/docs/analysis/Bullet_Symbol_Bug_Analysis.md +136 -0
  50. package/docs/analysis/DOCXMLATER_ANALYSIS_SUMMARY.txt +169 -0
  51. package/docs/analysis/Document_Processing_Issues_Analysis.md +704 -0
  52. package/docs/analysis/FIELD_PRESERVATION_ANALYSIS.md +1200 -0
  53. package/docs/analysis/INDENTATION_PRESERVE_ANALYSIS.md +181 -0
  54. package/docs/analysis/INDENTATION_PRESERVE_IMPLEMENTATION.md +207 -0
  55. package/docs/analysis/List_Implementation.md +206 -0
  56. package/docs/analysis/List_Implementation_Accuracy_Report.md +366 -0
  57. package/docs/analysis/PROCESSING_OPTIONS_UI_UPDATES.md +220 -0
  58. package/docs/analysis/RefactorStyles.md +852 -0
  59. package/docs/analysis/STYLE_PARAMETER_ENHANCEMENT.md +143 -0
  60. package/docs/analysis/docxmlater-comparison-todo-2025-11-13.md +636 -0
  61. package/docs/analysis/docxmlater-implementation-analysis-2025-11-13.md +340 -0
  62. package/docs/analysis/docxmlater-template_ui-integration-analysis.md +263 -0
  63. package/docs/analysis/github-issues-to-create.md +237 -0
  64. package/docs/api/API_README.md +538 -0
  65. package/docs/api/API_REFERENCE.md +751 -0
  66. package/docs/api/TYPE_DEFINITIONS.md +869 -0
  67. package/docs/architecture/FONT_EMBEDDING_GUIDE.md +318 -0
  68. package/docs/architecture/docxmlater-functions-and-structure.md +726 -0
  69. package/docs/docxmlater-readme.md +1341 -0
  70. package/docs/fixes/EXECUTION_LOG_TEST_BASE.md +573 -0
  71. package/docs/fixes/HYPERLINK_TEXT_SANITIZATION.md +253 -0
  72. package/docs/fixes/README.md +37 -0
  73. package/docs/github-issues/issue-1-body.md +125 -0
  74. package/docs/github-issues/issue-10-body.md +850 -0
  75. package/docs/github-issues/issue-2-body.md +200 -0
  76. package/docs/github-issues/issue-3-body.md +270 -0
  77. package/docs/github-issues/issue-4-body.md +169 -0
  78. package/docs/github-issues/issue-5-body.md +173 -0
  79. package/docs/github-issues/issue-6-body.md +158 -0
  80. package/docs/github-issues/issue-7-body.md +171 -0
  81. package/docs/github-issues/issue-8-body.md +407 -0
  82. package/docs/github-issues/issue-9-body.md +515 -0
  83. package/docs/github-issues/issue-tracker.md +274 -0
  84. package/docs/github-issues/predictive-analysis-2025-10-18.md +2131 -0
  85. package/docs/implementation/List_Framework_Refactor_Plan.md +336 -0
  86. package/docs/implementation/PRIMARY_TEXT_COLOR_FEATURE.md +217 -0
  87. package/docs/implementation/RELEASE_PLAN_v2.1.0.md +362 -0
  88. package/docs/implementation/RefactorStyles.md +588 -0
  89. package/docs/implementation/implement-plan.md +489 -0
  90. package/docs/implementation/missing-helpers-implementation.md +391 -0
  91. package/docs/implementation/refactor-plan.md +520 -0
  92. package/docs/implementation/session-implementation-complete.md +233 -0
  93. package/docs/implementation/session-management-plan.md +250 -0
  94. package/docs/setup-checklist.md +77 -0
  95. package/docs/versions/changelog.md +345 -0
  96. package/electron/customUpdater.ts +656 -0
  97. package/electron/main.ts +2441 -0
  98. package/electron/memoryConfig.ts +187 -0
  99. package/electron/preload.ts +394 -0
  100. package/electron/proxyConfig.ts +340 -0
  101. package/electron/services/BackupService.ts +452 -0
  102. package/electron/services/DictionaryService.ts +402 -0
  103. package/electron/services/LocalDictionaryLookupService.ts +147 -0
  104. package/electron/services/PowerAutomateApiService.ts +231 -0
  105. package/electron/services/SharePointSyncService.ts +474 -0
  106. package/electron/windowsCertStore.ts +427 -0
  107. package/electron/zscalerConfig.ts +381 -0
  108. package/eslint.config.js +92 -0
  109. package/jest.config.js +52 -0
  110. package/package.json +214 -0
  111. package/postcss.config.mjs +6 -0
  112. package/public/icon.png +0 -0
  113. package/publish-release.ps1 +5 -0
  114. package/renovate.json +30 -0
  115. package/src/App.tsx +216 -0
  116. package/src/__mocks__/p-limit.js +12 -0
  117. package/src/__mocks__/styleMock.js +1 -0
  118. package/src/components/common/BugReportButton.tsx +44 -0
  119. package/src/components/common/BugReportDialog.tsx +193 -0
  120. package/src/components/common/Button.tsx +153 -0
  121. package/src/components/common/Card.tsx +86 -0
  122. package/src/components/common/ColorPickerDialog.tsx +177 -0
  123. package/src/components/common/ConfirmDialog.tsx +96 -0
  124. package/src/components/common/DebugConsole.tsx +275 -0
  125. package/src/components/common/EmptyState.tsx +183 -0
  126. package/src/components/common/ErrorBoundary.tsx +98 -0
  127. package/src/components/common/ErrorDetailsDialog.tsx +153 -0
  128. package/src/components/common/ErrorFallback.tsx +218 -0
  129. package/src/components/common/Input.tsx +109 -0
  130. package/src/components/common/Skeleton.tsx +184 -0
  131. package/src/components/common/SplashScreen.tsx +81 -0
  132. package/src/components/common/Toast.tsx +155 -0
  133. package/src/components/common/Tooltip.tsx +79 -0
  134. package/src/components/common/UpdateNotification.tsx +320 -0
  135. package/src/components/comparison/ComparisonWindow.tsx +374 -0
  136. package/src/components/comparison/SideBySideDiff.tsx +486 -0
  137. package/src/components/comparison/index.ts +8 -0
  138. package/src/components/document/DocumentUploader.tsx +288 -0
  139. package/src/components/document/HyperlinkPreview.tsx +430 -0
  140. package/src/components/document/HyperlinkService.md +1484 -0
  141. package/src/components/document/Hyperlink_Technical_Documentation.md +496 -0
  142. package/src/components/document/InlineChangesView.tsx +707 -0
  143. package/src/components/document/ProcessingProgress.tsx +303 -0
  144. package/src/components/document/ProcessingResults.tsx +256 -0
  145. package/src/components/document/TrackedChangesDetail.tsx +530 -0
  146. package/src/components/document/TrackedChangesPanel.tsx +546 -0
  147. package/src/components/document/VirtualDocumentList.tsx +240 -0
  148. package/src/components/editor/DocumentEditor.tsx +723 -0
  149. package/src/components/editor/DocumentEditorModal.tsx +640 -0
  150. package/src/components/editor/EditorQuickActions.tsx +502 -0
  151. package/src/components/editor/EditorToolbar.tsx +312 -0
  152. package/src/components/editor/TableEditor.tsx +926 -0
  153. package/src/components/editor/index.ts +18 -0
  154. package/src/components/layout/Header.tsx +190 -0
  155. package/src/components/layout/Sidebar.tsx +313 -0
  156. package/src/components/layout/TitleBar.tsx +190 -0
  157. package/src/components/navigation/CommandPalette.tsx +233 -0
  158. package/src/components/navigation/KeyboardShortcutsModal.tsx +173 -0
  159. package/src/components/sessions/ChangeItem.tsx +408 -0
  160. package/src/components/sessions/ChangeViewer.tsx +1155 -0
  161. package/src/components/sessions/DocumentComparisonModal.tsx +314 -0
  162. package/src/components/sessions/ProcessingOptions.tsx +297 -0
  163. package/src/components/sessions/ReplacementsTab.tsx +438 -0
  164. package/src/components/sessions/RevisionHandlingOptions.tsx +87 -0
  165. package/src/components/sessions/SessionManager.tsx +188 -0
  166. package/src/components/sessions/StylesEditor.tsx +1335 -0
  167. package/src/components/sessions/TabContainer.tsx +151 -0
  168. package/src/components/sessions/VirtualSessionList.tsx +157 -0
  169. package/src/components/sessions/sessionToProcessorManager.tsx +420 -0
  170. package/src/components/settings/CertificateManager.tsx +410 -0
  171. package/src/components/settings/SegmentedControl.tsx +88 -0
  172. package/src/components/settings/SettingRow.tsx +52 -0
  173. package/src/contexts/GlobalStatsContext.tsx +396 -0
  174. package/src/contexts/SessionContext.tsx +2129 -0
  175. package/src/contexts/ThemeContext.tsx +428 -0
  176. package/src/contexts/UserSettingsContext.tsx +290 -0
  177. package/src/contexts/__tests__/GlobalStatsContext.test.tsx +390 -0
  178. package/src/global.d.ts +273 -0
  179. package/src/hooks/useDocumentQueue.tsx +210 -0
  180. package/src/hooks/useToast.tsx +55 -0
  181. package/src/main.tsx +10 -0
  182. package/src/pages/Analytics.tsx +386 -0
  183. package/src/pages/CurrentSession.tsx +1174 -0
  184. package/src/pages/Dashboard.tsx +319 -0
  185. package/src/pages/Documents.tsx +317 -0
  186. package/src/pages/Projects.tsx +250 -0
  187. package/src/pages/Reporting.tsx +386 -0
  188. package/src/pages/Search.tsx +349 -0
  189. package/src/pages/Sessions.tsx +285 -0
  190. package/src/pages/Settings.tsx +2662 -0
  191. package/src/services/HyperlinkService.ts +1085 -0
  192. package/src/services/document/DocXMLaterProcessor.ts +617 -0
  193. package/src/services/document/DocumentProcessingComparison.ts +856 -0
  194. package/src/services/document/DocumentSnapshotService.ts +575 -0
  195. package/src/services/document/WordDocumentProcessor.ts +10509 -0
  196. package/src/services/document/__tests__/DocXMLaterProcessor.hyperlinks.test.md +311 -0
  197. package/src/services/document/__tests__/WordDocumentProcessor.integration.test.ts +515 -0
  198. package/src/services/document/__tests__/WordDocumentProcessor.test.ts +812 -0
  199. package/src/services/document/blanklines/BlankLineManager.ts +658 -0
  200. package/src/services/document/blanklines/__tests__/paragraphChecks.test.ts +281 -0
  201. package/src/services/document/blanklines/helpers/blankLineInsertion.ts +87 -0
  202. package/src/services/document/blanklines/helpers/blankLineSnapshot.ts +251 -0
  203. package/src/services/document/blanklines/helpers/clearCustom.ts +121 -0
  204. package/src/services/document/blanklines/helpers/contextChecks.ts +117 -0
  205. package/src/services/document/blanklines/helpers/imageChecks.ts +51 -0
  206. package/src/services/document/blanklines/helpers/paragraphChecks.ts +236 -0
  207. package/src/services/document/blanklines/helpers/removeBlanksBetweenListItems.ts +91 -0
  208. package/src/services/document/blanklines/helpers/removeTrailingBlanks.ts +35 -0
  209. package/src/services/document/blanklines/helpers/tableGuards.ts +21 -0
  210. package/src/services/document/blanklines/index.ts +67 -0
  211. package/src/services/document/blanklines/rules/additionRules.ts +337 -0
  212. package/src/services/document/blanklines/rules/indentationRules.ts +317 -0
  213. package/src/services/document/blanklines/rules/removalRules.ts +362 -0
  214. package/src/services/document/blanklines/rules/ruleTypes.ts +92 -0
  215. package/src/services/document/blanklines/types.ts +29 -0
  216. package/src/services/document/helpers/ImageBorderCropper.ts +377 -0
  217. package/src/services/document/helpers/__tests__/whitespace.test.ts +272 -0
  218. package/src/services/document/helpers/whitespace.ts +117 -0
  219. package/src/services/document/list/ListNormalizer.ts +947 -0
  220. package/src/services/document/list/index.ts +45 -0
  221. package/src/services/document/list/list-detection.ts +275 -0
  222. package/src/services/document/list/list-types.ts +162 -0
  223. package/src/services/document/processors/HyperlinkProcessor.ts +370 -0
  224. package/src/services/document/processors/ListProcessor.ts +257 -0
  225. package/src/services/document/processors/StructureProcessor.ts +176 -0
  226. package/src/services/document/processors/StyleProcessor.ts +389 -0
  227. package/src/services/document/processors/TableProcessor.ts +2238 -0
  228. package/src/services/document/processors/__tests__/HyperlinkProcessor.test.ts +314 -0
  229. package/src/services/document/processors/__tests__/ListProcessor.test.ts +291 -0
  230. package/src/services/document/processors/__tests__/StructureProcessor.test.ts +257 -0
  231. package/src/services/document/processors/__tests__/TableProcessor.hlp-tips-bullets.test.ts +459 -0
  232. package/src/services/document/processors/__tests__/TableProcessor.test.ts +1604 -0
  233. package/src/services/document/processors/index.ts +28 -0
  234. package/src/services/document/types/docx-processing.ts +310 -0
  235. package/src/services/editor/EditorActionHandlers.ts +901 -0
  236. package/src/services/editor/index.ts +13 -0
  237. package/src/setupTests.ts +47 -0
  238. package/src/styles/global.css +782 -0
  239. package/src/types/backup.ts +132 -0
  240. package/src/types/dictionary.ts +125 -0
  241. package/src/types/document-processing.ts +331 -0
  242. package/src/types/docxmlater-augments.d.ts +142 -0
  243. package/src/types/editor.ts +280 -0
  244. package/src/types/electron.ts +340 -0
  245. package/src/types/globalStats.ts +155 -0
  246. package/src/types/hyperlink.ts +471 -0
  247. package/src/types/operations.ts +354 -0
  248. package/src/types/session.ts +427 -0
  249. package/src/types/settings.ts +112 -0
  250. package/src/utils/MemoryMonitor.ts +248 -0
  251. package/src/utils/cn.ts +6 -0
  252. package/src/utils/colorConvert.ts +306 -0
  253. package/src/utils/diffUtils.ts +347 -0
  254. package/src/utils/documentUtils.ts +202 -0
  255. package/src/utils/electronGuard.ts +62 -0
  256. package/src/utils/indexedDB.ts +915 -0
  257. package/src/utils/logger.ts +717 -0
  258. package/src/utils/pathSecurity.ts +232 -0
  259. package/src/utils/pathValidator.ts +236 -0
  260. package/src/utils/processingTimeEstimator.ts +153 -0
  261. package/src/utils/safeJsonParse.ts +62 -0
  262. package/src/utils/textSanitizer.ts +162 -0
  263. package/src/utils/urlHelpers.ts +304 -0
  264. package/src/utils/urlPatterns.ts +198 -0
  265. package/src/utils/urlSanitizer.ts +152 -0
  266. package/src/vite-env.d.ts +11 -0
  267. package/tsconfig.electron.json +19 -0
  268. package/tsconfig.json +36 -0
  269. package/tsconfig.node.json +12 -0
  270. package/typedoc.json +45 -0
  271. package/vite.config.ts +152 -0
@@ -0,0 +1,162 @@
1
+ /**
2
+ * Text Sanitization Utilities
3
+ *
4
+ * Handles defensive cleanup of corrupted text from docxmlater framework.
5
+ *
6
+ * PROBLEM CONTEXT:
7
+ * ─────────────────
8
+ * The docxmlater Hyperlink.getText() method may return text containing XML markup
9
+ * when the underlying Run object contains corrupted data. This happens when:
10
+ * - Hyperlink runs have malformed XML structures
11
+ * - Text nodes contain embedded XML tags like <w:t xml:space="preserve">
12
+ * - Document was previously corrupted or modified externally
13
+ *
14
+ * EXPECTED BEHAVIOR:
15
+ * The Run() constructor auto-cleans by default (cleanXmlFromText: true)
16
+ * But Hyperlink.getText() doesn't apply the same cleanup.
17
+ *
18
+ * SOLUTION:
19
+ * Apply defensive XML tag removal to all hyperlink text extraction.
20
+ * This prevents XML corruption from propagating through the system.
21
+ *
22
+ * EXAMPLE:
23
+ * Input: "Important Information<w:t xml:space=\"preserve\">1"
24
+ * Output: "Important Information1"
25
+ */
26
+
/**
 * Remove XML markup from hyperlink text.
 *
 * Strips spans that start like markup: opening/closing/self-closing tags
 * (`<w:t>`, `</w:t>`, `<w:t xml:space="preserve">`, `<br/>`), processing
 * instructions (`<?xml ...?>`) and declarations (`<!DOCTYPE ...>`).
 *
 * A `<` that is NOT followed by a tag-name character, `/`, `!` or `?` is left
 * alone, so ordinary text containing comparison operators
 * (e.g. "1 < 2 and 3 > 2") is no longer truncated. Text such as "a<b and c>d"
 * is still ambiguous and is treated as a tag.
 *
 * Safe to call on any text - if no tags are present, the input is returned
 * unchanged.
 *
 * @param text - The text that may contain XML markup
 * @returns The text with XML tags removed; '' for empty/nullish input
 *
 * @example
 * ```typescript
 * sanitizeHyperlinkText("Hello<w:t>World</w:t>")
 * // Returns: "HelloWorld"
 *
 * sanitizeHyperlinkText("Text with<w:t xml:space=\"preserve\">space")
 * // Returns: "Text withspace"
 *
 * sanitizeHyperlinkText("Limits: 1 < 2 and 3 > 2")
 * // Returns: "Limits: 1 < 2 and 3 > 2"
 * ```
 */
export function sanitizeHyperlinkText(text: string): string {
  if (!text) return '';

  // "<" must be followed by "!"/"?" (declaration / processing instruction) or
  // by an optional "/" and a name-start character; only then is everything up
  // to the next ">" removed.
  return text.replace(/<(?:[!?]|\/?[A-Za-z_])[^>]*>/g, '');
}
61
+
/**
 * Sanitize hyperlink display text, substituting a default when nothing is left.
 *
 * The text is first passed through `sanitizeHyperlinkText`. When that yields an
 * empty string and a non-empty `fallback` was supplied, the fallback is
 * returned instead; otherwise the sanitized text (possibly '') is returned.
 *
 * @param text - The text to sanitize
 * @param fallback - Optional replacement used only when the sanitized text is empty
 * @returns The sanitized text, or `fallback` when the sanitized text is empty
 *
 * @example
 * ```typescript
 * sanitizeHyperlinkTextWithFallback("<w:t>", "Click here")
 * // Returns: "Click here"
 *
 * sanitizeHyperlinkTextWithFallback("Normal Text")
 * // Returns: "Normal Text"
 * ```
 */
export function sanitizeHyperlinkTextWithFallback(text: string, fallback?: string): string {
  const cleanedText = sanitizeHyperlinkText(text);

  // Non-empty result wins outright; the fallback only matters for ''.
  if (cleanedText) {
    return cleanedText;
  }

  return fallback ? fallback : cleanedText;
}
92
+
/**
 * Check whether text appears to contain XML corruption.
 *
 * Reports true when the text contains a span that starts like markup: a tag
 * (`<w:t>`, `</w:t>`, `<br/>`), a processing instruction (`<?xml ...?>`) or a
 * declaration (`<!DOCTYPE ...>`). A bare `<` followed by whitespace, a digit or
 * punctuation (e.g. "1 < 2 and 3 > 2") is NOT reported, so plain text with
 * comparison operators no longer produces false positives.
 *
 * Useful for diagnostic logging and determining whether corruption occurred.
 *
 * @param text - The text to check
 * @returns true if the text contains XML-like tags; false for empty/nullish input
 *
 * @example
 * ```typescript
 * isTextCorrupted("Normal text")
 * // Returns: false
 *
 * isTextCorrupted("Text<w:t>with tags</w:t>")
 * // Returns: true
 *
 * isTextCorrupted("Limits: 1 < 2 and 3 > 2")
 * // Returns: false
 * ```
 */
export function isTextCorrupted(text: string): boolean {
  if (!text) return false;

  // Non-global regex: `test` keeps no lastIndex state between calls.
  return /<(?:[!?]|\/?[A-Za-z_])[^>]*>/.test(text);
}
114
+
/**
 * Sanitize a list of hyperlink texts.
 *
 * Runs `sanitizeHyperlinkText` over every element and returns the results in
 * the same order as a new array; the input array is not modified.
 *
 * @param texts - Array of texts to sanitize
 * @returns New array holding the sanitized counterpart of each input text
 *
 * @example
 * ```typescript
 * sanitizeHyperlinkTexts([
 *   "Text<w:t>1</w:t>",
 *   "Normal text",
 *   "Another<tag>corrupted</tag>"
 * ])
 * // Returns: ["Text1", "Normal text", "Anothercorrupted"]
 * ```
 */
export function sanitizeHyperlinkTexts(texts: string[]): string[] {
  const sanitizedTexts = texts.map((rawText) => sanitizeHyperlinkText(rawText));
  return sanitizedTexts;
}
136
+
/**
 * Convert typographic dashes to plain ASCII hyphens.
 *
 * Every en-dash (U+2013) and em-dash (U+2014) in the input is replaced with a
 * hyphen-minus (U+002D). All other characters are left as they are.
 *
 * @param text - The text that may contain en-dashes or em-dashes
 * @returns The text with both dash kinds replaced by "-"; '' for empty/nullish input
 *
 * @example
 * ```typescript
 * normalizeEnDashesToHyphens("2020\u20132024")   // "2020-2024"
 * normalizeEnDashesToHyphens("Hello\u2014World") // "Hello-World"
 * normalizeEnDashesToHyphens("Normal text with - hyphens") // unchanged
 * ```
 */
export function normalizeEnDashesToHyphens(text: string): string {
  if (!text) {
    return '';
  }

  const typographicDashes = /[\u2013\u2014]/g;
  return text.replace(typographicDashes, '-');
}
@@ -0,0 +1,304 @@
1
+ /**
2
+ * URL Helper Utilities
3
+ *
4
+ * Provides functions for sanitizing, validating, and fixing common URL encoding issues.
5
+ * This is critical for Azure Logic Apps URLs which often come with encoded query parameters.
6
+ */
7
+
/**
 * Sanitize a URL by decoding common encoding issues.
 *
 * Fixes three common URL encoding problems:
 * 1. Unicode escapes: the literal text `\u0026` → `&`
 * 2. HTML entities: `&amp;` → `&`, `&lt;` → `<`, `&gt;` → `>`, `&quot;` → `"`
 * 3. Percent-encoding in the query string: `%26` → `&`, `%2F` → `/`, etc.
 *
 * Step 3 only touches the query string and decodes each key and value exactly
 * ONCE:
 * - `%2541` becomes `%41`, not `A` (no double decoding).
 * - A malformed or literal `%` in one component leaves just that component
 *   untouched instead of silently abandoning decoding for the whole query.
 * - `+` is preserved verbatim rather than being turned into a space, so
 *   signature-style values are not corrupted.
 * - Key-only parameters (`?flag`) keep their original form.
 *
 * Everything before the query (scheme, credentials, host, port, path) is taken
 * from the parsed URL's normalized `href`, so an empty path becomes `/`. If the
 * input cannot be parsed as an absolute URL, or has no query parameters, the
 * result of steps 1-2 is returned as-is.
 *
 * @param url - The URL to sanitize (may contain encoded characters)
 * @returns Sanitized URL with decoded query parameters; falsy input is returned unchanged
 *
 * @example
 * // Azure Logic App URL with Unicode escapes (literal backslash-u0026 in the text)
 * const encoded = 'https://api.com?v=1\\u0026sp=/triggers';
 * const clean = sanitizeUrl(encoded);
 * // Result: 'https://api.com/?v=1&sp=/triggers'
 */
export function sanitizeUrl(url: string): string {
  if (!url) return url;

  // Steps 1 + 2: textual escapes that are not part of URL syntax at all.
  // Order matters and matches the historical behavior (&amp; first).
  const unescaped = url
    .replace(/\\u0026/g, '&')
    .replace(/&amp;/g, '&')
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    .replace(/&quot;/g, '"');

  let parsed: URL;
  try {
    parsed = new URL(unescaped);
  } catch {
    // Not an absolute URL (relative path, content ID, malformed input):
    // the partially sanitized text is the best we can do.
    return unescaped;
  }

  // Work on the raw (still percent-encoded) query so each component is decoded
  // exactly once; URLSearchParams would already have decoded it for us.
  const segments = parsed.search
    .slice(1)
    .split('&')
    .filter((segment) => segment.length > 0);

  if (segments.length === 0) {
    return unescaped;
  }

  const decodeOnce = (component: string): string => {
    try {
      return decodeURIComponent(component);
    } catch {
      // Malformed escape (e.g. a bare "%"): keep this component as written.
      return component;
    }
  };

  const query = segments
    .map((segment) => {
      const separator = segment.indexOf('=');
      if (separator === -1) {
        return decodeOnce(segment);
      }
      const key = segment.slice(0, separator);
      const value = segment.slice(separator + 1);
      return `${decodeOnce(key)}=${decodeOnce(value)}`;
    })
    .join('&');

  // The first "?" in a serialized URL always starts the query, so everything
  // before it is scheme + authority + path. Using href (rather than origin)
  // keeps credentials and works for schemes whose origin is "null".
  const base = parsed.href.slice(0, parsed.href.indexOf('?'));

  return `${base}?${query}${parsed.hash}`;
}
64
+
/**
 * Validate that a URL looks usable as an Azure Logic Apps / Power Automate endpoint.
 *
 * Checks performed:
 * - Empty input is rejected immediately.
 * - Unicode escapes (`\u0026`) or `&amp;` entities in the raw input produce warnings.
 * - The sanitized URL must parse and must use HTTPS (issue otherwise).
 * - `api-version` must be present in the query (issue otherwise).
 * - `sp`, `sv` and `sig` are recommended; each one missing produces a warning.
 *
 * @param url - The URL to validate
 * @returns `valid` is true when no issues were recorded; `issues` lists blocking
 *          problems and `warnings` lists non-blocking observations
 */
export function validatePowerAutomateUrl(url: string): {
  valid: boolean;
  issues: string[];
  warnings: string[];
} {
  const issues: string[] = [];
  const warnings: string[] = [];

  if (!url) {
    issues.push('URL is empty');
    return { valid: false, issues, warnings };
  }

  // Encoded separators in the raw input are tolerated but worth surfacing.
  if (url.includes('\\u0026')) {
    warnings.push('URL contains Unicode escapes (\\u0026). These will be auto-decoded.');
  }
  if (url.includes('&amp;')) {
    warnings.push('URL contains HTML entities (&amp;). These will be auto-decoded.');
  }

  // Parameters whose absence is only a warning, in reporting order.
  const recommendedParams: Array<[string, string]> = [
    [
      'sp',
      'Missing "sp" parameter (shared access policy). May be required depending on your Logic App configuration.',
    ],
    [
      'sv',
      'Missing "sv" parameter (signature version). May be required depending on your Logic App configuration.',
    ],
    ['sig', 'Missing "sig" parameter (signature). May be required for authentication.'],
  ];

  try {
    const parsedUrl = new URL(sanitizeUrl(url));

    if (parsedUrl.protocol !== 'https:') {
      issues.push('URL must use HTTPS protocol for Azure Logic Apps');
    }

    const query = new URLSearchParams(parsedUrl.search);

    if (!query.has('api-version')) {
      issues.push('Missing required parameter: api-version');
    }

    recommendedParams.forEach(([paramName, warningText]) => {
      if (!query.has(paramName)) {
        warnings.push(warningText);
      }
    });
  } catch (e) {
    const reason = e instanceof Error ? e.message : 'Unknown error';
    issues.push(`Invalid URL format: ${reason}`);
  }

  const valid = issues.length === 0;
  return { valid, issues, warnings };
}
136
+
/**
 * Probe a URL with a HEAD request and report whether it answered successfully.
 *
 * The URL is passed through `sanitizeUrl` first. The request is aborted via an
 * `AbortController` once `timeoutMs` elapses; the timer is always cleared
 * before the function settles.
 *
 * @param url - The URL to test
 * @param timeoutMs - Timeout in milliseconds (default: 10000)
 * @returns `reachable` mirrors `response.ok` and `statusCode` carries the HTTP
 *          status when a response arrived; on failure `reachable` is false and
 *          `error` holds either a timeout message or the underlying error message
 */
export async function testUrlReachability(
  url: string,
  timeoutMs: number = 10000
): Promise<{
  reachable: boolean;
  statusCode?: number;
  error?: string;
}> {
  const abortController = new AbortController();
  const abortTimer = setTimeout(() => abortController.abort(), timeoutMs);

  try {
    // HEAD avoids downloading a potentially large response body.
    const response = await fetch(sanitizeUrl(url), {
      method: 'HEAD',
      signal: abortController.signal,
    });

    return {
      reachable: response.ok,
      statusCode: response.status,
    };
  } catch (error) {
    if (!(error instanceof Error)) {
      return { reachable: false, error: 'Unknown error' };
    }

    // An AbortError here means our own timer fired.
    const message =
      error.name === 'AbortError' ? `Request timed out after ${timeoutMs}ms` : error.message;

    return { reachable: false, error: message };
  } finally {
    clearTimeout(abortTimer);
  }
}
186
+
/**
 * Collect the query parameters of a URL into a Map.
 *
 * The URL is sanitized with `sanitizeUrl` before parsing. When a parameter name
 * occurs more than once, the last occurrence wins. A URL that cannot be parsed
 * yields an empty Map rather than an error.
 *
 * @param url - The URL to parse
 * @returns Map of parameter names to values (empty when the URL is malformed)
 */
export function extractQueryParams(url: string): Map<string, string> {
  const collected = new Map<string, string>();

  let queryString: string;
  try {
    queryString = new URL(sanitizeUrl(url)).search;
  } catch (e) {
    // Malformed URL: nothing to extract.
    return collected;
  }

  new URLSearchParams(queryString).forEach((paramValue, paramName) => {
    collected.set(paramName, paramValue);
  });

  return collected;
}
210
+
/**
 * Report whether a URL contains any of the known encoded markers.
 *
 * The markers are the literal Unicode escape `\u0026` and the HTML entities
 * `&amp;`, `&lt;`, `&gt;` and `&quot;`. Percent-encoding is not considered.
 *
 * @param url - The URL to check
 * @returns True if at least one marker occurs in the URL; false otherwise
 *          (including for empty input)
 */
export function hasEncodingIssues(url: string): boolean {
  if (!url) {
    return false;
  }

  const encodedMarkers = ['\\u0026', '&amp;', '&lt;', '&gt;', '&quot;'];
  return encodedMarkers.some((marker) => url.includes(marker));
}
228
+
/**
 * SECURITY: Validate the scheme of a user-supplied hyperlink replacement URL.
 *
 * Decision order:
 * 1. Empty or whitespace-only input is accepted (`isHttp: false`); callers
 *    handle empties elsewhere.
 * 2. If the input parses as an absolute URL, only `http:` and `https:` are
 *    accepted; any other scheme (javascript:, data:, file:, ...) is rejected.
 * 3. If parsing fails, the lower-cased, trimmed input is checked for the
 *    `javascript:`, `data:` and `file:` prefixes and rejected on a match.
 * 4. Anything else that does not parse (relative path, content ID) is accepted
 *    with `isHttp: false`.
 *
 * @param url - The URL to validate
 * @returns `valid` tells whether the value may be used, `isHttp` whether it is
 *          an absolute http(s) URL, and `error` explains a rejection
 *
 * @example
 * validateUrlScheme('https://example.com') // { valid: true, isHttp: true }
 * validateUrlScheme('javascript:alert(1)') // { valid: false, error: '...', isHttp: false }
 */
export function validateUrlScheme(url: string): {
  valid: boolean;
  isHttp: boolean;
  error?: string;
} {
  if (!url || url.trim() === '') {
    return { valid: true, isHttp: false };
  }

  let parsedProtocol: string | undefined;
  try {
    parsedProtocol = new URL(url).protocol;
  } catch (error) {
    parsedProtocol = undefined;
  }

  if (parsedProtocol !== undefined) {
    // Absolute URL: allow-list on the scheme.
    const scheme = parsedProtocol.toLowerCase();
    if (scheme === 'http:' || scheme === 'https:') {
      return { valid: true, isHttp: true };
    }

    return {
      valid: false,
      isHttp: false,
      error: `Dangerous URL scheme detected: "${parsedProtocol}". Only http:// and https:// are allowed for security.`,
    };
  }

  // Unparseable input: still refuse obviously dangerous prefixes.
  const normalized = url.toLowerCase().trim();
  const blockedPrefixes: Array<[string, string]> = [
    ['javascript:', 'JavaScript URLs are not allowed for security reasons.'],
    ['data:', 'Data URLs are not allowed for security reasons.'],
    ['file:', 'File URLs are not allowed for security reasons.'],
  ];

  for (const [prefix, message] of blockedPrefixes) {
    if (normalized.startsWith(prefix)) {
      return { valid: false, isHttp: false, error: message };
    }
  }

  // Likely a content ID or relative path.
  return { valid: true, isHttp: false };
}
@@ -0,0 +1,198 @@
1
+ /**
2
+ * URL Pattern Utilities for theSource Hyperlink Processing
3
+ *
4
+ * SINGLE SOURCE OF TRUTH for Content ID and Document ID extraction
5
+ * Used across all hyperlink processing services
6
+ *
7
+ * This utility centralizes regex patterns that were previously duplicated across:
8
+ * - HyperlinkService.ts
9
+ * - WordDocumentProcessor.ts
10
+ * - HyperlinkManager.ts
11
+ * - DocXMLaterProcessor.ts
12
+ * - types/hyperlink.ts
13
+ */
14
+
15
/**
 * Regex Patterns for theSource URLs
 *
 * Content ID Format: TSRC-ABC-123456 or CMS-XYZ-789012
 * Document ID Format: docid=<uuid-or-alphanumeric>
 *
 * None of the patterns use the global flag, so they carry no `lastIndex`
 * state and are safe to share between callers.
 */
export const URL_PATTERNS = {
  /**
   * Matches theSource Content IDs
   * Examples: TSRC-ABC-123456, CMS-XYZ-789012
   * Pattern: (TSRC|CMS)-(alphanumeric)-(6 digits)
   *
   * Format Specification:
   * - Prefix: TSRC or CMS (case-insensitive)
   * - Separator: hyphen (-)
   * - Middle: alphanumeric characters (A-Z, a-z, 0-9)
   * - Separator: hyphen (-)
   * - Suffix: exactly 6 digits
   *
   * The trailing `(?!\d)` lookahead enforces "exactly 6": without it a
   * longer digit run (e.g. TSRC-ABC-1234567) would match its first six
   * digits and yield a truncated, wrong ID. The lookahead is zero-width, so
   * the matched text and capture groups are unchanged for well-formed IDs.
   */
  CONTENT_ID: /(TSRC|CMS)-([a-zA-Z0-9]+)-(\d{6})(?!\d)/i,

  /**
   * Matches theSource Document IDs
   * Examples: docid=abc-123-def, docid=abc123
   * Pattern: docid=(alphanumeric with dashes)
   *
   * Format Specification:
   * - Prefix: docid= (case-insensitive)
   * - Value: alphanumeric characters with optional hyphens (capture group 1)
   * - Boundary: stops at non-alphanumeric/dash character or end of string
   *
   * Note: the boundary character, when present, is consumed as part of the
   * overall match (match[0]); read the ID from capture group 1.
   */
  DOCUMENT_ID: /docid=([a-zA-Z0-9-]+)(?:[^a-zA-Z0-9-]|$)/i,

  /**
   * Matches theSource domain
   * Example: thesource.cvshealth.com
   *
   * The pattern is unanchored: it matches the domain text anywhere in the
   * tested string (case-insensitive), not only in the host position.
   */
  THE_SOURCE_DOMAIN: /thesource\.cvshealth\.com/i,
} as const;
54
+
55
/**
 * Pull the first theSource Content ID out of a URL.
 *
 * The search uses URL_PATTERNS.CONTENT_ID and returns the matched text
 * exactly as it appears in the input (no case normalisation).
 *
 * @param url - URL to search
 * @returns The matched Content ID (e.g., "TSRC-ABC-123456"), or null when
 *          the input is empty or contains no Content ID
 *
 * @example
 * extractContentId('https://thesource.com/doc?Content_ID=TSRC-ABC-123456')
 * // Returns: "TSRC-ABC-123456"
 *
 * extractContentId('https://google.com')
 * // Returns: null
 */
export function extractContentId(url: string): string | null {
  if (!url) {
    return null;
  }

  const found = url.match(URL_PATTERNS.CONTENT_ID);
  if (found === null) {
    return null;
  }

  // Index 0 is the whole match, i.e. the complete ID (TSRC-ABC-123456).
  return found[0];
}
73
+
74
/**
 * Pull the first theSource Content ID out of arbitrary text
 * (file path, display text, etc.).
 *
 * Intended as a fallback for when no URL is available from getUrl(), such
 * as file-type hyperlinks whose target lives in the relationship rather
 * than in the URL. The lookup is the same Content ID pattern search that
 * extractContentId performs, so this simply delegates to it.
 *
 * @param text - Text to search for a Content ID
 * @returns The matched Content ID (e.g., "TSRC-ABC-123456"), or null when
 *          the text is empty or contains no Content ID
 *
 * @example
 * extractContentIdFromText('C:\\Users\\user\\Downloads\\TSRC-PROD-015483')
 * // Returns: "TSRC-PROD-015483"
 *
 * extractContentIdFromText('Document: TSRC-ABC-123456 (Final)')
 * // Returns: "TSRC-ABC-123456"
 *
 * extractContentIdFromText('Reviewing SharePoint Errors (Seniors Only)')
 * // Returns: null
 */
export function extractContentIdFromText(text: string): string | null {
  return extractContentId(text);
}
99
+
100
/**
 * Pull the Document ID (the value following "docid=") out of a URL.
 *
 * The search uses URL_PATTERNS.DOCUMENT_ID; only the ID itself is returned,
 * without the "docid=" prefix or any trailing delimiter.
 *
 * @param url - URL to search
 * @returns The Document ID (UUID/alphanumeric), or null when the input is
 *          empty or contains no "docid=" value
 *
 * @example
 * extractDocumentId('https://thesource.com/#!/view?docid=abc-123-def')
 * // Returns: "abc-123-def"
 *
 * extractDocumentId('https://thesource.com/#!/view?docid=abc123#content')
 * // Returns: "abc123"
 *
 * extractDocumentId('https://google.com')
 * // Returns: null
 */
export function extractDocumentId(url: string): string | null {
  if (!url) {
    return null;
  }

  const found = url.match(URL_PATTERNS.DOCUMENT_ID);
  if (found === null) {
    return null;
  }

  // Capture group 1 holds the bare ID; index 0 would include "docid=".
  return found[1];
}
121
+
122
/**
 * Collect both lookup identifiers (Content ID and Document ID) from a URL.
 *
 * Each identifier is extracted independently via extractContentId and
 * extractDocumentId; only the ones actually found appear as keys on the
 * result.
 *
 * @param url - URL to search
 * @returns Object carrying contentId and/or documentId, or null when the
 *          input is empty or neither identifier is present
 *
 * @example
 * extractLookupIds('https://thesource.com/doc?Content_ID=TSRC-ABC-123456&docid=abc123')
 * // Returns: { contentId: "TSRC-ABC-123456", documentId: "abc123" }
 *
 * extractLookupIds('https://thesource.com/doc?Content_ID=TSRC-ABC-123456')
 * // Returns: { contentId: "TSRC-ABC-123456" }
 *
 * extractLookupIds('https://google.com')
 * // Returns: null
 */
export function extractLookupIds(url: string): {
  contentId?: string;
  documentId?: string;
} | null {
  if (!url) {
    return null;
  }

  const contentId = extractContentId(url);
  const documentId = extractDocumentId(url);

  // Nothing usable was found: signal that with null rather than {}.
  if (!contentId && !documentId) {
    return null;
  }

  // Spread in only the identifiers that exist so absent ones are not
  // present as keys at all.
  return {
    ...(contentId ? { contentId } : {}),
    ...(documentId ? { documentId } : {}),
  };
}
161
+
162
/**
 * Tell whether a URL refers to theSource.
 *
 * This is a case-insensitive substring test against
 * URL_PATTERNS.THE_SOURCE_DOMAIN: the domain text may appear anywhere in
 * the string, not only in the host position.
 *
 * @param url - URL to check
 * @returns true when the theSource domain occurs in the input, false
 *          otherwise (including for empty input)
 *
 * @example
 * isTheSourceUrl('https://thesource.cvshealth.com/nuxeo/...')
 * // Returns: true
 *
 * isTheSourceUrl('https://google.com')
 * // Returns: false
 */
export function isTheSourceUrl(url: string): boolean {
  return Boolean(url) && URL_PATTERNS.THE_SOURCE_DOMAIN.test(url);
}
179
+
180
/**
 * Tell whether a URL contains a Content ID.
 *
 * Convenience predicate over extractContentId.
 *
 * @param url - URL to check
 * @returns true when extractContentId finds a Content ID, false otherwise
 */
export function hasContentId(url: string): boolean {
  const contentId = extractContentId(url);
  return contentId !== null;
}
189
+
190
/**
 * Tell whether a URL contains a Document ID.
 *
 * Convenience predicate over extractDocumentId.
 *
 * @param url - URL to check
 * @returns true when extractDocumentId finds a Document ID, false otherwise
 */
export function hasDocumentId(url: string): boolean {
  const documentId = extractDocumentId(url);
  return documentId !== null;
}