documentation-hub 5.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +43 -0
- package/.github/workflows/build.yml +64 -0
- package/.github/workflows/ci.yml +39 -0
- package/.vscode/extensions.json +3 -0
- package/Current.md +97 -0
- package/DocHub_Image.png +0 -0
- package/README.md +666 -0
- package/USER_GUIDE.md +1173 -0
- package/Updater.md +311 -0
- package/build/256x256.png +0 -0
- package/build/512x512.png +0 -0
- package/build/app-update.yml +4 -0
- package/build/create-icon.js +208 -0
- package/build/icon.ico +0 -0
- package/build/icon.png +0 -0
- package/build/icon_1024x1024.png +0 -0
- package/dist/assets/Analytics-BpsG9895.js +1 -0
- package/dist/assets/Card-IAZin8kp.js +1 -0
- package/dist/assets/CurrentSession-B-rFkHvf.js +12 -0
- package/dist/assets/Dashboard-C_5gMb0q.js +1 -0
- package/dist/assets/Documents-CqZ25axS.js +1 -0
- package/dist/assets/Input-l89xwXBi.js +1 -0
- package/dist/assets/Reporting-DqdHJY_a.js +1 -0
- package/dist/assets/Search-XNbu5z_3.js +1 -0
- package/dist/assets/SessionManager-lH9hZfzH.js +1 -0
- package/dist/assets/Sessions-ClZOPYNc.js +1 -0
- package/dist/assets/Settings-DUEHGURa.js +11 -0
- package/dist/assets/index-8xUe8ptc.js +24 -0
- package/dist/assets/index-RYyJqF7O.css +1 -0
- package/dist/assets/path-BkOl0AGO.js +1 -0
- package/dist/assets/promises-ID_B9S-h.js +1 -0
- package/dist/assets/urlHelpers-TvgahX0r.js +1 -0
- package/dist/assets/useToast-yRSO1dkm.js +1 -0
- package/dist/assets/vendor-charts-RkGK5ROP.js +36 -0
- package/dist/assets/vendor-db-l0sNRNKZ.js +1 -0
- package/dist/assets/vendor-react-BVZ_anCF.js +4 -0
- package/dist/assets/vendor-search-Dw8P0qyA.js +1 -0
- package/dist/assets/vendor-ui-BU7NfluV.js +53 -0
- package/dist/electron/PowerAutomateApiService-LfW09ZGr.js +147 -0
- package/dist/electron/main-CXkNtyv-.js +19789 -0
- package/dist/electron/main.js +5 -0
- package/dist/electron/preload.js +1 -0
- package/dist/icon.png +0 -0
- package/dist/index.html +27 -0
- package/docs/CODEBASE_ANALYSIS_REPORT.md +309 -0
- package/docs/DEBUG_LOGGING_GUIDE.md +244 -0
- package/docs/README.md +115 -0
- package/docs/TOC_WIRING_GUIDE.md +344 -0
- package/docs/analysis/Bullet_Symbol_Bug_Analysis.md +136 -0
- package/docs/analysis/DOCXMLATER_ANALYSIS_SUMMARY.txt +169 -0
- package/docs/analysis/Document_Processing_Issues_Analysis.md +704 -0
- package/docs/analysis/FIELD_PRESERVATION_ANALYSIS.md +1200 -0
- package/docs/analysis/INDENTATION_PRESERVE_ANALYSIS.md +181 -0
- package/docs/analysis/INDENTATION_PRESERVE_IMPLEMENTATION.md +207 -0
- package/docs/analysis/List_Implementation.md +206 -0
- package/docs/analysis/List_Implementation_Accuracy_Report.md +366 -0
- package/docs/analysis/PROCESSING_OPTIONS_UI_UPDATES.md +220 -0
- package/docs/analysis/RefactorStyles.md +852 -0
- package/docs/analysis/STYLE_PARAMETER_ENHANCEMENT.md +143 -0
- package/docs/analysis/docxmlater-comparison-todo-2025-11-13.md +636 -0
- package/docs/analysis/docxmlater-implementation-analysis-2025-11-13.md +340 -0
- package/docs/analysis/docxmlater-template_ui-integration-analysis.md +263 -0
- package/docs/analysis/github-issues-to-create.md +237 -0
- package/docs/api/API_README.md +538 -0
- package/docs/api/API_REFERENCE.md +751 -0
- package/docs/api/TYPE_DEFINITIONS.md +869 -0
- package/docs/architecture/FONT_EMBEDDING_GUIDE.md +318 -0
- package/docs/architecture/docxmlater-functions-and-structure.md +726 -0
- package/docs/docxmlater-readme.md +1341 -0
- package/docs/fixes/EXECUTION_LOG_TEST_BASE.md +573 -0
- package/docs/fixes/HYPERLINK_TEXT_SANITIZATION.md +253 -0
- package/docs/fixes/README.md +37 -0
- package/docs/github-issues/issue-1-body.md +125 -0
- package/docs/github-issues/issue-10-body.md +850 -0
- package/docs/github-issues/issue-2-body.md +200 -0
- package/docs/github-issues/issue-3-body.md +270 -0
- package/docs/github-issues/issue-4-body.md +169 -0
- package/docs/github-issues/issue-5-body.md +173 -0
- package/docs/github-issues/issue-6-body.md +158 -0
- package/docs/github-issues/issue-7-body.md +171 -0
- package/docs/github-issues/issue-8-body.md +407 -0
- package/docs/github-issues/issue-9-body.md +515 -0
- package/docs/github-issues/issue-tracker.md +274 -0
- package/docs/github-issues/predictive-analysis-2025-10-18.md +2131 -0
- package/docs/implementation/List_Framework_Refactor_Plan.md +336 -0
- package/docs/implementation/PRIMARY_TEXT_COLOR_FEATURE.md +217 -0
- package/docs/implementation/RELEASE_PLAN_v2.1.0.md +362 -0
- package/docs/implementation/RefactorStyles.md +588 -0
- package/docs/implementation/implement-plan.md +489 -0
- package/docs/implementation/missing-helpers-implementation.md +391 -0
- package/docs/implementation/refactor-plan.md +520 -0
- package/docs/implementation/session-implementation-complete.md +233 -0
- package/docs/implementation/session-management-plan.md +250 -0
- package/docs/setup-checklist.md +77 -0
- package/docs/versions/changelog.md +345 -0
- package/electron/customUpdater.ts +656 -0
- package/electron/main.ts +2441 -0
- package/electron/memoryConfig.ts +187 -0
- package/electron/preload.ts +394 -0
- package/electron/proxyConfig.ts +340 -0
- package/electron/services/BackupService.ts +452 -0
- package/electron/services/DictionaryService.ts +402 -0
- package/electron/services/LocalDictionaryLookupService.ts +147 -0
- package/electron/services/PowerAutomateApiService.ts +231 -0
- package/electron/services/SharePointSyncService.ts +474 -0
- package/electron/windowsCertStore.ts +427 -0
- package/electron/zscalerConfig.ts +381 -0
- package/eslint.config.js +92 -0
- package/jest.config.js +52 -0
- package/package.json +214 -0
- package/postcss.config.mjs +6 -0
- package/public/icon.png +0 -0
- package/publish-release.ps1 +5 -0
- package/renovate.json +30 -0
- package/src/App.tsx +216 -0
- package/src/__mocks__/p-limit.js +12 -0
- package/src/__mocks__/styleMock.js +1 -0
- package/src/components/common/BugReportButton.tsx +44 -0
- package/src/components/common/BugReportDialog.tsx +193 -0
- package/src/components/common/Button.tsx +153 -0
- package/src/components/common/Card.tsx +86 -0
- package/src/components/common/ColorPickerDialog.tsx +177 -0
- package/src/components/common/ConfirmDialog.tsx +96 -0
- package/src/components/common/DebugConsole.tsx +275 -0
- package/src/components/common/EmptyState.tsx +183 -0
- package/src/components/common/ErrorBoundary.tsx +98 -0
- package/src/components/common/ErrorDetailsDialog.tsx +153 -0
- package/src/components/common/ErrorFallback.tsx +218 -0
- package/src/components/common/Input.tsx +109 -0
- package/src/components/common/Skeleton.tsx +184 -0
- package/src/components/common/SplashScreen.tsx +81 -0
- package/src/components/common/Toast.tsx +155 -0
- package/src/components/common/Tooltip.tsx +79 -0
- package/src/components/common/UpdateNotification.tsx +320 -0
- package/src/components/comparison/ComparisonWindow.tsx +374 -0
- package/src/components/comparison/SideBySideDiff.tsx +486 -0
- package/src/components/comparison/index.ts +8 -0
- package/src/components/document/DocumentUploader.tsx +288 -0
- package/src/components/document/HyperlinkPreview.tsx +430 -0
- package/src/components/document/HyperlinkService.md +1484 -0
- package/src/components/document/Hyperlink_Technical_Documentation.md +496 -0
- package/src/components/document/InlineChangesView.tsx +707 -0
- package/src/components/document/ProcessingProgress.tsx +303 -0
- package/src/components/document/ProcessingResults.tsx +256 -0
- package/src/components/document/TrackedChangesDetail.tsx +530 -0
- package/src/components/document/TrackedChangesPanel.tsx +546 -0
- package/src/components/document/VirtualDocumentList.tsx +240 -0
- package/src/components/editor/DocumentEditor.tsx +723 -0
- package/src/components/editor/DocumentEditorModal.tsx +640 -0
- package/src/components/editor/EditorQuickActions.tsx +502 -0
- package/src/components/editor/EditorToolbar.tsx +312 -0
- package/src/components/editor/TableEditor.tsx +926 -0
- package/src/components/editor/index.ts +18 -0
- package/src/components/layout/Header.tsx +190 -0
- package/src/components/layout/Sidebar.tsx +313 -0
- package/src/components/layout/TitleBar.tsx +190 -0
- package/src/components/navigation/CommandPalette.tsx +233 -0
- package/src/components/navigation/KeyboardShortcutsModal.tsx +173 -0
- package/src/components/sessions/ChangeItem.tsx +408 -0
- package/src/components/sessions/ChangeViewer.tsx +1155 -0
- package/src/components/sessions/DocumentComparisonModal.tsx +314 -0
- package/src/components/sessions/ProcessingOptions.tsx +297 -0
- package/src/components/sessions/ReplacementsTab.tsx +438 -0
- package/src/components/sessions/RevisionHandlingOptions.tsx +87 -0
- package/src/components/sessions/SessionManager.tsx +188 -0
- package/src/components/sessions/StylesEditor.tsx +1335 -0
- package/src/components/sessions/TabContainer.tsx +151 -0
- package/src/components/sessions/VirtualSessionList.tsx +157 -0
- package/src/components/sessions/sessionToProcessorManager.tsx +420 -0
- package/src/components/settings/CertificateManager.tsx +410 -0
- package/src/components/settings/SegmentedControl.tsx +88 -0
- package/src/components/settings/SettingRow.tsx +52 -0
- package/src/contexts/GlobalStatsContext.tsx +396 -0
- package/src/contexts/SessionContext.tsx +2129 -0
- package/src/contexts/ThemeContext.tsx +428 -0
- package/src/contexts/UserSettingsContext.tsx +290 -0
- package/src/contexts/__tests__/GlobalStatsContext.test.tsx +390 -0
- package/src/global.d.ts +273 -0
- package/src/hooks/useDocumentQueue.tsx +210 -0
- package/src/hooks/useToast.tsx +55 -0
- package/src/main.tsx +10 -0
- package/src/pages/Analytics.tsx +386 -0
- package/src/pages/CurrentSession.tsx +1174 -0
- package/src/pages/Dashboard.tsx +319 -0
- package/src/pages/Documents.tsx +317 -0
- package/src/pages/Projects.tsx +250 -0
- package/src/pages/Reporting.tsx +386 -0
- package/src/pages/Search.tsx +349 -0
- package/src/pages/Sessions.tsx +285 -0
- package/src/pages/Settings.tsx +2662 -0
- package/src/services/HyperlinkService.ts +1085 -0
- package/src/services/document/DocXMLaterProcessor.ts +617 -0
- package/src/services/document/DocumentProcessingComparison.ts +856 -0
- package/src/services/document/DocumentSnapshotService.ts +575 -0
- package/src/services/document/WordDocumentProcessor.ts +10509 -0
- package/src/services/document/__tests__/DocXMLaterProcessor.hyperlinks.test.md +311 -0
- package/src/services/document/__tests__/WordDocumentProcessor.integration.test.ts +515 -0
- package/src/services/document/__tests__/WordDocumentProcessor.test.ts +812 -0
- package/src/services/document/blanklines/BlankLineManager.ts +658 -0
- package/src/services/document/blanklines/__tests__/paragraphChecks.test.ts +281 -0
- package/src/services/document/blanklines/helpers/blankLineInsertion.ts +87 -0
- package/src/services/document/blanklines/helpers/blankLineSnapshot.ts +251 -0
- package/src/services/document/blanklines/helpers/clearCustom.ts +121 -0
- package/src/services/document/blanklines/helpers/contextChecks.ts +117 -0
- package/src/services/document/blanklines/helpers/imageChecks.ts +51 -0
- package/src/services/document/blanklines/helpers/paragraphChecks.ts +236 -0
- package/src/services/document/blanklines/helpers/removeBlanksBetweenListItems.ts +91 -0
- package/src/services/document/blanklines/helpers/removeTrailingBlanks.ts +35 -0
- package/src/services/document/blanklines/helpers/tableGuards.ts +21 -0
- package/src/services/document/blanklines/index.ts +67 -0
- package/src/services/document/blanklines/rules/additionRules.ts +337 -0
- package/src/services/document/blanklines/rules/indentationRules.ts +317 -0
- package/src/services/document/blanklines/rules/removalRules.ts +362 -0
- package/src/services/document/blanklines/rules/ruleTypes.ts +92 -0
- package/src/services/document/blanklines/types.ts +29 -0
- package/src/services/document/helpers/ImageBorderCropper.ts +377 -0
- package/src/services/document/helpers/__tests__/whitespace.test.ts +272 -0
- package/src/services/document/helpers/whitespace.ts +117 -0
- package/src/services/document/list/ListNormalizer.ts +947 -0
- package/src/services/document/list/index.ts +45 -0
- package/src/services/document/list/list-detection.ts +275 -0
- package/src/services/document/list/list-types.ts +162 -0
- package/src/services/document/processors/HyperlinkProcessor.ts +370 -0
- package/src/services/document/processors/ListProcessor.ts +257 -0
- package/src/services/document/processors/StructureProcessor.ts +176 -0
- package/src/services/document/processors/StyleProcessor.ts +389 -0
- package/src/services/document/processors/TableProcessor.ts +2238 -0
- package/src/services/document/processors/__tests__/HyperlinkProcessor.test.ts +314 -0
- package/src/services/document/processors/__tests__/ListProcessor.test.ts +291 -0
- package/src/services/document/processors/__tests__/StructureProcessor.test.ts +257 -0
- package/src/services/document/processors/__tests__/TableProcessor.hlp-tips-bullets.test.ts +459 -0
- package/src/services/document/processors/__tests__/TableProcessor.test.ts +1604 -0
- package/src/services/document/processors/index.ts +28 -0
- package/src/services/document/types/docx-processing.ts +310 -0
- package/src/services/editor/EditorActionHandlers.ts +901 -0
- package/src/services/editor/index.ts +13 -0
- package/src/setupTests.ts +47 -0
- package/src/styles/global.css +782 -0
- package/src/types/backup.ts +132 -0
- package/src/types/dictionary.ts +125 -0
- package/src/types/document-processing.ts +331 -0
- package/src/types/docxmlater-augments.d.ts +142 -0
- package/src/types/editor.ts +280 -0
- package/src/types/electron.ts +340 -0
- package/src/types/globalStats.ts +155 -0
- package/src/types/hyperlink.ts +471 -0
- package/src/types/operations.ts +354 -0
- package/src/types/session.ts +427 -0
- package/src/types/settings.ts +112 -0
- package/src/utils/MemoryMonitor.ts +248 -0
- package/src/utils/cn.ts +6 -0
- package/src/utils/colorConvert.ts +306 -0
- package/src/utils/diffUtils.ts +347 -0
- package/src/utils/documentUtils.ts +202 -0
- package/src/utils/electronGuard.ts +62 -0
- package/src/utils/indexedDB.ts +915 -0
- package/src/utils/logger.ts +717 -0
- package/src/utils/pathSecurity.ts +232 -0
- package/src/utils/pathValidator.ts +236 -0
- package/src/utils/processingTimeEstimator.ts +153 -0
- package/src/utils/safeJsonParse.ts +62 -0
- package/src/utils/textSanitizer.ts +162 -0
- package/src/utils/urlHelpers.ts +304 -0
- package/src/utils/urlPatterns.ts +198 -0
- package/src/utils/urlSanitizer.ts +152 -0
- package/src/vite-env.d.ts +11 -0
- package/tsconfig.electron.json +19 -0
- package/tsconfig.json +36 -0
- package/tsconfig.node.json +12 -0
- package/typedoc.json +45 -0
- package/vite.config.ts +152 -0
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Text Sanitization Utilities
|
|
3
|
+
*
|
|
4
|
+
* Handles defensive cleanup of corrupted text from docxmlater framework.
|
|
5
|
+
*
|
|
6
|
+
* PROBLEM CONTEXT:
|
|
7
|
+
* ─────────────────
|
|
8
|
+
* The docxmlater Hyperlink.getText() method may return text containing XML markup
|
|
9
|
+
* when the underlying Run object contains corrupted data. This happens when:
|
|
10
|
+
* - Hyperlink runs have malformed XML structures
|
|
11
|
+
* - Text nodes contain embedded XML tags like <w:t xml:space="preserve">
|
|
12
|
+
* - Document was previously corrupted or modified externally
|
|
13
|
+
*
|
|
14
|
+
* EXPECTED BEHAVIOR:
|
|
15
|
+
* The Run() constructor auto-cleans by default (cleanXmlFromText: true)
|
|
16
|
+
* But Hyperlink.getText() doesn't apply the same cleanup.
|
|
17
|
+
*
|
|
18
|
+
* SOLUTION:
|
|
19
|
+
* Apply defensive XML tag removal to all hyperlink text extraction.
|
|
20
|
+
* This prevents XML corruption from propagating through the system.
|
|
21
|
+
*
|
|
22
|
+
* EXAMPLE:
|
|
23
|
+
* Input: "Important Information<w:t xml:space=\"preserve\">1"
|
|
24
|
+
* Output: "Important Information1"
|
|
25
|
+
*/
|
|
26
|
+
|
|
27
|
+
/**
 * Remove XML markup from hyperlink text.
 *
 * Strips tag-shaped spans: opening/closing/self-closing elements such as
 * `<w:t>`, `</w:t>`, `<w:t xml:space="preserve">`, `<br/>`, as well as
 * comments/declarations (`<!...>`) and processing instructions (`<?...>`).
 *
 * A `<` is only treated as the start of a tag when it is immediately followed
 * by a name character, `/`, `!` or `?`. Ordinary text that merely contains
 * angle brackets (e.g. "a < b and c > d") is therefore left untouched.
 *
 * Removal is repeated until the text stops changing, so fragments that only
 * form a tag after an inner tag has been removed (e.g. `<<w:t>w:t>`) do not
 * survive.
 *
 * @param text - The text that may contain XML markup
 * @returns The text with XML tags removed; `''` when `text` is empty/falsy
 *
 * @example
 * ```typescript
 * sanitizeHyperlinkText("Hello<w:t>World</w:t>")
 * // Returns: "HelloWorld"
 *
 * sanitizeHyperlinkText("Clean text")
 * // Returns: "Clean text"
 *
 * sanitizeHyperlinkText("Text with<w:t xml:space=\"preserve\">space")
 * // Returns: "Text withspace"
 *
 * sanitizeHyperlinkText("Use when a < b and c > d")
 * // Returns: "Use when a < b and c > d"
 * ```
 */
export function sanitizeHyperlinkText(text: string): string {
  if (!text) return '';

  // "<" + (optional "/" + name-start char | "!" | "?") + anything up to the next ">".
  // Excluding "<" from the tag interior keeps a stray "<" from swallowing a real tag.
  const tagPattern = /<(?:\/?[A-Za-z_]|[!?])[^<>]*>/g;

  let cleaned = text;
  let previous: string;
  do {
    previous = cleaned;
    cleaned = cleaned.replace(tagPattern, '');
  } while (cleaned !== previous);

  return cleaned;
}
|
|
61
|
+
|
|
62
|
+
/**
 * Sanitize hyperlink display text, substituting a default when nothing is left.
 *
 * The text is first passed through `sanitizeHyperlinkText`. When the result is
 * an empty string and a non-empty `fallback` was supplied, the fallback is
 * returned; otherwise the sanitized text is returned as-is (which may be `''`).
 *
 * @param text - The text to sanitize
 * @param fallback - Optional replacement used only when the sanitized text is empty
 * @returns The sanitized text, or `fallback` when the sanitized text is empty
 *
 * @example
 * ```typescript
 * // With fallback
 * sanitizeHyperlinkTextWithFallback("<w:t>", "Click here")
 * // Returns: "Click here"
 *
 * // Without fallback
 * sanitizeHyperlinkTextWithFallback("Normal Text")
 * // Returns: "Normal Text"
 * ```
 */
export function sanitizeHyperlinkTextWithFallback(text: string, fallback?: string): string {
  const cleaned = sanitizeHyperlinkText(text);

  // Non-empty result wins outright.
  if (cleaned) {
    return cleaned;
  }

  // Empty result: use the fallback only if one was actually provided.
  return fallback ? fallback : cleaned;
}
|
|
92
|
+
|
|
93
|
+
/**
 * Check whether text appears to contain XML corruption.
 *
 * Returns true when the text contains at least one tag-shaped span: `<`
 * immediately followed by a name character, `/`, `!` or `?`, and closed by `>`.
 * Plain text that merely uses angle brackets (e.g. "a < b and c > d") is not
 * reported as corrupted.
 *
 * Useful for diagnostic logging and determining if corruption occurred.
 *
 * @param text - The text to check
 * @returns true if the text contains XML-like tags; false for empty/falsy input
 *
 * @example
 * ```typescript
 * isTextCorrupted("Normal text")
 * // Returns: false
 *
 * isTextCorrupted("Text<w:t>with tags</w:t>")
 * // Returns: true
 *
 * isTextCorrupted("a < b and c > d")
 * // Returns: false
 * ```
 */
export function isTextCorrupted(text: string): boolean {
  if (!text) return false;

  // Non-global regex: .test() is stateless, so repeated calls are safe.
  return /<(?:\/?[A-Za-z_]|[!?])[^<>]*>/.test(text);
}
|
|
114
|
+
|
|
115
|
+
/**
 * Sanitize a list of hyperlink texts.
 *
 * Runs `sanitizeHyperlinkText` over every entry and returns the results in a
 * new array of the same length and order; the input array is not modified.
 *
 * @param texts - Array of texts to sanitize
 * @returns Array of sanitized texts
 *
 * @example
 * ```typescript
 * sanitizeHyperlinkTexts([
 *   "Text<w:t>1</w:t>",
 *   "Normal text",
 *   "Another<tag>corrupted</tag>"
 * ])
 * // Returns: ["Text1", "Normal text", "Anothercorrupted"]
 * ```
 */
export function sanitizeHyperlinkTexts(texts: string[]): string[] {
  return texts.map((entry) => sanitizeHyperlinkText(entry));
}
|
|
136
|
+
|
|
137
|
+
/**
 * Convert typographic dashes to plain ASCII hyphens.
 *
 * Every occurrence of the following characters is replaced with `-` (U+002D):
 * - En-dash (U+2013)
 * - Em-dash (U+2014)
 *
 * All other characters are left untouched.
 *
 * @param text - The text that may contain en-dashes or em-dashes
 * @returns The text with dashes normalized to hyphens; `''` for empty/falsy input
 *
 * @example
 * ```typescript
 * normalizeEnDashesToHyphens("2020–2024")
 * // Returns: "2020-2024"
 *
 * normalizeEnDashesToHyphens("Hello—World")
 * // Returns: "Hello-World"
 *
 * normalizeEnDashesToHyphens("Normal text with - hyphens")
 * // Returns: "Normal text with - hyphens" (unchanged)
 * ```
 */
export function normalizeEnDashesToHyphens(text: string): string {
  return text ? text.replace(/[\u2013\u2014]/g, '-') : '';
}
|
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* URL Helper Utilities
|
|
3
|
+
*
|
|
4
|
+
* Provides functions for sanitizing, validating, and fixing common URL encoding issues.
|
|
5
|
+
* This is critical for Azure Logic Apps URLs which often come with encoded query parameters.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
/**
 * Sanitize a URL by decoding common encoding issues.
 *
 * Fixes three common URL encoding problems, in this order:
 * 1. Unicode escapes: the literal six characters `\u0026` → `&`
 * 2. HTML entities: `&amp;` → `&`, `&lt;` → `<`, `&gt;` → `>`, `&quot;` → `"`
 * 3. URL encoding in the query string: `%26` → `&`, `%3D` → `=`, etc.
 *
 * Step 3 only runs when the result of steps 1–2 parses as an absolute URL and
 * has at least one query parameter. The URL is then rebuilt as
 * `origin + pathname + "?" + decoded params + hash`, so a URL without a path
 * gains a `/` (e.g. `https://api.com?v=1` → `https://api.com/?v=1`).
 * If parsing or decoding throws, the string produced by steps 1–2 is returned.
 *
 * @param url - The URL to sanitize (may contain encoded characters)
 * @returns Sanitized URL with decoded query parameters; falsy input is returned unchanged
 *
 * @example
 * // Azure Logic App URL with Unicode escapes (literal backslash-u in the data)
 * const encoded = 'https://api.com/run?v=1\\u0026sp=%2Ftriggers';
 * const clean = sanitizeUrl(encoded);
 * // Result: 'https://api.com/run?v=1&sp=/triggers'
 */
export function sanitizeUrl(url: string): string {
  if (!url) return url;

  let sanitized = url;

  // Step 1: Decode Unicode escapes (\u0026 → &)
  // Common when URLs are stored in JSON or JavaScript strings
  sanitized = sanitized.replace(/\\u0026/g, '&');

  // Step 2: Decode HTML entities (&amp; → &, &lt; → <, &gt; → >, &quot; → ")
  // Common when URLs are copied from HTML documents
  sanitized = sanitized.replace(/&amp;/g, '&');
  sanitized = sanitized.replace(/&lt;/g, '<');
  sanitized = sanitized.replace(/&gt;/g, '>');
  sanitized = sanitized.replace(/&quot;/g, '"');

  // Step 3: Decode URL encoding (%26 → &, %3D → =, etc.)
  // Common when URLs are copied from browsers
  // Note: We only decode the query string part to preserve intentional encoding
  try {
    const urlObj = new URL(sanitized);
    const searchParams = new URLSearchParams(urlObj.search);

    // Rebuild query string from decoded parameters
    const params: string[] = [];
    searchParams.forEach((value, key) => {
      // decodeURIComponent throws URIError on a malformed escape; the catch
      // below then keeps the string as it was after steps 1-2.
      params.push(`${decodeURIComponent(key)}=${decodeURIComponent(value)}`);
    });

    if (params.length > 0) {
      sanitized = `${urlObj.origin}${urlObj.pathname}?${params.join('&')}${urlObj.hash}`;
    }
  } catch (e) {
    // If URL parsing fails, return the partially sanitized version
    // This handles cases where the URL is malformed
  }

  return sanitized;
}
|
|
64
|
+
|
|
65
|
+
/**
 * Validate that a URL is properly formatted for Azure Logic Apps.
 *
 * Checks performed:
 * - Empty input is an issue (validation stops immediately).
 * - Literal `\u0026` escapes or `&amp;` entities in the raw input produce
 *   warnings only (the URL is sanitized with `sanitizeUrl` before parsing).
 * - The sanitized URL must parse and must use the `https:` protocol (issue).
 * - `api-version` must be present in the query string (issue).
 * - `sp`, `sv` and `sig` are reported as warnings when missing.
 *
 * @param url - The URL to validate
 * @returns `valid` is true when no issues were recorded; `issues` lists
 *          blocking problems and `warnings` lists non-blocking observations
 */
export function validatePowerAutomateUrl(url: string): {
  valid: boolean;
  issues: string[];
  warnings: string[];
} {
  const issues: string[] = [];
  const warnings: string[] = [];

  if (!url) {
    issues.push('URL is empty');
    return { valid: false, issues, warnings };
  }

  // Check for encoded characters that should be decoded
  if (url.includes('\\u0026')) {
    warnings.push('URL contains Unicode escapes (\\u0026). These will be auto-decoded.');
  }
  // Look for the HTML entity itself; a bare "&" is a normal query separator.
  if (url.includes('&amp;')) {
    warnings.push('URL contains HTML entities (&amp;). These will be auto-decoded.');
  }

  // Try to parse the URL
  try {
    const urlObj = new URL(sanitizeUrl(url));

    // Must be HTTPS for Azure Logic Apps
    if (urlObj.protocol !== 'https:') {
      issues.push('URL must use HTTPS protocol for Azure Logic Apps');
    }

    // Check for required Azure Logic Apps query parameters
    const searchParams = new URLSearchParams(urlObj.search);

    if (!searchParams.has('api-version')) {
      issues.push('Missing required parameter: api-version');
    }
    if (!searchParams.has('sp')) {
      warnings.push(
        'Missing "sp" parameter (shared access policy). May be required depending on your Logic App configuration.'
      );
    }
    if (!searchParams.has('sv')) {
      warnings.push(
        'Missing "sv" parameter (signature version). May be required depending on your Logic App configuration.'
      );
    }
    if (!searchParams.has('sig')) {
      warnings.push('Missing "sig" parameter (signature). May be required for authentication.');
    }
  } catch (e) {
    // new URL() throws on unparseable input; surface its message as an issue.
    issues.push(`Invalid URL format: ${e instanceof Error ? e.message : 'Unknown error'}`);
  }

  return {
    valid: issues.length === 0,
    issues,
    warnings,
  };
}
|
|
136
|
+
|
|
137
|
+
/**
 * Probe a URL with a HEAD request and report whether it responded successfully.
 *
 * The URL is passed through `sanitizeUrl` first. The request is aborted via an
 * `AbortController` once `timeoutMs` elapses. This function does not throw:
 * every failure is reported through the returned object.
 *
 * @param url - The URL to test
 * @param timeoutMs - Timeout in milliseconds (default: 10000)
 * @returns `reachable` mirrors `response.ok`; `statusCode` is set when a
 *          response was received; `error` is set when the request failed or
 *          timed out
 */
export async function testUrlReachability(
  url: string,
  timeoutMs: number = 10000
): Promise<{
  reachable: boolean;
  statusCode?: number;
  error?: string;
}> {
  const abortController = new AbortController();
  const timer = setTimeout(() => abortController.abort(), timeoutMs);

  try {
    // HEAD keeps the probe cheap: no response body is downloaded.
    const response = await fetch(sanitizeUrl(url), {
      method: 'HEAD',
      signal: abortController.signal,
    });

    return {
      reachable: response.ok,
      statusCode: response.status,
    };
  } catch (error) {
    if (!(error instanceof Error)) {
      return {
        reachable: false,
        error: 'Unknown error',
      };
    }

    // An abort triggered by the timer surfaces as an AbortError.
    if (error.name === 'AbortError') {
      return {
        reachable: false,
        error: `Request timed out after ${timeoutMs}ms`,
      };
    }

    return {
      reachable: false,
      error: error.message,
    };
  } finally {
    // Always release the timer, whether the request succeeded or failed.
    clearTimeout(timer);
  }
}
|
|
186
|
+
|
|
187
|
+
/**
 * Collect the query parameters of a URL into a Map.
 *
 * The URL is sanitized with `sanitizeUrl` and then parsed. When a parameter
 * name occurs more than once, the last value wins. If the URL cannot be
 * parsed, an empty Map is returned instead of throwing.
 *
 * @param url - The URL to parse
 * @returns Map of parameter names to values (empty when the URL is malformed)
 */
export function extractQueryParams(url: string): Map<string, string> {
  const collected = new Map<string, string>();

  try {
    const parsedUrl = new URL(sanitizeUrl(url));
    const query = new URLSearchParams(parsedUrl.search);

    query.forEach((paramValue, paramName) => {
      collected.set(paramName, paramValue);
    });
  } catch (e) {
    // Malformed URL: fall through and return the empty map
  }

  return collected;
}
|
|
210
|
+
|
|
211
|
+
/**
 * Check if a URL has any encoding issues that need fixing.
 *
 * Looks for the encoded forms of URL punctuation: the literal Unicode escape
 * `\u0026` and the HTML entities `&amp;`, `&lt;`, `&gt;` and `&quot;`.
 * A plain `&` separating query parameters is NOT an encoding issue.
 *
 * @param url - The URL to check
 * @returns True if the URL contains any of the encoded markers, false otherwise
 *          (including for empty/falsy input)
 */
export function hasEncodingIssues(url: string): boolean {
  if (!url) return false;

  // '\\u0026' is the six-character sequence backslash-u-0-0-2-6, not "&".
  const encodedMarkers = ['\\u0026', '&amp;', '&lt;', '&gt;', '&quot;'];

  return encodedMarkers.some((marker) => url.includes(marker));
}
|
|
228
|
+
|
|
229
|
+
/**
 * SECURITY: Validate URL scheme for user-controlled hyperlink replacements.
 *
 * Prevents XSS-like attacks by rejecting dangerous URL schemes that could:
 * - Execute JavaScript (javascript:)
 * - Embed data URIs (data:)
 * - Access local files (file:///)
 * - Use other non-HTTP protocols
 *
 * Behavior:
 * - Empty or whitespace-only input is reported as valid (`isHttp: false`).
 * - Input that parses as an absolute URL is valid only with the `http:` or
 *   `https:` protocol.
 * - Input that does not parse is rejected when it begins with an explicit
 *   scheme other than `http:`/`https:`, so the allowlist also applies to
 *   malformed URLs such as `ftp://bad host/x`. Anything else (for example a
 *   content ID or a relative path) is reported as valid with `isHttp: false`.
 *
 * @param url - The URL to validate
 * @returns Object with validation result and error message if invalid
 *
 * @example
 * validateUrlScheme('https://example.com') // { valid: true, isHttp: true }
 * validateUrlScheme('javascript:alert(1)') // { valid: false, error: '...', isHttp: false }
 */
export function validateUrlScheme(url: string): {
  valid: boolean;
  isHttp: boolean;
  error?: string;
} {
  if (!url || url.trim() === '') {
    return { valid: true, isHttp: false }; // Allow empty (will be handled elsewhere)
  }

  // Whitelist only HTTP/HTTPS protocols
  const allowedSchemes = ['http:', 'https:'];

  try {
    // Attempt to parse as URL
    const parsed = new URL(url);

    const isAllowed = allowedSchemes.includes(parsed.protocol.toLowerCase());

    if (!isAllowed) {
      return {
        valid: false,
        isHttp: false,
        error: `Dangerous URL scheme detected: "${parsed.protocol}". Only http:// and https:// are allowed for security.`,
      };
    }

    return { valid: true, isHttp: true };
  } catch (error) {
    // If URL parsing fails, it might be a relative URL or malformed
    // Check for obvious dangerous patterns even if URL parse fails
    const lowerUrl = url.toLowerCase().trim();

    if (lowerUrl.startsWith('javascript:')) {
      return {
        valid: false,
        isHttp: false,
        error: 'JavaScript URLs are not allowed for security reasons.',
      };
    }

    if (lowerUrl.startsWith('data:')) {
      return {
        valid: false,
        isHttp: false,
        error: 'Data URLs are not allowed for security reasons.',
      };
    }

    if (lowerUrl.startsWith('file:')) {
      return {
        valid: false,
        isHttp: false,
        error: 'File URLs are not allowed for security reasons.',
      };
    }

    // Apply the same allowlist to any other explicit scheme, so an unparseable
    // URL cannot slip a non-HTTP protocol past the check.
    const explicitScheme = /^[a-z][a-z0-9+.-]*:/.exec(lowerUrl)?.[0];
    if (explicitScheme !== undefined && !allowedSchemes.includes(explicitScheme)) {
      return {
        valid: false,
        isHttp: false,
        error: `Dangerous URL scheme detected: "${explicitScheme}". Only http:// and https:// are allowed for security.`,
      };
    }

    // If it's not a parseable URL and doesn't carry a disallowed scheme,
    // it might be a content ID or relative path - allow it
    return { valid: true, isHttp: false };
  }
}
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* URL Pattern Utilities for theSource Hyperlink Processing
|
|
3
|
+
*
|
|
4
|
+
* SINGLE SOURCE OF TRUTH for Content ID and Document ID extraction
|
|
5
|
+
* Used across all hyperlink processing services
|
|
6
|
+
*
|
|
7
|
+
* This utility centralizes regex patterns that were previously duplicated across:
|
|
8
|
+
* - HyperlinkService.ts
|
|
9
|
+
* - WordDocumentProcessor.ts
|
|
10
|
+
* - HyperlinkManager.ts
|
|
11
|
+
* - DocXMLaterProcessor.ts
|
|
12
|
+
* - types/hyperlink.ts
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
/**
 * Regex Patterns for theSource URLs
 *
 * Content ID Format: TSRC-ABC-123456 or CMS-XYZ-789012
 * Document ID Format: docid=<uuid-or-alphanumeric>
 */
export const URL_PATTERNS = {
  /**
   * Matches theSource Content IDs
   * Examples: TSRC-ABC-123456, CMS-XYZ-789012
   * Pattern: (TSRC|CMS)-(alphanumeric)-(6 digits)
   *
   * Format Specification:
   * - Prefix: TSRC or CMS (case-insensitive)
   * - Separator: hyphen (-)
   * - Middle: alphanumeric characters (A-Z, a-z, 0-9)
   * - Separator: hyphen (-)
   * - Suffix: exactly 6 digits
   *
   * The trailing `(?!\d)` lookahead enforces "exactly 6": a longer digit run
   * does not match at all instead of matching its first six digits.
   */
  CONTENT_ID: /(TSRC|CMS)-([a-zA-Z0-9]+)-(\d{6})(?!\d)/i,

  /**
   * Matches theSource Document IDs
   * Examples: docid=abc-123-def, docid=abc123
   * Pattern: docid=(alphanumeric with dashes)
   *
   * Format Specification:
   * - Prefix: docid= (case-insensitive)
   * - Value: alphanumeric characters with optional hyphens (capture group 1)
   * - Boundary: stops at non-alphanumeric/dash character or end of string;
   *   the boundary character, when present, is part of the full match
   */
  DOCUMENT_ID: /docid=([a-zA-Z0-9-]+)(?:[^a-zA-Z0-9-]|$)/i,

  /**
   * Matches theSource domain anywhere in the tested string (case-insensitive)
   * Example: thesource.cvshealth.com
   */
  THE_SOURCE_DOMAIN: /thesource\.cvshealth\.com/i,
} as const;
|
|
54
|
+
|
|
55
|
+
/**
 * Pull the first theSource Content ID out of a URL.
 *
 * The whole matched ID is returned (prefix, middle part and digits), exactly
 * as it appears in the input.
 *
 * @param url - URL to search
 * @returns The Content ID (e.g., "TSRC-ABC-123456"), or null when the input is
 *          empty or contains no Content ID
 *
 * @example
 * extractContentId('https://thesource.com/doc?Content_ID=TSRC-ABC-123456')
 * // Returns: "TSRC-ABC-123456"
 *
 * extractContentId('https://google.com')
 * // Returns: null
 */
export function extractContentId(url: string): string | null {
  if (!url) {
    return null;
  }

  const found = url.match(URL_PATTERNS.CONTENT_ID);
  if (!found) {
    return null;
  }

  // Index 0 is the full match, i.e. the complete Content ID.
  return found[0];
}
|
|
73
|
+
|
|
74
|
+
/**
 * Pull the first theSource Content ID out of arbitrary text (file path,
 * display text, etc.).
 *
 * Intended as a fallback for cases where no URL is available from getUrl(),
 * such as file-type hyperlinks whose URL lives in the relationship target
 * while getUrl() returns undefined.
 *
 * @param text - Text to search for a Content ID
 * @returns The Content ID (e.g., "TSRC-ABC-123456"), or null when the input is
 *          empty or contains no Content ID
 *
 * @example
 * extractContentIdFromText('C:\\Users\\user\\Downloads\\TSRC-PROD-015483')
 * // Returns: "TSRC-PROD-015483"
 *
 * extractContentIdFromText('Document: TSRC-ABC-123456 (Final)')
 * // Returns: "TSRC-ABC-123456"
 *
 * extractContentIdFromText('Reviewing SharePoint Errors (Seniors Only)')
 * // Returns: null
 */
export function extractContentIdFromText(text: string): string | null {
  if (!text) {
    return null;
  }

  const found = text.match(URL_PATTERNS.CONTENT_ID);

  // Index 0 is the full match, i.e. the complete Content ID.
  return found === null ? null : found[0];
}
|
|
99
|
+
|
|
100
|
+
/**
 * Pull the Document ID (the value following `docid=`) out of a URL.
 *
 * @param url - URL to search
 * @returns The Document ID (UUID/alphanumeric), or null when the input is
 *          empty or contains no `docid=` value
 *
 * @example
 * extractDocumentId('https://thesource.com/#!/view?docid=abc-123-def')
 * // Returns: "abc-123-def"
 *
 * extractDocumentId('https://thesource.com/#!/view?docid=abc123#content')
 * // Returns: "abc123"
 *
 * extractDocumentId('https://google.com')
 * // Returns: null
 */
export function extractDocumentId(url: string): string | null {
  if (!url) {
    return null;
  }

  const found = url.match(URL_PATTERNS.DOCUMENT_ID);
  if (!found) {
    return null;
  }

  // Capture group 1 holds the ID itself, without the "docid=" prefix or the
  // delimiter that follows it.
  return found[1];
}
|
|
121
|
+
|
|
122
|
+
/**
 * Collect both lookup IDs (Content ID and Document ID) from a URL.
 *
 * Each ID is extracted independently; the result only carries the keys for
 * the IDs that were actually found.
 *
 * @param url - URL to search
 * @returns Object with contentId and/or documentId, or null if the input is
 *          empty or neither ID is present
 *
 * @example
 * extractLookupIds('https://thesource.com/doc?Content_ID=TSRC-ABC-123456&docid=abc123')
 * // Returns: { contentId: "TSRC-ABC-123456", documentId: "abc123" }
 *
 * extractLookupIds('https://thesource.com/doc?Content_ID=TSRC-ABC-123456')
 * // Returns: { contentId: "TSRC-ABC-123456" }
 *
 * extractLookupIds('https://google.com')
 * // Returns: null
 */
export function extractLookupIds(url: string): {
  contentId?: string;
  documentId?: string;
} | null {
  if (!url) {
    return null;
  }

  const contentId = extractContentId(url);
  const documentId = extractDocumentId(url);

  if (!contentId && !documentId) {
    return null;
  }

  // Only include the keys whose ID was found, so absent IDs are missing
  // properties rather than null/undefined values.
  return {
    ...(contentId ? { contentId } : {}),
    ...(documentId ? { documentId } : {}),
  };
}
|
|
161
|
+
|
|
162
|
+
/**
 * Tell whether a URL refers to theSource.
 *
 * The check is a case-insensitive search for the theSource domain anywhere in
 * the given string.
 *
 * @param url - URL to check
 * @returns true if the theSource domain occurs in the URL, false otherwise
 *          (including for empty input)
 *
 * @example
 * isTheSourceUrl('https://thesource.cvshealth.com/nuxeo/...')
 * // Returns: true
 *
 * isTheSourceUrl('https://google.com')
 * // Returns: false
 */
export function isTheSourceUrl(url: string): boolean {
  return Boolean(url) && URL_PATTERNS.THE_SOURCE_DOMAIN.test(url);
}
|
|
179
|
+
|
|
180
|
+
/**
 * Tell whether a URL contains a Content ID.
 *
 * @param url - URL to check
 * @returns true if extractContentId finds a Content ID in the URL
 */
export function hasContentId(url: string): boolean {
  const contentId = extractContentId(url);
  return contentId !== null;
}
|
|
189
|
+
|
|
190
|
+
/**
 * Tell whether a URL contains a Document ID.
 *
 * @param url - URL to check
 * @returns true if extractDocumentId finds a Document ID in the URL
 */
export function hasDocumentId(url: string): boolean {
  const documentId = extractDocumentId(url);
  return documentId !== null;
}
|