documentation-hub 5.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (271) hide show
  1. package/.eslintrc.json +43 -0
  2. package/.github/workflows/build.yml +64 -0
  3. package/.github/workflows/ci.yml +39 -0
  4. package/.vscode/extensions.json +3 -0
  5. package/Current.md +97 -0
  6. package/DocHub_Image.png +0 -0
  7. package/README.md +666 -0
  8. package/USER_GUIDE.md +1173 -0
  9. package/Updater.md +311 -0
  10. package/build/256x256.png +0 -0
  11. package/build/512x512.png +0 -0
  12. package/build/app-update.yml +4 -0
  13. package/build/create-icon.js +208 -0
  14. package/build/icon.ico +0 -0
  15. package/build/icon.png +0 -0
  16. package/build/icon_1024x1024.png +0 -0
  17. package/dist/assets/Analytics-BpsG9895.js +1 -0
  18. package/dist/assets/Card-IAZin8kp.js +1 -0
  19. package/dist/assets/CurrentSession-B-rFkHvf.js +12 -0
  20. package/dist/assets/Dashboard-C_5gMb0q.js +1 -0
  21. package/dist/assets/Documents-CqZ25axS.js +1 -0
  22. package/dist/assets/Input-l89xwXBi.js +1 -0
  23. package/dist/assets/Reporting-DqdHJY_a.js +1 -0
  24. package/dist/assets/Search-XNbu5z_3.js +1 -0
  25. package/dist/assets/SessionManager-lH9hZfzH.js +1 -0
  26. package/dist/assets/Sessions-ClZOPYNc.js +1 -0
  27. package/dist/assets/Settings-DUEHGURa.js +11 -0
  28. package/dist/assets/index-8xUe8ptc.js +24 -0
  29. package/dist/assets/index-RYyJqF7O.css +1 -0
  30. package/dist/assets/path-BkOl0AGO.js +1 -0
  31. package/dist/assets/promises-ID_B9S-h.js +1 -0
  32. package/dist/assets/urlHelpers-TvgahX0r.js +1 -0
  33. package/dist/assets/useToast-yRSO1dkm.js +1 -0
  34. package/dist/assets/vendor-charts-RkGK5ROP.js +36 -0
  35. package/dist/assets/vendor-db-l0sNRNKZ.js +1 -0
  36. package/dist/assets/vendor-react-BVZ_anCF.js +4 -0
  37. package/dist/assets/vendor-search-Dw8P0qyA.js +1 -0
  38. package/dist/assets/vendor-ui-BU7NfluV.js +53 -0
  39. package/dist/electron/PowerAutomateApiService-LfW09ZGr.js +147 -0
  40. package/dist/electron/main-CXkNtyv-.js +19789 -0
  41. package/dist/electron/main.js +5 -0
  42. package/dist/electron/preload.js +1 -0
  43. package/dist/icon.png +0 -0
  44. package/dist/index.html +27 -0
  45. package/docs/CODEBASE_ANALYSIS_REPORT.md +309 -0
  46. package/docs/DEBUG_LOGGING_GUIDE.md +244 -0
  47. package/docs/README.md +115 -0
  48. package/docs/TOC_WIRING_GUIDE.md +344 -0
  49. package/docs/analysis/Bullet_Symbol_Bug_Analysis.md +136 -0
  50. package/docs/analysis/DOCXMLATER_ANALYSIS_SUMMARY.txt +169 -0
  51. package/docs/analysis/Document_Processing_Issues_Analysis.md +704 -0
  52. package/docs/analysis/FIELD_PRESERVATION_ANALYSIS.md +1200 -0
  53. package/docs/analysis/INDENTATION_PRESERVE_ANALYSIS.md +181 -0
  54. package/docs/analysis/INDENTATION_PRESERVE_IMPLEMENTATION.md +207 -0
  55. package/docs/analysis/List_Implementation.md +206 -0
  56. package/docs/analysis/List_Implementation_Accuracy_Report.md +366 -0
  57. package/docs/analysis/PROCESSING_OPTIONS_UI_UPDATES.md +220 -0
  58. package/docs/analysis/RefactorStyles.md +852 -0
  59. package/docs/analysis/STYLE_PARAMETER_ENHANCEMENT.md +143 -0
  60. package/docs/analysis/docxmlater-comparison-todo-2025-11-13.md +636 -0
  61. package/docs/analysis/docxmlater-implementation-analysis-2025-11-13.md +340 -0
  62. package/docs/analysis/docxmlater-template_ui-integration-analysis.md +263 -0
  63. package/docs/analysis/github-issues-to-create.md +237 -0
  64. package/docs/api/API_README.md +538 -0
  65. package/docs/api/API_REFERENCE.md +751 -0
  66. package/docs/api/TYPE_DEFINITIONS.md +869 -0
  67. package/docs/architecture/FONT_EMBEDDING_GUIDE.md +318 -0
  68. package/docs/architecture/docxmlater-functions-and-structure.md +726 -0
  69. package/docs/docxmlater-readme.md +1341 -0
  70. package/docs/fixes/EXECUTION_LOG_TEST_BASE.md +573 -0
  71. package/docs/fixes/HYPERLINK_TEXT_SANITIZATION.md +253 -0
  72. package/docs/fixes/README.md +37 -0
  73. package/docs/github-issues/issue-1-body.md +125 -0
  74. package/docs/github-issues/issue-10-body.md +850 -0
  75. package/docs/github-issues/issue-2-body.md +200 -0
  76. package/docs/github-issues/issue-3-body.md +270 -0
  77. package/docs/github-issues/issue-4-body.md +169 -0
  78. package/docs/github-issues/issue-5-body.md +173 -0
  79. package/docs/github-issues/issue-6-body.md +158 -0
  80. package/docs/github-issues/issue-7-body.md +171 -0
  81. package/docs/github-issues/issue-8-body.md +407 -0
  82. package/docs/github-issues/issue-9-body.md +515 -0
  83. package/docs/github-issues/issue-tracker.md +274 -0
  84. package/docs/github-issues/predictive-analysis-2025-10-18.md +2131 -0
  85. package/docs/implementation/List_Framework_Refactor_Plan.md +336 -0
  86. package/docs/implementation/PRIMARY_TEXT_COLOR_FEATURE.md +217 -0
  87. package/docs/implementation/RELEASE_PLAN_v2.1.0.md +362 -0
  88. package/docs/implementation/RefactorStyles.md +588 -0
  89. package/docs/implementation/implement-plan.md +489 -0
  90. package/docs/implementation/missing-helpers-implementation.md +391 -0
  91. package/docs/implementation/refactor-plan.md +520 -0
  92. package/docs/implementation/session-implementation-complete.md +233 -0
  93. package/docs/implementation/session-management-plan.md +250 -0
  94. package/docs/setup-checklist.md +77 -0
  95. package/docs/versions/changelog.md +345 -0
  96. package/electron/customUpdater.ts +656 -0
  97. package/electron/main.ts +2441 -0
  98. package/electron/memoryConfig.ts +187 -0
  99. package/electron/preload.ts +394 -0
  100. package/electron/proxyConfig.ts +340 -0
  101. package/electron/services/BackupService.ts +452 -0
  102. package/electron/services/DictionaryService.ts +402 -0
  103. package/electron/services/LocalDictionaryLookupService.ts +147 -0
  104. package/electron/services/PowerAutomateApiService.ts +231 -0
  105. package/electron/services/SharePointSyncService.ts +474 -0
  106. package/electron/windowsCertStore.ts +427 -0
  107. package/electron/zscalerConfig.ts +381 -0
  108. package/eslint.config.js +92 -0
  109. package/jest.config.js +52 -0
  110. package/package.json +214 -0
  111. package/postcss.config.mjs +6 -0
  112. package/public/icon.png +0 -0
  113. package/publish-release.ps1 +5 -0
  114. package/renovate.json +30 -0
  115. package/src/App.tsx +216 -0
  116. package/src/__mocks__/p-limit.js +12 -0
  117. package/src/__mocks__/styleMock.js +1 -0
  118. package/src/components/common/BugReportButton.tsx +44 -0
  119. package/src/components/common/BugReportDialog.tsx +193 -0
  120. package/src/components/common/Button.tsx +153 -0
  121. package/src/components/common/Card.tsx +86 -0
  122. package/src/components/common/ColorPickerDialog.tsx +177 -0
  123. package/src/components/common/ConfirmDialog.tsx +96 -0
  124. package/src/components/common/DebugConsole.tsx +275 -0
  125. package/src/components/common/EmptyState.tsx +183 -0
  126. package/src/components/common/ErrorBoundary.tsx +98 -0
  127. package/src/components/common/ErrorDetailsDialog.tsx +153 -0
  128. package/src/components/common/ErrorFallback.tsx +218 -0
  129. package/src/components/common/Input.tsx +109 -0
  130. package/src/components/common/Skeleton.tsx +184 -0
  131. package/src/components/common/SplashScreen.tsx +81 -0
  132. package/src/components/common/Toast.tsx +155 -0
  133. package/src/components/common/Tooltip.tsx +79 -0
  134. package/src/components/common/UpdateNotification.tsx +320 -0
  135. package/src/components/comparison/ComparisonWindow.tsx +374 -0
  136. package/src/components/comparison/SideBySideDiff.tsx +486 -0
  137. package/src/components/comparison/index.ts +8 -0
  138. package/src/components/document/DocumentUploader.tsx +288 -0
  139. package/src/components/document/HyperlinkPreview.tsx +430 -0
  140. package/src/components/document/HyperlinkService.md +1484 -0
  141. package/src/components/document/Hyperlink_Technical_Documentation.md +496 -0
  142. package/src/components/document/InlineChangesView.tsx +707 -0
  143. package/src/components/document/ProcessingProgress.tsx +303 -0
  144. package/src/components/document/ProcessingResults.tsx +256 -0
  145. package/src/components/document/TrackedChangesDetail.tsx +530 -0
  146. package/src/components/document/TrackedChangesPanel.tsx +546 -0
  147. package/src/components/document/VirtualDocumentList.tsx +240 -0
  148. package/src/components/editor/DocumentEditor.tsx +723 -0
  149. package/src/components/editor/DocumentEditorModal.tsx +640 -0
  150. package/src/components/editor/EditorQuickActions.tsx +502 -0
  151. package/src/components/editor/EditorToolbar.tsx +312 -0
  152. package/src/components/editor/TableEditor.tsx +926 -0
  153. package/src/components/editor/index.ts +18 -0
  154. package/src/components/layout/Header.tsx +190 -0
  155. package/src/components/layout/Sidebar.tsx +313 -0
  156. package/src/components/layout/TitleBar.tsx +190 -0
  157. package/src/components/navigation/CommandPalette.tsx +233 -0
  158. package/src/components/navigation/KeyboardShortcutsModal.tsx +173 -0
  159. package/src/components/sessions/ChangeItem.tsx +408 -0
  160. package/src/components/sessions/ChangeViewer.tsx +1155 -0
  161. package/src/components/sessions/DocumentComparisonModal.tsx +314 -0
  162. package/src/components/sessions/ProcessingOptions.tsx +297 -0
  163. package/src/components/sessions/ReplacementsTab.tsx +438 -0
  164. package/src/components/sessions/RevisionHandlingOptions.tsx +87 -0
  165. package/src/components/sessions/SessionManager.tsx +188 -0
  166. package/src/components/sessions/StylesEditor.tsx +1335 -0
  167. package/src/components/sessions/TabContainer.tsx +151 -0
  168. package/src/components/sessions/VirtualSessionList.tsx +157 -0
  169. package/src/components/sessions/sessionToProcessorManager.tsx +420 -0
  170. package/src/components/settings/CertificateManager.tsx +410 -0
  171. package/src/components/settings/SegmentedControl.tsx +88 -0
  172. package/src/components/settings/SettingRow.tsx +52 -0
  173. package/src/contexts/GlobalStatsContext.tsx +396 -0
  174. package/src/contexts/SessionContext.tsx +2129 -0
  175. package/src/contexts/ThemeContext.tsx +428 -0
  176. package/src/contexts/UserSettingsContext.tsx +290 -0
  177. package/src/contexts/__tests__/GlobalStatsContext.test.tsx +390 -0
  178. package/src/global.d.ts +273 -0
  179. package/src/hooks/useDocumentQueue.tsx +210 -0
  180. package/src/hooks/useToast.tsx +55 -0
  181. package/src/main.tsx +10 -0
  182. package/src/pages/Analytics.tsx +386 -0
  183. package/src/pages/CurrentSession.tsx +1174 -0
  184. package/src/pages/Dashboard.tsx +319 -0
  185. package/src/pages/Documents.tsx +317 -0
  186. package/src/pages/Projects.tsx +250 -0
  187. package/src/pages/Reporting.tsx +386 -0
  188. package/src/pages/Search.tsx +349 -0
  189. package/src/pages/Sessions.tsx +285 -0
  190. package/src/pages/Settings.tsx +2662 -0
  191. package/src/services/HyperlinkService.ts +1085 -0
  192. package/src/services/document/DocXMLaterProcessor.ts +617 -0
  193. package/src/services/document/DocumentProcessingComparison.ts +856 -0
  194. package/src/services/document/DocumentSnapshotService.ts +575 -0
  195. package/src/services/document/WordDocumentProcessor.ts +10509 -0
  196. package/src/services/document/__tests__/DocXMLaterProcessor.hyperlinks.test.md +311 -0
  197. package/src/services/document/__tests__/WordDocumentProcessor.integration.test.ts +515 -0
  198. package/src/services/document/__tests__/WordDocumentProcessor.test.ts +812 -0
  199. package/src/services/document/blanklines/BlankLineManager.ts +658 -0
  200. package/src/services/document/blanklines/__tests__/paragraphChecks.test.ts +281 -0
  201. package/src/services/document/blanklines/helpers/blankLineInsertion.ts +87 -0
  202. package/src/services/document/blanklines/helpers/blankLineSnapshot.ts +251 -0
  203. package/src/services/document/blanklines/helpers/clearCustom.ts +121 -0
  204. package/src/services/document/blanklines/helpers/contextChecks.ts +117 -0
  205. package/src/services/document/blanklines/helpers/imageChecks.ts +51 -0
  206. package/src/services/document/blanklines/helpers/paragraphChecks.ts +236 -0
  207. package/src/services/document/blanklines/helpers/removeBlanksBetweenListItems.ts +91 -0
  208. package/src/services/document/blanklines/helpers/removeTrailingBlanks.ts +35 -0
  209. package/src/services/document/blanklines/helpers/tableGuards.ts +21 -0
  210. package/src/services/document/blanklines/index.ts +67 -0
  211. package/src/services/document/blanklines/rules/additionRules.ts +337 -0
  212. package/src/services/document/blanklines/rules/indentationRules.ts +317 -0
  213. package/src/services/document/blanklines/rules/removalRules.ts +362 -0
  214. package/src/services/document/blanklines/rules/ruleTypes.ts +92 -0
  215. package/src/services/document/blanklines/types.ts +29 -0
  216. package/src/services/document/helpers/ImageBorderCropper.ts +377 -0
  217. package/src/services/document/helpers/__tests__/whitespace.test.ts +272 -0
  218. package/src/services/document/helpers/whitespace.ts +117 -0
  219. package/src/services/document/list/ListNormalizer.ts +947 -0
  220. package/src/services/document/list/index.ts +45 -0
  221. package/src/services/document/list/list-detection.ts +275 -0
  222. package/src/services/document/list/list-types.ts +162 -0
  223. package/src/services/document/processors/HyperlinkProcessor.ts +370 -0
  224. package/src/services/document/processors/ListProcessor.ts +257 -0
  225. package/src/services/document/processors/StructureProcessor.ts +176 -0
  226. package/src/services/document/processors/StyleProcessor.ts +389 -0
  227. package/src/services/document/processors/TableProcessor.ts +2238 -0
  228. package/src/services/document/processors/__tests__/HyperlinkProcessor.test.ts +314 -0
  229. package/src/services/document/processors/__tests__/ListProcessor.test.ts +291 -0
  230. package/src/services/document/processors/__tests__/StructureProcessor.test.ts +257 -0
  231. package/src/services/document/processors/__tests__/TableProcessor.hlp-tips-bullets.test.ts +459 -0
  232. package/src/services/document/processors/__tests__/TableProcessor.test.ts +1604 -0
  233. package/src/services/document/processors/index.ts +28 -0
  234. package/src/services/document/types/docx-processing.ts +310 -0
  235. package/src/services/editor/EditorActionHandlers.ts +901 -0
  236. package/src/services/editor/index.ts +13 -0
  237. package/src/setupTests.ts +47 -0
  238. package/src/styles/global.css +782 -0
  239. package/src/types/backup.ts +132 -0
  240. package/src/types/dictionary.ts +125 -0
  241. package/src/types/document-processing.ts +331 -0
  242. package/src/types/docxmlater-augments.d.ts +142 -0
  243. package/src/types/editor.ts +280 -0
  244. package/src/types/electron.ts +340 -0
  245. package/src/types/globalStats.ts +155 -0
  246. package/src/types/hyperlink.ts +471 -0
  247. package/src/types/operations.ts +354 -0
  248. package/src/types/session.ts +427 -0
  249. package/src/types/settings.ts +112 -0
  250. package/src/utils/MemoryMonitor.ts +248 -0
  251. package/src/utils/cn.ts +6 -0
  252. package/src/utils/colorConvert.ts +306 -0
  253. package/src/utils/diffUtils.ts +347 -0
  254. package/src/utils/documentUtils.ts +202 -0
  255. package/src/utils/electronGuard.ts +62 -0
  256. package/src/utils/indexedDB.ts +915 -0
  257. package/src/utils/logger.ts +717 -0
  258. package/src/utils/pathSecurity.ts +232 -0
  259. package/src/utils/pathValidator.ts +236 -0
  260. package/src/utils/processingTimeEstimator.ts +153 -0
  261. package/src/utils/safeJsonParse.ts +62 -0
  262. package/src/utils/textSanitizer.ts +162 -0
  263. package/src/utils/urlHelpers.ts +304 -0
  264. package/src/utils/urlPatterns.ts +198 -0
  265. package/src/utils/urlSanitizer.ts +152 -0
  266. package/src/vite-env.d.ts +11 -0
  267. package/tsconfig.electron.json +19 -0
  268. package/tsconfig.json +36 -0
  269. package/tsconfig.node.json +12 -0
  270. package/typedoc.json +45 -0
  271. package/vite.config.ts +152 -0
@@ -0,0 +1,45 @@
1
+ /**
2
+ * List normalization module
3
+ *
4
+ * Moved from docxmlater to dochub-app for processing-level customization.
5
+ * Provides typed list prefix detection and normalization to proper Word formatting.
6
+ */
7
+
8
+ // Types
9
+ export type {
10
+ ListCategory,
11
+ NumberFormat,
12
+ BulletFormat,
13
+ ListDetectionResult,
14
+ ListAnalysis,
15
+ IndentationLevel,
16
+ ListNormalizationOptions,
17
+ ListNormalizationReport,
18
+ } from "./list-types";
19
+
20
+ // Detection utilities
21
+ export {
22
+ TYPED_LIST_PATTERNS,
23
+ PATTERN_TO_CATEGORY,
24
+ FORMAT_TO_LEVEL,
25
+ getLevelFromFormat,
26
+ inferLevelFromIndentation,
27
+ inferLevelFromRelativeIndentation,
28
+ detectTypedPrefix,
29
+ getParagraphIndentation,
30
+ detectListType,
31
+ validateListSequence,
32
+ getListCategoryFromFormat,
33
+ } from "./list-detection";
34
+
35
+ // Normalization
36
+ export {
37
+ ListNormalizer,
38
+ analyzeCellLists,
39
+ analyzeTableLists,
40
+ normalizeListsInCell,
41
+ normalizeListsInTable,
42
+ normalizeOrphanListLevelsInCell,
43
+ normalizeOrphanListLevelsInTable,
44
+ stripTypedPrefix,
45
+ } from "./ListNormalizer";
@@ -0,0 +1,275 @@
1
+ /**
2
+ * List Detection Utilities
3
+ *
4
+ * Moved from docxmlater to dochub-app for processing-level customization.
5
+ * Provides functions to detect typed list prefixes and analyze
6
+ * paragraph list properties.
7
+ */
8
+
9
+ import type { Paragraph } from "docxmlater";
10
+ import type {
11
+ ListCategory,
12
+ ListDetectionResult,
13
+ NumberFormat,
14
+ BulletFormat,
15
+ } from "./list-types";
16
+
17
+ // =============================================================================
18
+ // CONSTANTS
19
+ // =============================================================================
20
+
21
/**
 * Numbered prefix patterns. Capture group 1 holds the marker itself
 * (digits, a single letter, or a roman numeral) without the trailing
 * "." / ")" and whitespace.
 *
 * Single letters are listed before roman numerals, so "i.", "v." and "x."
 * match a letter pattern first. The roman pattern carries the `i` flag and
 * therefore also matches upper-case numerals such as "IV.".
 */
const NUMBERED_PREFIX_PATTERNS: Record<string, RegExp> = {
  decimal: /^(\d+)[.)]\s+/,
  lowerLetter: /^([a-z])[.)]\s+/,
  upperLetter: /^([A-Z])[.)]\s+/,
  lowerRoman: /^((?:i{1,3}|iv|vi{0,3}|ix|x{1,3}))[.)]\s+/i,
};

/** Bullet-like prefix patterns: a single glyph followed by whitespace (no capture group). */
const BULLET_PREFIX_PATTERNS: Record<string, RegExp> = {
  bullet: /^[•●○◦▪■□]\s+/,
  dash: /^[-–—]\s+/,
  arrow: /^[►▸▶→]\s+/,
};

/**
 * Regex patterns for typed list prefixes, keyed by format name.
 *
 * Key order is significant: consumers that iterate the entries see the
 * numbered patterns first (decimal, lowerLetter, upperLetter, lowerRoman),
 * followed by the bullet patterns (bullet, dash, arrow).
 */
export const TYPED_LIST_PATTERNS: Record<string, RegExp> = {
  ...NUMBERED_PREFIX_PATTERNS,
  ...BULLET_PREFIX_PATTERNS,
};
37
+
38
/**
 * High-level list category for each typed-prefix pattern name.
 *
 * The four numeric/alphabetic/roman patterns are "numbered"; the three
 * glyph-based patterns are "bullet".
 */
export const PATTERN_TO_CATEGORY: Record<string, ListCategory> = {
  // Numbered markers
  decimal: "numbered",
  lowerLetter: "numbered",
  upperLetter: "numbered",
  lowerRoman: "numbered",

  // Bullet markers
  bullet: "bullet",
  dash: "bullet",
  arrow: "bullet",
};
48
+
49
/**
 * Map typed prefix format to Word numbering level.
 * Word's default multilevel list uses:
 *   Level 0: 1., 2., 3. (decimal)
 *   Level 1: a., b., c. (lowerLetter)
 *   Level 2: i., ii., iii. (lowerRoman)
 */
export const FORMAT_TO_LEVEL: Record<string, number> = {
  decimal: 0, // 1., 2., 3.
  lowerLetter: 1, // a., b., c.
  upperLetter: 0, // A., B., C. — top-level alternative to decimal in business docs
  lowerRoman: 2, // i., ii., iii.
  upperRoman: 2, // I., II., III.
  bullet: 0, // Top-level bullet (filled circle)
  dash: 0, // Top-level dash marker
  arrow: 0, // Top-level arrow marker
};

/**
 * Get the Word numbering level for a given format.
 *
 * @param format - Format name (a key of FORMAT_TO_LEVEL), or null.
 * @returns The mapped level, or 0 (top level) for null, empty, or unknown
 *   formats.
 */
export function getLevelFromFormat(format: string | null): number {
  if (!format) return 0;

  // Only consult the table's own keys. A plain index lookup would also
  // resolve names inherited from Object.prototype ("constructor",
  // "toString", "__proto__", ...) and hand back a non-number.
  if (!Object.prototype.hasOwnProperty.call(FORMAT_TO_LEVEL, format)) return 0;

  return FORMAT_TO_LEVEL[format] ?? 0;
}
75
+
76
/**
 * Word standard indentation values in twips.
 * Level 0: 720 twips (0.5 inch) left indent
 * Level 1: 1080 twips (0.75 inch) left indent
 * Level 2: 1440 twips (1 inch) left indent
 * Each subsequent level adds 360 twips.
 */
const WORD_BASE_INDENT = 720;
const INDENT_PER_LEVEL = 360;

/** Deepest list level returned by the inference helpers (levels run 0–8). */
const MAX_LIST_LEVEL = 8;

// =============================================================================
// CORE DETECTION FUNCTIONS
// =============================================================================

/**
 * Infer list level from absolute left indentation.
 * Uses standard Word indentation: 720 twips for level 0, +360 per level.
 *
 * @param indentTwips - Left indentation in twips.
 * @returns A level in the range 0–8. Indentation below the level-0 baseline
 *   (or NaN) yields 0; indentation deeper than level 8 is clamped to 8 so the
 *   result matches the cap applied by inferLevelFromRelativeIndentation.
 */
export function inferLevelFromIndentation(indentTwips: number): number {
  if (Number.isNaN(indentTwips) || indentTwips < WORD_BASE_INDENT) return 0;

  const rawLevel = Math.floor((indentTwips - WORD_BASE_INDENT) / INDENT_PER_LEVEL);
  return Math.min(MAX_LIST_LEVEL, rawLevel);
}

/**
 * Infer list level from relative indentation (baseline already subtracted).
 * Used when normalizing lists within a table cell where the baseline
 * indentation varies per cell.
 *
 * @param relativeIndentTwips - Indentation beyond the baseline, in twips.
 * @returns A level in the range 0–8; zero, negative, or NaN input yields 0.
 */
export function inferLevelFromRelativeIndentation(relativeIndentTwips: number): number {
  if (Number.isNaN(relativeIndentTwips) || relativeIndentTwips <= 0) return 0;

  return Math.min(MAX_LIST_LEVEL, Math.floor(relativeIndentTwips / INDENT_PER_LEVEL));
}
108
+
109
/**
 * Detect a typed (hand-entered) list prefix at the start of `text`.
 *
 * Patterns from TYPED_LIST_PATTERNS are tried in declaration order and the
 * first acceptable match wins.
 *
 * Abbreviation guard: a single-letter marker ("A.", "p)", and likewise the
 * one-letter roman numerals "i", "v", "x") is NOT treated as a list marker
 * when the text following the prefix itself starts with a letter and a
 * period, as in "P. O. Box" or "I. M. Pei". The guard is applied to every
 * pattern, so a marker rejected as a letter cannot be re-accepted by the
 * case-insensitive roman pattern further down the table.
 *
 * @param text - Paragraph text to inspect.
 * @returns The matched prefix (including its trailing whitespace), the
 *   pattern name as `format`, and the pattern's category. When nothing
 *   matches, `prefix` and `format` are null and `category` is "none".
 */
export function detectTypedPrefix(text: string): {
  prefix: string | null;
  format: NumberFormat | BulletFormat | null;
  category: ListCategory;
} {
  for (const [format, regex] of Object.entries(TYPED_LIST_PATTERNS)) {
    const match = text.match(regex);
    if (!match) continue;

    // Group 1 is the marker for numbered patterns; bullet patterns have no
    // capture group, so `marker` is undefined for them.
    const marker = match[1];
    if (marker !== undefined && /^[A-Za-z]$/.test(marker)) {
      const remaining = text.substring(match[0].length);
      // Another "letter + period" right after the prefix means this is most
      // likely an abbreviation, not a list item.
      if (/^[A-Za-z]\./.test(remaining)) {
        continue;
      }
    }

    return {
      prefix: match[0],
      format: format as NumberFormat | BulletFormat,
      category: PATTERN_TO_CATEGORY[format] ?? "none",
    };
  }

  return { prefix: null, format: null, category: "none" };
}
147
+
148
/**
 * Read a paragraph's left indentation.
 *
 * @param paragraph - Paragraph whose formatting is queried.
 * @returns `indentation.left` from the paragraph's formatting, or 0 when the
 *   formatting, its indentation, or the left value is missing.
 */
export function getParagraphIndentation(paragraph: Paragraph): number {
  const leftIndent = paragraph.getFormatting()?.indentation?.left;
  return leftIndent ?? 0;
}
155
+
156
/**
 * Classify a single paragraph as a real Word list item, a typed-prefix list
 * item, or a non-list paragraph.
 *
 * Checks run in priority order:
 *   1. Numbering with a defined, non-zero numId → real Word list.
 *   2. A typed prefix found by detectTypedPrefix → typed list.
 *   3. Otherwise → not a list.
 *
 * @param paragraph - Paragraph to analyze.
 * @returns Detection result; `indentationTwips` is always populated from
 *   getParagraphIndentation regardless of which branch is taken.
 */
export function detectListType(paragraph: Paragraph): ListDetectionResult {
  const paragraphText = paragraph.getText();
  const indentationTwips = getParagraphIndentation(paragraph);
  const numPr = paragraph.getNumbering();

  // 1) Real Word list (<w:numPr> present)
  if (numPr && numPr.numId !== undefined && numPr.numId !== 0) {
    const wordLevel = numPr.level ?? 0;
    return {
      // "numbered" is only a default; the caller can refine it with a
      // NumberingManager lookup.
      category: "numbered",
      isWordList: true,
      typedPrefix: null,
      inferredLevel: wordLevel,
      format: null, // would need a numbering.xml lookup
      numId: numPr.numId,
      ilvl: wordLevel,
      indentationTwips,
    };
  }

  // 2) Typed prefix
  const { prefix, format, category } = detectTypedPrefix(paragraphText);
  if (prefix) {
    return {
      category,
      isWordList: false,
      typedPrefix: prefix,
      // The level comes from the prefix FORMAT, not from indentation:
      // decimal=0, lowerLetter=1, lowerRoman=2
      inferredLevel: getLevelFromFormat(format),
      format,
      numId: null,
      ilvl: null,
      indentationTwips,
    };
  }

  // 3) Not a list
  return {
    category: "none",
    isWordList: false,
    typedPrefix: null,
    inferredLevel: 0,
    format: null,
    numId: null,
    ilvl: null,
    indentationTwips,
  };
}
207
+
208
/**
 * Sanity-check the order of typed numbered prefixes.
 *
 * Only entries whose detection is category "numbered" with a typed prefix
 * are considered. Decimal markers and single-letter markers are tracked
 * independently; each marker must either follow its predecessor by exactly
 * one step or restart the sequence ("1" / "a"). Multi-letter markers (e.g.
 * roman numerals such as "ii") are ignored. Letters are compared
 * case-insensitively.
 *
 * E.g., "1. 2. 3." is valid, "1. 5. 2." is suspicious.
 *
 * @param paragraphs - Detection results in document order.
 * @returns `warnings` with one message per suspicious step, and `valid`
 *   set to true when there are none.
 */
export function validateListSequence(
  paragraphs: Array<{ detection: ListDetectionResult; text: string }>
): { valid: boolean; warnings: string[] } {
  const warnings: string[] = [];
  let previousNumber = 0;
  let previousLetter = "";

  paragraphs.forEach(({ detection }) => {
    const { typedPrefix, category } = detection;
    if (category !== "numbered" || !typedPrefix) return;

    const marker = /^(\d+|[a-zA-Z]+)/.exec(typedPrefix)?.[1];
    if (!marker) return;

    if (/^\d+$/.test(marker)) {
      // Decimal sequence
      const current = parseInt(marker, 10);
      const continues = current === previousNumber + 1;
      const restarts = current === 1;
      if (previousNumber > 0 && !continues && !restarts) {
        warnings.push(`Unexpected number sequence: ${previousNumber} → ${current}`);
      }
      previousNumber = current;
    } else if (/^[a-z]$/i.test(marker)) {
      // Single-letter sequence
      const current = marker.toLowerCase();
      if (previousLetter) {
        const step = current.charCodeAt(0) - previousLetter.charCodeAt(0);
        if (step !== 1 && current !== "a") {
          warnings.push(`Unexpected letter sequence: ${previousLetter} → ${current}`);
        }
      }
      previousLetter = current;
    }
  });

  return { valid: warnings.length === 0, warnings };
}
252
+
253
/**
 * Map a format name to its high-level list category.
 *
 * @param format - Format name such as "decimal", "lowerRoman" or "dash".
 * @returns "bullet" for bullet/dash/arrow, "numbered" for decimal, letter
 *   and roman formats, and "none" for undefined, empty, or unrecognized
 *   values.
 */
export function getListCategoryFromFormat(
  format: string | undefined
): ListCategory {
  switch (format) {
    case "bullet":
    case "dash":
    case "arrow":
      return "bullet";

    case "decimal":
    case "lowerLetter":
    case "upperLetter":
    case "lowerRoman":
    case "upperRoman":
      return "numbered";

    default:
      return "none";
  }
}
@@ -0,0 +1,162 @@
1
+ /**
2
+ * List Normalization Types
3
+ *
4
+ * Moved from docxmlater to dochub-app for processing-level customization.
5
+ * These types support detection and normalization of typed list prefixes
6
+ * to proper Word list formatting.
7
+ */
8
+
9
/**
 * Coarse classification of a paragraph's list kind; "none" means the
 * paragraph is not a list item.
 */
export type ListCategory =
  | "numbered"
  | "bullet"
  | "none";
11
+
12
/**
 * Numbered-list marker styles:
 *   - decimal:     1. 2. 3.
 *   - lowerLetter: a. b. c.
 *   - upperLetter: A. B. C.
 *   - lowerRoman:  i. ii. iii.
 *   - upperRoman:  I. II. III.
 */
export type NumberFormat = "decimal" | "lowerLetter" | "upperLetter" | "lowerRoman" | "upperRoman";
19
+
20
/**
 * Bullet-list marker styles:
 *   - bullet: • ● ○
 *   - dash:   - – —
 *   - arrow:  ► ▸
 */
export type BulletFormat = "bullet" | "dash" | "arrow";
25
+
26
/**
 * Outcome of list detection for one paragraph.
 */
export interface ListDetectionResult {
  /** Coarse list kind ("numbered", "bullet" or "none"). */
  category: ListCategory;

  /** Whether the paragraph is a real Word list, i.e. carries <w:numPr>. */
  isWordList: boolean;

  /** Typed prefix text that was matched (e.g. "1. ", "a) ", "• "), or null. */
  typedPrefix: string | null;

  /**
   * Inferred nesting level. As produced by detectListType this is the Word
   * level for real Word lists, the level implied by the prefix format for
   * typed lists, and 0 for non-list paragraphs.
   */
  inferredLevel: number;

  /** Detected marker format, or null when none was determined. */
  format: NumberFormat | BulletFormat | null;

  /** numId from <w:numPr> for Word lists; null otherwise. */
  numId: number | null;

  /** ilvl from <w:numPr> for Word lists; null otherwise. */
  ilvl: number | null;

  /** Raw left indentation in twips, kept for debugging. */
  indentationTwips: number;
}
54
+
55
/**
 * Aggregate list analysis for one scope (cell, table, or document).
 */
export interface ListAnalysis {
  /** Every analyzed paragraph together with its text and detection result. */
  paragraphs: {
    /** Paragraph instance; typed as unknown to avoid a circular dependency. */
    paragraph: unknown;
    text: string;
    detection: ListDetectionResult;
  }[];

  /** Whether at least one typed (non-Word) list item was found. */
  hasTypedLists: boolean;

  /** Whether at least one real Word list item was found. */
  hasWordLists: boolean;

  /** Whether numbered AND bullet lists are both present. */
  hasMixedCategories: boolean;

  /** Dominant list category within the scope. */
  majorityCategory: ListCategory;

  /** Paragraph counts per category. */
  counts: {
    numbered: number;
    bullet: number;
    none: number;
  };

  /** Action recommended for this scope. */
  recommendedAction: "normalize" | "none";
}
88
+
89
/**
 * User-configured indentation settings for one list level, used during
 * list normalization.
 */
export interface IndentationLevel {
  /** List level this entry configures. */
  level: number;

  /** Text indent, in inches. */
  textIndent: number;

  /** Symbol (bullet/number) indent, in inches. */
  symbolIndent: number;

  /** Optional bullet character for this level. */
  bulletChar?: string;

  /** Optional numbered format for this level. */
  numberedFormat?: NumberFormat | string;
}
99
+
100
/**
 * Settings that control list normalization. Every field is optional.
 */
export interface ListNormalizationOptions {
  /** numId (from numbering.xml) to apply to numbered lists. */
  numberedStyleNumId?: number;

  /** numId (from numbering.xml) to apply to bullet lists. */
  bulletStyleNumId?: number;

  /** Scope being processed. */
  scope?: "cell" | "table" | "document";

  /**
   * When true, ALL list items are converted to the majority type.
   * When false, only typed lists are converted and existing Word lists
   * are preserved.
   */
  forceMajority?: boolean;

  /**
   * When true, the original indentation is kept instead of the per-level
   * defaults.
   */
  preserveIndentation?: boolean;

  /**
   * Per-level indentation settings supplied by the user: symbol indent,
   * text indent, and bullet characters.
   */
  indentationLevels?: IndentationLevel[];

  /**
   * Extra twips added to the hanging indent for documents with 10+ numbered
   * items. Double-digit numbers are wider and need more room, so this value
   * is added to textIndent (leftIndent) before hangingIndent is computed,
   * widening the gap between number and text.
   * Typically 72 twips (0.05 inches), taking the hanging indent from 0.25"
   * to 0.30".
   */
  extraHangingIndentTwips?: number;
}
139
+
140
/**
 * Summary produced by a normalization run.
 */
export interface ListNormalizationReport {
  /** Count of paragraphs that were successfully normalized. */
  normalized: number;

  /** Count of paragraphs skipped (already correct, or not a list). */
  skipped: number;

  /** Messages for any errors encountered. */
  errors: string[];

  /** Majority category that was applied. */
  appliedCategory: ListCategory;

  /** Per-paragraph outcome details. */
  details: {
    originalText: string;
    action: "normalized" | "skipped" | "error";
    reason?: string;
  }[];
}