documentation-hub 5.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +43 -0
- package/.github/workflows/build.yml +64 -0
- package/.github/workflows/ci.yml +39 -0
- package/.vscode/extensions.json +3 -0
- package/Current.md +97 -0
- package/DocHub_Image.png +0 -0
- package/README.md +666 -0
- package/USER_GUIDE.md +1173 -0
- package/Updater.md +311 -0
- package/build/256x256.png +0 -0
- package/build/512x512.png +0 -0
- package/build/app-update.yml +4 -0
- package/build/create-icon.js +208 -0
- package/build/icon.ico +0 -0
- package/build/icon.png +0 -0
- package/build/icon_1024x1024.png +0 -0
- package/dist/assets/Analytics-BpsG9895.js +1 -0
- package/dist/assets/Card-IAZin8kp.js +1 -0
- package/dist/assets/CurrentSession-B-rFkHvf.js +12 -0
- package/dist/assets/Dashboard-C_5gMb0q.js +1 -0
- package/dist/assets/Documents-CqZ25axS.js +1 -0
- package/dist/assets/Input-l89xwXBi.js +1 -0
- package/dist/assets/Reporting-DqdHJY_a.js +1 -0
- package/dist/assets/Search-XNbu5z_3.js +1 -0
- package/dist/assets/SessionManager-lH9hZfzH.js +1 -0
- package/dist/assets/Sessions-ClZOPYNc.js +1 -0
- package/dist/assets/Settings-DUEHGURa.js +11 -0
- package/dist/assets/index-8xUe8ptc.js +24 -0
- package/dist/assets/index-RYyJqF7O.css +1 -0
- package/dist/assets/path-BkOl0AGO.js +1 -0
- package/dist/assets/promises-ID_B9S-h.js +1 -0
- package/dist/assets/urlHelpers-TvgahX0r.js +1 -0
- package/dist/assets/useToast-yRSO1dkm.js +1 -0
- package/dist/assets/vendor-charts-RkGK5ROP.js +36 -0
- package/dist/assets/vendor-db-l0sNRNKZ.js +1 -0
- package/dist/assets/vendor-react-BVZ_anCF.js +4 -0
- package/dist/assets/vendor-search-Dw8P0qyA.js +1 -0
- package/dist/assets/vendor-ui-BU7NfluV.js +53 -0
- package/dist/electron/PowerAutomateApiService-LfW09ZGr.js +147 -0
- package/dist/electron/main-CXkNtyv-.js +19789 -0
- package/dist/electron/main.js +5 -0
- package/dist/electron/preload.js +1 -0
- package/dist/icon.png +0 -0
- package/dist/index.html +27 -0
- package/docs/CODEBASE_ANALYSIS_REPORT.md +309 -0
- package/docs/DEBUG_LOGGING_GUIDE.md +244 -0
- package/docs/README.md +115 -0
- package/docs/TOC_WIRING_GUIDE.md +344 -0
- package/docs/analysis/Bullet_Symbol_Bug_Analysis.md +136 -0
- package/docs/analysis/DOCXMLATER_ANALYSIS_SUMMARY.txt +169 -0
- package/docs/analysis/Document_Processing_Issues_Analysis.md +704 -0
- package/docs/analysis/FIELD_PRESERVATION_ANALYSIS.md +1200 -0
- package/docs/analysis/INDENTATION_PRESERVE_ANALYSIS.md +181 -0
- package/docs/analysis/INDENTATION_PRESERVE_IMPLEMENTATION.md +207 -0
- package/docs/analysis/List_Implementation.md +206 -0
- package/docs/analysis/List_Implementation_Accuracy_Report.md +366 -0
- package/docs/analysis/PROCESSING_OPTIONS_UI_UPDATES.md +220 -0
- package/docs/analysis/RefactorStyles.md +852 -0
- package/docs/analysis/STYLE_PARAMETER_ENHANCEMENT.md +143 -0
- package/docs/analysis/docxmlater-comparison-todo-2025-11-13.md +636 -0
- package/docs/analysis/docxmlater-implementation-analysis-2025-11-13.md +340 -0
- package/docs/analysis/docxmlater-template_ui-integration-analysis.md +263 -0
- package/docs/analysis/github-issues-to-create.md +237 -0
- package/docs/api/API_README.md +538 -0
- package/docs/api/API_REFERENCE.md +751 -0
- package/docs/api/TYPE_DEFINITIONS.md +869 -0
- package/docs/architecture/FONT_EMBEDDING_GUIDE.md +318 -0
- package/docs/architecture/docxmlater-functions-and-structure.md +726 -0
- package/docs/docxmlater-readme.md +1341 -0
- package/docs/fixes/EXECUTION_LOG_TEST_BASE.md +573 -0
- package/docs/fixes/HYPERLINK_TEXT_SANITIZATION.md +253 -0
- package/docs/fixes/README.md +37 -0
- package/docs/github-issues/issue-1-body.md +125 -0
- package/docs/github-issues/issue-10-body.md +850 -0
- package/docs/github-issues/issue-2-body.md +200 -0
- package/docs/github-issues/issue-3-body.md +270 -0
- package/docs/github-issues/issue-4-body.md +169 -0
- package/docs/github-issues/issue-5-body.md +173 -0
- package/docs/github-issues/issue-6-body.md +158 -0
- package/docs/github-issues/issue-7-body.md +171 -0
- package/docs/github-issues/issue-8-body.md +407 -0
- package/docs/github-issues/issue-9-body.md +515 -0
- package/docs/github-issues/issue-tracker.md +274 -0
- package/docs/github-issues/predictive-analysis-2025-10-18.md +2131 -0
- package/docs/implementation/List_Framework_Refactor_Plan.md +336 -0
- package/docs/implementation/PRIMARY_TEXT_COLOR_FEATURE.md +217 -0
- package/docs/implementation/RELEASE_PLAN_v2.1.0.md +362 -0
- package/docs/implementation/RefactorStyles.md +588 -0
- package/docs/implementation/implement-plan.md +489 -0
- package/docs/implementation/missing-helpers-implementation.md +391 -0
- package/docs/implementation/refactor-plan.md +520 -0
- package/docs/implementation/session-implementation-complete.md +233 -0
- package/docs/implementation/session-management-plan.md +250 -0
- package/docs/setup-checklist.md +77 -0
- package/docs/versions/changelog.md +345 -0
- package/electron/customUpdater.ts +656 -0
- package/electron/main.ts +2441 -0
- package/electron/memoryConfig.ts +187 -0
- package/electron/preload.ts +394 -0
- package/electron/proxyConfig.ts +340 -0
- package/electron/services/BackupService.ts +452 -0
- package/electron/services/DictionaryService.ts +402 -0
- package/electron/services/LocalDictionaryLookupService.ts +147 -0
- package/electron/services/PowerAutomateApiService.ts +231 -0
- package/electron/services/SharePointSyncService.ts +474 -0
- package/electron/windowsCertStore.ts +427 -0
- package/electron/zscalerConfig.ts +381 -0
- package/eslint.config.js +92 -0
- package/jest.config.js +52 -0
- package/package.json +214 -0
- package/postcss.config.mjs +6 -0
- package/public/icon.png +0 -0
- package/publish-release.ps1 +5 -0
- package/renovate.json +30 -0
- package/src/App.tsx +216 -0
- package/src/__mocks__/p-limit.js +12 -0
- package/src/__mocks__/styleMock.js +1 -0
- package/src/components/common/BugReportButton.tsx +44 -0
- package/src/components/common/BugReportDialog.tsx +193 -0
- package/src/components/common/Button.tsx +153 -0
- package/src/components/common/Card.tsx +86 -0
- package/src/components/common/ColorPickerDialog.tsx +177 -0
- package/src/components/common/ConfirmDialog.tsx +96 -0
- package/src/components/common/DebugConsole.tsx +275 -0
- package/src/components/common/EmptyState.tsx +183 -0
- package/src/components/common/ErrorBoundary.tsx +98 -0
- package/src/components/common/ErrorDetailsDialog.tsx +153 -0
- package/src/components/common/ErrorFallback.tsx +218 -0
- package/src/components/common/Input.tsx +109 -0
- package/src/components/common/Skeleton.tsx +184 -0
- package/src/components/common/SplashScreen.tsx +81 -0
- package/src/components/common/Toast.tsx +155 -0
- package/src/components/common/Tooltip.tsx +79 -0
- package/src/components/common/UpdateNotification.tsx +320 -0
- package/src/components/comparison/ComparisonWindow.tsx +374 -0
- package/src/components/comparison/SideBySideDiff.tsx +486 -0
- package/src/components/comparison/index.ts +8 -0
- package/src/components/document/DocumentUploader.tsx +288 -0
- package/src/components/document/HyperlinkPreview.tsx +430 -0
- package/src/components/document/HyperlinkService.md +1484 -0
- package/src/components/document/Hyperlink_Technical_Documentation.md +496 -0
- package/src/components/document/InlineChangesView.tsx +707 -0
- package/src/components/document/ProcessingProgress.tsx +303 -0
- package/src/components/document/ProcessingResults.tsx +256 -0
- package/src/components/document/TrackedChangesDetail.tsx +530 -0
- package/src/components/document/TrackedChangesPanel.tsx +546 -0
- package/src/components/document/VirtualDocumentList.tsx +240 -0
- package/src/components/editor/DocumentEditor.tsx +723 -0
- package/src/components/editor/DocumentEditorModal.tsx +640 -0
- package/src/components/editor/EditorQuickActions.tsx +502 -0
- package/src/components/editor/EditorToolbar.tsx +312 -0
- package/src/components/editor/TableEditor.tsx +926 -0
- package/src/components/editor/index.ts +18 -0
- package/src/components/layout/Header.tsx +190 -0
- package/src/components/layout/Sidebar.tsx +313 -0
- package/src/components/layout/TitleBar.tsx +190 -0
- package/src/components/navigation/CommandPalette.tsx +233 -0
- package/src/components/navigation/KeyboardShortcutsModal.tsx +173 -0
- package/src/components/sessions/ChangeItem.tsx +408 -0
- package/src/components/sessions/ChangeViewer.tsx +1155 -0
- package/src/components/sessions/DocumentComparisonModal.tsx +314 -0
- package/src/components/sessions/ProcessingOptions.tsx +297 -0
- package/src/components/sessions/ReplacementsTab.tsx +438 -0
- package/src/components/sessions/RevisionHandlingOptions.tsx +87 -0
- package/src/components/sessions/SessionManager.tsx +188 -0
- package/src/components/sessions/StylesEditor.tsx +1335 -0
- package/src/components/sessions/TabContainer.tsx +151 -0
- package/src/components/sessions/VirtualSessionList.tsx +157 -0
- package/src/components/sessions/sessionToProcessorManager.tsx +420 -0
- package/src/components/settings/CertificateManager.tsx +410 -0
- package/src/components/settings/SegmentedControl.tsx +88 -0
- package/src/components/settings/SettingRow.tsx +52 -0
- package/src/contexts/GlobalStatsContext.tsx +396 -0
- package/src/contexts/SessionContext.tsx +2129 -0
- package/src/contexts/ThemeContext.tsx +428 -0
- package/src/contexts/UserSettingsContext.tsx +290 -0
- package/src/contexts/__tests__/GlobalStatsContext.test.tsx +390 -0
- package/src/global.d.ts +273 -0
- package/src/hooks/useDocumentQueue.tsx +210 -0
- package/src/hooks/useToast.tsx +55 -0
- package/src/main.tsx +10 -0
- package/src/pages/Analytics.tsx +386 -0
- package/src/pages/CurrentSession.tsx +1174 -0
- package/src/pages/Dashboard.tsx +319 -0
- package/src/pages/Documents.tsx +317 -0
- package/src/pages/Projects.tsx +250 -0
- package/src/pages/Reporting.tsx +386 -0
- package/src/pages/Search.tsx +349 -0
- package/src/pages/Sessions.tsx +285 -0
- package/src/pages/Settings.tsx +2662 -0
- package/src/services/HyperlinkService.ts +1085 -0
- package/src/services/document/DocXMLaterProcessor.ts +617 -0
- package/src/services/document/DocumentProcessingComparison.ts +856 -0
- package/src/services/document/DocumentSnapshotService.ts +575 -0
- package/src/services/document/WordDocumentProcessor.ts +10509 -0
- package/src/services/document/__tests__/DocXMLaterProcessor.hyperlinks.test.md +311 -0
- package/src/services/document/__tests__/WordDocumentProcessor.integration.test.ts +515 -0
- package/src/services/document/__tests__/WordDocumentProcessor.test.ts +812 -0
- package/src/services/document/blanklines/BlankLineManager.ts +658 -0
- package/src/services/document/blanklines/__tests__/paragraphChecks.test.ts +281 -0
- package/src/services/document/blanklines/helpers/blankLineInsertion.ts +87 -0
- package/src/services/document/blanklines/helpers/blankLineSnapshot.ts +251 -0
- package/src/services/document/blanklines/helpers/clearCustom.ts +121 -0
- package/src/services/document/blanklines/helpers/contextChecks.ts +117 -0
- package/src/services/document/blanklines/helpers/imageChecks.ts +51 -0
- package/src/services/document/blanklines/helpers/paragraphChecks.ts +236 -0
- package/src/services/document/blanklines/helpers/removeBlanksBetweenListItems.ts +91 -0
- package/src/services/document/blanklines/helpers/removeTrailingBlanks.ts +35 -0
- package/src/services/document/blanklines/helpers/tableGuards.ts +21 -0
- package/src/services/document/blanklines/index.ts +67 -0
- package/src/services/document/blanklines/rules/additionRules.ts +337 -0
- package/src/services/document/blanklines/rules/indentationRules.ts +317 -0
- package/src/services/document/blanklines/rules/removalRules.ts +362 -0
- package/src/services/document/blanklines/rules/ruleTypes.ts +92 -0
- package/src/services/document/blanklines/types.ts +29 -0
- package/src/services/document/helpers/ImageBorderCropper.ts +377 -0
- package/src/services/document/helpers/__tests__/whitespace.test.ts +272 -0
- package/src/services/document/helpers/whitespace.ts +117 -0
- package/src/services/document/list/ListNormalizer.ts +947 -0
- package/src/services/document/list/index.ts +45 -0
- package/src/services/document/list/list-detection.ts +275 -0
- package/src/services/document/list/list-types.ts +162 -0
- package/src/services/document/processors/HyperlinkProcessor.ts +370 -0
- package/src/services/document/processors/ListProcessor.ts +257 -0
- package/src/services/document/processors/StructureProcessor.ts +176 -0
- package/src/services/document/processors/StyleProcessor.ts +389 -0
- package/src/services/document/processors/TableProcessor.ts +2238 -0
- package/src/services/document/processors/__tests__/HyperlinkProcessor.test.ts +314 -0
- package/src/services/document/processors/__tests__/ListProcessor.test.ts +291 -0
- package/src/services/document/processors/__tests__/StructureProcessor.test.ts +257 -0
- package/src/services/document/processors/__tests__/TableProcessor.hlp-tips-bullets.test.ts +459 -0
- package/src/services/document/processors/__tests__/TableProcessor.test.ts +1604 -0
- package/src/services/document/processors/index.ts +28 -0
- package/src/services/document/types/docx-processing.ts +310 -0
- package/src/services/editor/EditorActionHandlers.ts +901 -0
- package/src/services/editor/index.ts +13 -0
- package/src/setupTests.ts +47 -0
- package/src/styles/global.css +782 -0
- package/src/types/backup.ts +132 -0
- package/src/types/dictionary.ts +125 -0
- package/src/types/document-processing.ts +331 -0
- package/src/types/docxmlater-augments.d.ts +142 -0
- package/src/types/editor.ts +280 -0
- package/src/types/electron.ts +340 -0
- package/src/types/globalStats.ts +155 -0
- package/src/types/hyperlink.ts +471 -0
- package/src/types/operations.ts +354 -0
- package/src/types/session.ts +427 -0
- package/src/types/settings.ts +112 -0
- package/src/utils/MemoryMonitor.ts +248 -0
- package/src/utils/cn.ts +6 -0
- package/src/utils/colorConvert.ts +306 -0
- package/src/utils/diffUtils.ts +347 -0
- package/src/utils/documentUtils.ts +202 -0
- package/src/utils/electronGuard.ts +62 -0
- package/src/utils/indexedDB.ts +915 -0
- package/src/utils/logger.ts +717 -0
- package/src/utils/pathSecurity.ts +232 -0
- package/src/utils/pathValidator.ts +236 -0
- package/src/utils/processingTimeEstimator.ts +153 -0
- package/src/utils/safeJsonParse.ts +62 -0
- package/src/utils/textSanitizer.ts +162 -0
- package/src/utils/urlHelpers.ts +304 -0
- package/src/utils/urlPatterns.ts +198 -0
- package/src/utils/urlSanitizer.ts +152 -0
- package/src/vite-env.d.ts +11 -0
- package/tsconfig.electron.json +19 -0
- package/tsconfig.json +36 -0
- package/tsconfig.node.json +12 -0
- package/typedoc.json +45 -0
- package/vite.config.ts +152 -0
|
@@ -0,0 +1,947 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ListNormalizer - Core list normalization for document processing
|
|
3
|
+
*
|
|
4
|
+
* Moved from docxmlater to dochub-app for processing-level customization.
|
|
5
|
+
* Detects typed list prefixes and converts them to proper Word list formatting.
|
|
6
|
+
* Integrates with NumberingManager for numId resolution.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import type { Paragraph, Run, Table, TableCell, NumberingManager } from "docxmlater";
|
|
10
|
+
import { isRun, inchesToTwips } from "docxmlater";
|
|
11
|
+
import { logger } from "@/utils/logger";
|
|
12
|
+
import type {
|
|
13
|
+
ListCategory,
|
|
14
|
+
ListAnalysis,
|
|
15
|
+
ListNormalizationOptions,
|
|
16
|
+
ListNormalizationReport,
|
|
17
|
+
NumberFormat,
|
|
18
|
+
IndentationLevel,
|
|
19
|
+
} from "./list-types";
|
|
20
|
+
import {
|
|
21
|
+
detectListType,
|
|
22
|
+
getListCategoryFromFormat,
|
|
23
|
+
inferLevelFromRelativeIndentation,
|
|
24
|
+
} from "./list-detection";
|
|
25
|
+
|
|
26
|
+
// =============================================================================
|
|
27
|
+
// INDENTATION SETTINGS HELPERS
|
|
28
|
+
// =============================================================================
|
|
29
|
+
|
|
30
|
+
/**
 * Push the user's per-level indentation configuration onto an abstract
 * numbering definition, mutating its levels in place.
 *
 * For every configured level that exists on `abstractNum`:
 * - left indent   = textIndent (converted from inches) + `extraHangingIndentTwips`
 * - hanging indent = left indent - symbolIndent (converted from inches)
 * - bullet lists get the configured `bulletChar` (when set) as level text
 * - non-bullet lists get the configured `numberedFormat` (when set)
 *
 * @param abstractNum - Definition to update; a falsy value makes this a no-op.
 * @param indentationLevels - Per-level settings; missing or empty makes this a no-op.
 * @param isBulletList - Chooses between applying `bulletChar` and `numberedFormat`.
 * @param extraHangingIndentTwips - Extra twips added to the text indent (default 0).
 */
function applyIndentationSettings(
  abstractNum: ReturnType<NumberingManager["getAbstractNumbering"]>,
  indentationLevels: IndentationLevel[],
  isBulletList: boolean,
  extraHangingIndentTwips: number = 0
): void {
  if (!abstractNum || !indentationLevels || indentationLevels.length === 0) {
    return;
  }

  for (const cfg of indentationLevels) {
    const target = abstractNum.getLevel(cfg.level);
    // Levels missing from the definition are skipped.
    if (!target) continue;

    const leftTwips = inchesToTwips(cfg.textIndent) + extraHangingIndentTwips;
    // Hanging indent is the gap between the text position and the symbol position.
    const hangingTwips = leftTwips - inchesToTwips(cfg.symbolIndent);

    target.setLeftIndent(leftTwips);
    target.setHangingIndent(hangingTwips);

    if (isBulletList) {
      if (cfg.bulletChar) {
        target.setText(cfg.bulletChar);
      }
    } else if (cfg.numberedFormat) {
      target.setFormat(cfg.numberedFormat as NumberFormat);
    }
  }
}
|
|
60
|
+
|
|
61
|
+
// =============================================================================
|
|
62
|
+
// ANALYSIS FUNCTIONS
|
|
63
|
+
// =============================================================================
|
|
64
|
+
|
|
65
|
+
/**
 * Internal record pairing one paragraph with the data derived from it
 * during list analysis.
 */
interface AnalyzedParagraph {
  /** The paragraph object this record describes. */
  paragraph: Paragraph;
  /** The paragraph's text. */
  text: string;
  /** The value returned by `detectListType` for the paragraph. */
  detection: ReturnType<typeof detectListType>;
}
|
|
71
|
+
|
|
72
|
+
/**
 * Pick the dominant list category among the analyzed paragraphs.
 *
 * Every item whose detected category is "bullet" or "numbered" counts once;
 * anything else is ignored.
 *
 * @param analyzed - Analyzed paragraphs to tally.
 * @returns "none" when there are no bullet/numbered items, "bullet" only when
 *   bullets strictly outnumber numbered items, otherwise "numbered"
 *   (numbered wins ties).
 */
function determineMajorityCategory(analyzed: AnalyzedParagraph[]): ListCategory {
  const tally = { bullet: 0, numbered: 0 };

  for (const { detection } of analyzed) {
    switch (detection.category) {
      case "bullet":
        tally.bullet += 1;
        break;
      case "numbered":
        tally.numbered += 1;
        break;
      default:
        // Non-list paragraphs do not take part in the vote.
        break;
    }
  }

  if (tally.bullet + tally.numbered === 0) {
    return "none";
  }

  // Strictly-greater comparison: a tie resolves to "numbered".
  if (tally.bullet > tally.numbered) {
    return "bullet";
  }
  return "numbered";
}
|
|
97
|
+
|
|
98
|
+
/**
 * Inspect every paragraph of a table cell and summarize its list usage.
 *
 * Each paragraph is run through `detectListType`. When a `numberingManager`
 * is supplied, Word-list paragraphs (those with a non-null `numId`) have their
 * detected category overwritten in place with the category derived from the
 * format of their numbering level; without it the category reported by
 * `detectListType` is kept as-is.
 *
 * @param cell - Cell whose paragraphs are analyzed.
 * @param numberingManager - Optional manager used to resolve numId -> abstract
 *   numbering -> level format.
 * @returns Per-paragraph results, per-category counts, typed/Word/mixed flags,
 *   the majority category, and "normalize" as the recommended action when
 *   typed prefixes exist or bullets and numbers are mixed (otherwise "none").
 */
export function analyzeCellLists(
  cell: TableCell,
  numberingManager?: NumberingManager
): ListAnalysis {
  const analyzed: AnalyzedParagraph[] = [];
  for (const paragraph of cell.getParagraphs()) {
    analyzed.push({
      paragraph,
      text: paragraph.getText(),
      detection: detectListType(paragraph),
    });
  }

  // Resolve the real format of Word lists so bullets are not reported with
  // whatever default category detectListType() assigned them.
  if (numberingManager) {
    for (const { detection } of analyzed) {
      if (!detection.isWordList || detection.numId === null) continue;

      const instance = numberingManager.getInstance(detection.numId);
      if (!instance) continue;

      const abstractNum = numberingManager.getAbstractNumbering(instance.getAbstractNumId());
      if (!abstractNum) continue;

      const level = abstractNum.getLevel(detection.ilvl ?? 0);
      if (!level) continue;

      detection.category = getListCategoryFromFormat(level.getFormat());
    }
  }

  // Tally categories and note which kinds of list markup are present.
  const counts = { numbered: 0, bullet: 0, none: 0 };
  let hasTypedLists = false;
  let hasWordLists = false;

  for (const { detection } of analyzed) {
    counts[detection.category] += 1;

    if (detection.isWordList) {
      hasWordLists = true;
    } else if (detection.typedPrefix) {
      hasTypedLists = true;
    }
  }

  const hasMixedCategories = counts.numbered > 0 && counts.bullet > 0;
  const majorityCategory = determineMajorityCategory(analyzed);

  // Normalization is worthwhile when typed prefixes need converting or when
  // bullets and numbers are mixed and need unifying.
  const recommendedAction = hasTypedLists || hasMixedCategories ? "normalize" : "none";

  return {
    paragraphs: analyzed,
    hasTypedLists,
    hasWordLists,
    hasMixedCategories,
    majorityCategory,
    counts,
    recommendedAction,
  };
}
|
|
174
|
+
|
|
175
|
+
/**
 * Analyze lists in an entire table, one analysis per cell.
 *
 * Walks every row and every cell of `table` and stores the result of
 * `analyzeCellLists` for that cell.
 *
 * @param table - Table whose cells are analyzed.
 * @param numberingManager - Optional manager forwarded to `analyzeCellLists`
 *   so Word-list categories can be refined from their numbering level format.
 *   When omitted, each cell is analyzed without that refinement, exactly as
 *   before this parameter existed.
 * @returns Map keyed by cell object, holding that cell's list analysis.
 */
export function analyzeTableLists(
  table: Table,
  numberingManager?: NumberingManager
): Map<TableCell, ListAnalysis> {
  const results = new Map<TableCell, ListAnalysis>();

  for (const row of table.getRows()) {
    for (const cell of row.getCells()) {
      // Forward the manager so table-level analysis can run the same
      // Word-list category refinement as a direct analyzeCellLists call.
      results.set(cell, analyzeCellLists(cell, numberingManager));
    }
  }

  return results;
}
|
|
192
|
+
|
|
193
|
+
// =============================================================================
|
|
194
|
+
// NORMALIZATION FUNCTIONS
|
|
195
|
+
// =============================================================================
|
|
196
|
+
|
|
197
|
+
/**
 * Remove a typed list prefix from the start of a paragraph's runs.
 *
 * The prefix may be spread over several runs: runs that are entirely part of
 * the prefix are emptied, and the run in which the prefix ends keeps only the
 * text after it (with leading whitespace trimmed). If the prefix ended exactly
 * at a run boundary, leading whitespace of the following runs is trimmed until
 * a run with real content is reached. If content remained in the run where the
 * prefix ended, later runs are left untouched, because their leading space is
 * an inter-word separator rather than leftover prefix whitespace.
 *
 * Content items that are not runs are ignored. Runs that do not match the
 * pending prefix are skipped without modification.
 *
 * @param paragraph - Paragraph whose runs are edited in place.
 * @param prefix - Exact prefix text to strip.
 */
export function stripTypedPrefix(paragraph: Paragraph, prefix: string): void {
  let pending = prefix;
  let prefixConsumed = false;
  // True when the run that held the end of the prefix still has text left.
  let contentLeftInPrefixRun = false;

  for (const node of paragraph.getContent()) {
    if (!isRun(node)) continue;

    const run = node as Run;
    const runText = run.getText();

    // Phase 1: still consuming the prefix.
    if (!prefixConsumed && pending.length > 0) {
      if (runText.length <= pending.length) {
        // The whole run can only be a leading slice of the pending prefix.
        if (pending.startsWith(runText)) {
          pending = pending.substring(runText.length);
          run.setText("");
          prefixConsumed = pending.length === 0;
        }
      } else if (runText.startsWith(pending)) {
        // The prefix ends inside this run; keep what follows it.
        const rest = runText.substring(pending.length).trimStart();
        run.setText(rest);
        prefixConsumed = true;
        contentLeftInPrefixRun = rest.length > 0;
      }
      continue;
    }

    // Phase 2a: prefix ended mid-run with content left; nothing more to do.
    if (contentLeftInPrefixRun) break;

    // Nothing was consumed (e.g. empty prefix): leave the run alone.
    if (!prefixConsumed) continue;

    // Phase 2b: prefix ended at a run boundary; trim leftover leading space.
    if (runText.length === 0) continue;

    const trimmed = runText.trimStart();
    if (trimmed !== runText) {
      run.setText(trimmed);
    }
    // Stop at the first run with real content; whitespace-only runs that
    // trimmed down to nothing let the scan continue.
    if (trimmed.length > 0) break;
  }
}
|
|
261
|
+
|
|
262
|
+
/**
|
|
263
|
+
* Normalize all lists in a cell to consistent formatting.
|
|
264
|
+
* KEY BEHAVIORS:
|
|
265
|
+
* - ONE list type per cell - no mixing bullets and numbers
|
|
266
|
+
* - Format determines level: decimal=0, letter=1, roman=2
|
|
267
|
+
* - Word lists that don't match majority are converted
|
|
268
|
+
* - Non-list items are NEVER touched
|
|
269
|
+
* - User indentation settings are applied when provided
|
|
270
|
+
*
|
|
271
|
+
* FIX: Standalone typed decimal lists (1., 2., 3.) are no longer
|
|
272
|
+
* incorrectly assigned to level 2 based on paragraph indentation.
|
|
273
|
+
* Indentation-based nesting only applies when format also suggests nesting.
|
|
274
|
+
*/
|
|
275
|
+
export function normalizeListsInCell(
|
|
276
|
+
cell: TableCell,
|
|
277
|
+
options: ListNormalizationOptions,
|
|
278
|
+
numberingManager: NumberingManager
|
|
279
|
+
): ListNormalizationReport {
|
|
280
|
+
const analysis = analyzeCellLists(cell, numberingManager);
|
|
281
|
+
const majorityCategory = analysis.majorityCategory;
|
|
282
|
+
const report: ListNormalizationReport = {
|
|
283
|
+
normalized: 0,
|
|
284
|
+
skipped: 0,
|
|
285
|
+
errors: [],
|
|
286
|
+
appliedCategory: majorityCategory,
|
|
287
|
+
details: [],
|
|
288
|
+
};
|
|
289
|
+
|
|
290
|
+
// Handle cells that don't need category normalization but may need indentation fixes
|
|
291
|
+
if (analysis.recommendedAction === "none") {
|
|
292
|
+
// Even if no normalization needed, still apply user indentation settings to Word lists.
|
|
293
|
+
// Modify existing abstract numbering definitions in-place rather than creating new
|
|
294
|
+
// numIds. This preserves original bullet characters, parent-child list semantics
|
|
295
|
+
// (shared numId across levels), and ilvl assignments.
|
|
296
|
+
if (options?.indentationLevels?.length && analysis.hasWordLists) {
|
|
297
|
+
const updatedAbstractNums = new Set<number>();
|
|
298
|
+
|
|
299
|
+
for (const item of analysis.paragraphs) {
|
|
300
|
+
if (item.detection.isWordList && item.detection.numId !== null) {
|
|
301
|
+
const para = item.paragraph as Paragraph;
|
|
302
|
+
const numbering = para.getNumbering();
|
|
303
|
+
if (numbering) {
|
|
304
|
+
const instance = numberingManager.getInstance(numbering.numId);
|
|
305
|
+
if (instance) {
|
|
306
|
+
const abstractNumId = instance.getAbstractNumId();
|
|
307
|
+
if (!updatedAbstractNums.has(abstractNumId)) {
|
|
308
|
+
const abstractNum = numberingManager.getAbstractNumbering(abstractNumId);
|
|
309
|
+
if (abstractNum) {
|
|
310
|
+
const isBullet = item.detection.category === "bullet";
|
|
311
|
+
applyIndentationSettings(abstractNum, options.indentationLevels!, isBullet, options.extraHangingIndentTwips ?? 0);
|
|
312
|
+
updatedAbstractNums.add(abstractNumId);
|
|
313
|
+
}
|
|
314
|
+
}
|
|
315
|
+
}
|
|
316
|
+
// Do NOT change the paragraph's numId or ilvl — preserve original structure
|
|
317
|
+
report.normalized++;
|
|
318
|
+
report.details.push({
|
|
319
|
+
originalText: item.text.substring(0, 50),
|
|
320
|
+
action: "normalized",
|
|
321
|
+
reason: `Applied indentation settings in-place at level ${numbering.level}`,
|
|
322
|
+
});
|
|
323
|
+
}
|
|
324
|
+
}
|
|
325
|
+
}
|
|
326
|
+
normalizeOrphanListLevelsInCell(cell);
|
|
327
|
+
return report;
|
|
328
|
+
}
|
|
329
|
+
report.skipped = analysis.paragraphs.length;
|
|
330
|
+
// Always normalize orphan levels even when no other normalization needed
|
|
331
|
+
normalizeOrphanListLevelsInCell(cell);
|
|
332
|
+
return report;
|
|
333
|
+
}
|
|
334
|
+
|
|
335
|
+
// Calculate baseline (minimum) indentation for relative level inference
|
|
336
|
+
let baselineIndent = Infinity;
|
|
337
|
+
for (const item of analysis.paragraphs) {
|
|
338
|
+
if (item.detection.category !== "none") {
|
|
339
|
+
baselineIndent = Math.min(baselineIndent, item.detection.indentationTwips);
|
|
340
|
+
}
|
|
341
|
+
}
|
|
342
|
+
if (baselineIndent === Infinity) baselineIndent = 0;
|
|
343
|
+
|
|
344
|
+
// Calculate level shifts PER LIST GROUP based on ALL list items (majority + minority).
|
|
345
|
+
// Including minority items prevents shifting when low-level minority items exist
|
|
346
|
+
// (e.g., numbered items at ilvl=0 among bullet sub-items at ilvl=1+).
|
|
347
|
+
// A "list group" is a contiguous sequence of list items separated by non-list items.
|
|
348
|
+
const levelShiftByIndex = new Map<number, number>();
|
|
349
|
+
let currentGroupStart = -1;
|
|
350
|
+
let currentGroupMinLevel = Infinity;
|
|
351
|
+
|
|
352
|
+
for (let i = 0; i < analysis.paragraphs.length; i++) {
|
|
353
|
+
const item = analysis.paragraphs[i]!;
|
|
354
|
+
|
|
355
|
+
// Consider ALL list items for level shift calculation
|
|
356
|
+
if (item.detection.category !== "none") {
|
|
357
|
+
if (currentGroupStart === -1) {
|
|
358
|
+
currentGroupStart = i; // Start new group
|
|
359
|
+
currentGroupMinLevel = Infinity;
|
|
360
|
+
}
|
|
361
|
+
// Track minimum level in current group across all categories
|
|
362
|
+
currentGroupMinLevel = Math.min(currentGroupMinLevel, item.detection.inferredLevel);
|
|
363
|
+
} else {
|
|
364
|
+
// Non-list item - end current group if any
|
|
365
|
+
if (currentGroupStart !== -1) {
|
|
366
|
+
// Apply the group's level shift to ALL non-"none" items in the group
|
|
367
|
+
const shift = currentGroupMinLevel === Infinity ? 0 : currentGroupMinLevel;
|
|
368
|
+
for (let j = currentGroupStart; j < i; j++) {
|
|
369
|
+
if (analysis.paragraphs[j]!.detection.category !== "none") {
|
|
370
|
+
levelShiftByIndex.set(j, shift);
|
|
371
|
+
}
|
|
372
|
+
}
|
|
373
|
+
currentGroupStart = -1;
|
|
374
|
+
currentGroupMinLevel = Infinity;
|
|
375
|
+
}
|
|
376
|
+
}
|
|
377
|
+
}
|
|
378
|
+
|
|
379
|
+
// Handle last group if cell ends with list items
|
|
380
|
+
if (currentGroupStart !== -1) {
|
|
381
|
+
const shift = currentGroupMinLevel === Infinity ? 0 : currentGroupMinLevel;
|
|
382
|
+
for (let j = currentGroupStart; j < analysis.paragraphs.length; j++) {
|
|
383
|
+
if (analysis.paragraphs[j]!.detection.category !== "none") {
|
|
384
|
+
levelShiftByIndex.set(j, shift);
|
|
385
|
+
}
|
|
386
|
+
}
|
|
387
|
+
}
|
|
388
|
+
|
|
389
|
+
// === Context-aware sub-item detection ===
|
|
390
|
+
// Track which items should be treated as sub-items and their parent indices
|
|
391
|
+
const bulletAsSubItemIndices = new Set<number>();
|
|
392
|
+
const numberedAsSubItemIndices = new Set<number>();
|
|
393
|
+
const parentIndexByIndex = new Map<number, number>();
|
|
394
|
+
|
|
395
|
+
// Helper to calculate normalized level for an item (used for parent level lookup)
|
|
396
|
+
const getNormalizedLevel = (itemIndex: number): number => {
|
|
397
|
+
const item = analysis.paragraphs[itemIndex]!;
|
|
398
|
+
const detection = item.detection;
|
|
399
|
+
const hasTypedPrefix = !!detection.typedPrefix;
|
|
400
|
+
const levelShift = levelShiftByIndex.get(itemIndex) ?? 0;
|
|
401
|
+
|
|
402
|
+
if (hasTypedPrefix) {
|
|
403
|
+
const relativeIndent = detection.indentationTwips - baselineIndent;
|
|
404
|
+
const rawLevel = inferLevelFromRelativeIndentation(relativeIndent);
|
|
405
|
+
// Apply levelShift consistently for typed prefixes too
|
|
406
|
+
return Math.max(0, rawLevel - levelShift);
|
|
407
|
+
} else {
|
|
408
|
+
return Math.max(0, detection.inferredLevel - levelShift);
|
|
409
|
+
}
|
|
410
|
+
};
|
|
411
|
+
|
|
412
|
+
// Minimum indentation difference (in twips) to consider an item a sub-item
|
|
413
|
+
// 200 twips ~ 0.14 inches - small enough to catch real sub-items but avoid false positives
|
|
414
|
+
const INDENT_THRESHOLD = 200;
|
|
415
|
+
|
|
416
|
+
if (majorityCategory === "numbered") {
|
|
417
|
+
let lastNumberedItemIndex = -1;
|
|
418
|
+
let lastSubItemIndex = -1;
|
|
419
|
+
|
|
420
|
+
for (let i = 0; i < analysis.paragraphs.length; i++) {
|
|
421
|
+
const item = analysis.paragraphs[i]!;
|
|
422
|
+
const detection = item.detection;
|
|
423
|
+
|
|
424
|
+
if (detection.category === "numbered") {
|
|
425
|
+
lastNumberedItemIndex = i;
|
|
426
|
+
lastSubItemIndex = -1;
|
|
427
|
+
} else if (detection.category === "bullet" && lastNumberedItemIndex >= 0) {
|
|
428
|
+
// Only mark as sub-item if actually indented MORE than the parent
|
|
429
|
+
// This prevents level-0 bullets from being wrongly demoted to level-1
|
|
430
|
+
const parentDetection = analysis.paragraphs[lastNumberedItemIndex]!.detection;
|
|
431
|
+
if (detection.indentationTwips > parentDetection.indentationTwips + INDENT_THRESHOLD) {
|
|
432
|
+
bulletAsSubItemIndices.add(i);
|
|
433
|
+
parentIndexByIndex.set(i, lastNumberedItemIndex);
|
|
434
|
+
lastSubItemIndex = i;
|
|
435
|
+
} else if (
|
|
436
|
+
// 0-indentation fallback for table cells: when both parent and child have
|
|
437
|
+
// 0 indentation (common in table cells), use proximity to the last numbered
|
|
438
|
+
// or sub-item to infer sub-item status. Only apply when the bullet is at
|
|
439
|
+
// inferred level 0 (to avoid demoting items already at higher ilvl).
|
|
440
|
+
detection.indentationTwips === 0 &&
|
|
441
|
+
parentDetection.indentationTwips === 0 &&
|
|
442
|
+
detection.inferredLevel === 0 &&
|
|
443
|
+
(i - Math.max(lastNumberedItemIndex, lastSubItemIndex === -1 ? 0 : lastSubItemIndex)) <= 3
|
|
444
|
+
) {
|
|
445
|
+
bulletAsSubItemIndices.add(i);
|
|
446
|
+
parentIndexByIndex.set(i, lastNumberedItemIndex);
|
|
447
|
+
lastSubItemIndex = i;
|
|
448
|
+
}
|
|
449
|
+
} else if (detection.category === "none") {
|
|
450
|
+
// Only reset on text-bearing "none" items — blank paragraphs (spacers)
|
|
451
|
+
// between list items should not break the parent-child chain
|
|
452
|
+
if (item.text.trim().length > 0) {
|
|
453
|
+
lastNumberedItemIndex = -1;
|
|
454
|
+
lastSubItemIndex = -1;
|
|
455
|
+
}
|
|
456
|
+
}
|
|
457
|
+
}
|
|
458
|
+
}
|
|
459
|
+
|
|
460
|
+
if (majorityCategory === "bullet") {
|
|
461
|
+
let lastBulletItemIndex = -1;
|
|
462
|
+
|
|
463
|
+
for (let i = 0; i < analysis.paragraphs.length; i++) {
|
|
464
|
+
const item = analysis.paragraphs[i]!;
|
|
465
|
+
const detection = item.detection;
|
|
466
|
+
|
|
467
|
+
if (detection.category === "bullet") {
|
|
468
|
+
lastBulletItemIndex = i;
|
|
469
|
+
} else if (detection.category === "numbered" && lastBulletItemIndex >= 0) {
|
|
470
|
+
// Only mark as sub-item if actually indented MORE than the parent
|
|
471
|
+
const parentDetection = analysis.paragraphs[lastBulletItemIndex]!.detection;
|
|
472
|
+
if (detection.indentationTwips > parentDetection.indentationTwips + INDENT_THRESHOLD) {
|
|
473
|
+
numberedAsSubItemIndices.add(i);
|
|
474
|
+
parentIndexByIndex.set(i, lastBulletItemIndex);
|
|
475
|
+
}
|
|
476
|
+
} else if (detection.category === "none") {
|
|
477
|
+
// Only reset on text-bearing "none" items — blank paragraphs (spacers)
|
|
478
|
+
// between list items should not break the parent-child chain
|
|
479
|
+
if (item.text.trim().length > 0) {
|
|
480
|
+
lastBulletItemIndex = -1;
|
|
481
|
+
}
|
|
482
|
+
}
|
|
483
|
+
}
|
|
484
|
+
}
|
|
485
|
+
// === End sub-item detection ===
|
|
486
|
+
|
|
487
|
+
// Track numId per level - will be reset when parent level appears
|
|
488
|
+
const numIdByLevel = new Map<number, number>();
|
|
489
|
+
let lastProcessedLevel = -1;
|
|
490
|
+
|
|
491
|
+
// Helper to get/create numId for a level (uses majority category)
|
|
492
|
+
const getNumId = (level: number): number => {
|
|
493
|
+
if (level < lastProcessedLevel) {
|
|
494
|
+
for (const existingLevel of numIdByLevel.keys()) {
|
|
495
|
+
if (existingLevel > level) {
|
|
496
|
+
numIdByLevel.delete(existingLevel);
|
|
497
|
+
}
|
|
498
|
+
}
|
|
499
|
+
}
|
|
500
|
+
lastProcessedLevel = level;
|
|
501
|
+
|
|
502
|
+
if (!numIdByLevel.has(level)) {
|
|
503
|
+
const numId =
|
|
504
|
+
majorityCategory === "numbered"
|
|
505
|
+
? numberingManager.createNumberedList()
|
|
506
|
+
: numberingManager.createBulletList();
|
|
507
|
+
numIdByLevel.set(level, numId);
|
|
508
|
+
|
|
509
|
+
// Apply user's indentation settings if provided
|
|
510
|
+
if (options?.indentationLevels?.length) {
|
|
511
|
+
const instance = numberingManager.getInstance(numId);
|
|
512
|
+
if (instance) {
|
|
513
|
+
const abstractNum = numberingManager.getAbstractNumbering(instance.getAbstractNumId());
|
|
514
|
+
if (abstractNum) {
|
|
515
|
+
applyIndentationSettings(abstractNum, options.indentationLevels, majorityCategory !== "numbered", options.extraHangingIndentTwips ?? 0);
|
|
516
|
+
}
|
|
517
|
+
}
|
|
518
|
+
}
|
|
519
|
+
}
|
|
520
|
+
return numIdByLevel.get(level)!;
|
|
521
|
+
};
|
|
522
|
+
|
|
523
|
+
// Separate tracking for bullet numIds (used for trailing bullets in numbered-majority cells)
|
|
524
|
+
const bulletNumIdByLevel = new Map<number, number>();
|
|
525
|
+
let lastBulletProcessedLevel = -1;
|
|
526
|
+
|
|
527
|
+
const getBulletNumId = (level: number): number => {
|
|
528
|
+
if (level < lastBulletProcessedLevel) {
|
|
529
|
+
for (const existingLevel of bulletNumIdByLevel.keys()) {
|
|
530
|
+
if (existingLevel > level) {
|
|
531
|
+
bulletNumIdByLevel.delete(existingLevel);
|
|
532
|
+
}
|
|
533
|
+
}
|
|
534
|
+
}
|
|
535
|
+
lastBulletProcessedLevel = level;
|
|
536
|
+
|
|
537
|
+
if (!bulletNumIdByLevel.has(level)) {
|
|
538
|
+
const numId = numberingManager.createBulletList();
|
|
539
|
+
bulletNumIdByLevel.set(level, numId);
|
|
540
|
+
|
|
541
|
+
// Apply user's indentation settings if provided
|
|
542
|
+
if (options?.indentationLevels?.length) {
|
|
543
|
+
const instance = numberingManager.getInstance(numId);
|
|
544
|
+
if (instance) {
|
|
545
|
+
const abstractNum = numberingManager.getAbstractNumbering(instance.getAbstractNumId());
|
|
546
|
+
if (abstractNum) {
|
|
547
|
+
applyIndentationSettings(abstractNum, options.indentationLevels, true, options.extraHangingIndentTwips ?? 0);
|
|
548
|
+
}
|
|
549
|
+
}
|
|
550
|
+
}
|
|
551
|
+
}
|
|
552
|
+
return bulletNumIdByLevel.get(level)!;
|
|
553
|
+
};
|
|
554
|
+
|
|
555
|
+
// Track last typed format per level to detect numId boundary changes.
|
|
556
|
+
// null = last item was a Word list, string = typed prefix format (e.g., "decimal", "lowerLetter").
|
|
557
|
+
// When transitioning from Word list → typed prefix or changing typed format, the cached
|
|
558
|
+
// numId is cleared so the typed prefix starts a fresh numbered sequence.
|
|
559
|
+
const lastTypedFormatByLevel = new Map<number, string | null>();
|
|
560
|
+
|
|
561
|
+
// Process each paragraph
|
|
562
|
+
for (let index = 0; index < analysis.paragraphs.length; index++) {
|
|
563
|
+
const item = analysis.paragraphs[index]!;
|
|
564
|
+
const { paragraph, text, detection } = item;
|
|
565
|
+
const para = paragraph as Paragraph;
|
|
566
|
+
|
|
567
|
+
// Skip non-list items entirely - preserve "Note:", plain text, etc.
|
|
568
|
+
if (detection.category === "none") {
|
|
569
|
+
report.skipped++;
|
|
570
|
+
report.details.push({
|
|
571
|
+
originalText: text.substring(0, 50),
|
|
572
|
+
action: "skipped",
|
|
573
|
+
reason: "Not a list item - preserving original formatting",
|
|
574
|
+
});
|
|
575
|
+
continue;
|
|
576
|
+
}
|
|
577
|
+
|
|
578
|
+
try {
|
|
579
|
+
// Check if this item needs conversion (different category than majority)
|
|
580
|
+
const needsConversion = detection.category !== majorityCategory;
|
|
581
|
+
const hasTypedPrefix = !!detection.typedPrefix;
|
|
582
|
+
const isWordList = detection.isWordList;
|
|
583
|
+
|
|
584
|
+
// Get the level shift for this paragraph's list group
|
|
585
|
+
const levelShift = levelShiftByIndex.get(index) ?? 0;
|
|
586
|
+
|
|
587
|
+
// Calculate target level
|
|
588
|
+
// - For typed prefixes: use format-based level (decimal=0, letter=1, roman=2)
|
|
589
|
+
// unless BOTH format AND indentation suggest nesting
|
|
590
|
+
// - For sub-items: use parent's normalized level + 1
|
|
591
|
+
// - For Word lists: use format-based level with level shift applied
|
|
592
|
+
let targetLevel: number;
|
|
593
|
+
if (hasTypedPrefix) {
|
|
594
|
+
const relativeIndent = detection.indentationTwips - baselineIndent;
|
|
595
|
+
const indentBasedLevel = inferLevelFromRelativeIndentation(relativeIndent);
|
|
596
|
+
|
|
597
|
+
if (indentBasedLevel === 0 && detection.inferredLevel > 0) {
|
|
598
|
+
// No extra indent but format suggests nesting (e.g., "a." at level 1)
|
|
599
|
+
targetLevel = Math.max(0, detection.inferredLevel - levelShift);
|
|
600
|
+
} else if (indentBasedLevel > 0 && detection.inferredLevel === 0) {
|
|
601
|
+
// FIX: Decimal/bullet typed prefix with extra indentation from cell baseline.
|
|
602
|
+
// Don't infer nesting from indentation alone when the format is top-level
|
|
603
|
+
// (e.g., "1.", "2.", "3." are decimal = level 0). The baseline may come from
|
|
604
|
+
// a non-list paragraph (header text), causing false nesting.
|
|
605
|
+
// The levelShift mechanism handles true multi-level lists correctly.
|
|
606
|
+
targetLevel = 0;
|
|
607
|
+
} else {
|
|
608
|
+
targetLevel = indentBasedLevel;
|
|
609
|
+
}
|
|
610
|
+
} else if (bulletAsSubItemIndices.has(index) || numberedAsSubItemIndices.has(index)) {
|
|
611
|
+
// Sub-item: use parent's NORMALIZED level + 1
|
|
612
|
+
const parentIndex = parentIndexByIndex.get(index);
|
|
613
|
+
const parentNormalizedLevel = parentIndex !== undefined ? getNormalizedLevel(parentIndex) : 0;
|
|
614
|
+
targetLevel = parentNormalizedLevel + 1;
|
|
615
|
+
} else {
|
|
616
|
+
targetLevel = Math.max(0, detection.inferredLevel - levelShift);
|
|
617
|
+
}
|
|
618
|
+
|
|
619
|
+
// Process based on what type of item this is
|
|
620
|
+
if (hasTypedPrefix && detection.typedPrefix) {
|
|
621
|
+
// Typed prefix: strip prefix and apply new formatting
|
|
622
|
+
stripTypedPrefix(para, detection.typedPrefix);
|
|
623
|
+
|
|
624
|
+
// Check if we need a fresh numId for this typed prefix.
|
|
625
|
+
// New numId when: previous at this level was a Word list item (null)
|
|
626
|
+
// or a different typed format (e.g., decimal → lowerLetter).
|
|
627
|
+
const lastFormat = lastTypedFormatByLevel.get(targetLevel);
|
|
628
|
+
if (lastFormat === null || (lastFormat !== undefined && lastFormat !== detection.format)) {
|
|
629
|
+
numIdByLevel.delete(targetLevel);
|
|
630
|
+
}
|
|
631
|
+
lastTypedFormatByLevel.set(targetLevel, detection.format ?? "unknown");
|
|
632
|
+
|
|
633
|
+
para.setNumbering(getNumId(targetLevel), targetLevel);
|
|
634
|
+
report.normalized++;
|
|
635
|
+
report.details.push({
|
|
636
|
+
originalText: text.substring(0, 50),
|
|
637
|
+
action: "normalized",
|
|
638
|
+
reason: `Typed prefix → level ${targetLevel}`,
|
|
639
|
+
});
|
|
640
|
+
} else if (isWordList && bulletAsSubItemIndices.has(index)) {
|
|
641
|
+
// Sandwiched bullet following numbered → convert to numbered sub-item
|
|
642
|
+
lastTypedFormatByLevel.set(targetLevel, null);
|
|
643
|
+
para.setNumbering(getNumId(targetLevel), targetLevel);
|
|
644
|
+
report.normalized++;
|
|
645
|
+
report.details.push({
|
|
646
|
+
originalText: text.substring(0, 50),
|
|
647
|
+
action: "normalized",
|
|
648
|
+
reason: `Bullet → numbered sub-item at level ${targetLevel}`,
|
|
649
|
+
});
|
|
650
|
+
} else if (isWordList && numberedAsSubItemIndices.has(index)) {
|
|
651
|
+
// Numbered following bullet → convert to bullet sub-item
|
|
652
|
+
lastTypedFormatByLevel.set(targetLevel, null);
|
|
653
|
+
para.setNumbering(getBulletNumId(targetLevel), targetLevel);
|
|
654
|
+
report.normalized++;
|
|
655
|
+
report.details.push({
|
|
656
|
+
originalText: text.substring(0, 50),
|
|
657
|
+
action: "normalized",
|
|
658
|
+
reason: `Numbered → bullet at level ${targetLevel}`,
|
|
659
|
+
});
|
|
660
|
+
} else if (isWordList && detection.category === "bullet" && majorityCategory === "numbered" && !bulletAsSubItemIndices.has(index)) {
|
|
661
|
+
// Trailing bullet in numbered-majority cell - preserve as bullet
|
|
662
|
+
lastTypedFormatByLevel.set(targetLevel, null);
|
|
663
|
+
para.setNumbering(getBulletNumId(targetLevel), targetLevel);
|
|
664
|
+
report.normalized++;
|
|
665
|
+
report.details.push({
|
|
666
|
+
originalText: text.substring(0, 50),
|
|
667
|
+
action: "normalized",
|
|
668
|
+
reason: `Trailing bullet preserved at level ${targetLevel}`,
|
|
669
|
+
});
|
|
670
|
+
} else if (isWordList && needsConversion) {
|
|
671
|
+
// Regular category conversion
|
|
672
|
+
lastTypedFormatByLevel.set(targetLevel, null);
|
|
673
|
+
if (majorityCategory === "bullet") {
|
|
674
|
+
para.setNumbering(getBulletNumId(targetLevel), targetLevel);
|
|
675
|
+
} else {
|
|
676
|
+
para.setNumbering(getNumId(targetLevel), targetLevel);
|
|
677
|
+
}
|
|
678
|
+
report.normalized++;
|
|
679
|
+
report.details.push({
|
|
680
|
+
originalText: text.substring(0, 50),
|
|
681
|
+
action: "normalized",
|
|
682
|
+
reason: `Word ${detection.category} → ${majorityCategory} level ${targetLevel}`,
|
|
683
|
+
});
|
|
684
|
+
} else if (isWordList) {
|
|
685
|
+
// Preserve category but ensure consistent numId with user settings
|
|
686
|
+
lastTypedFormatByLevel.set(targetLevel, null);
|
|
687
|
+
if (detection.category === "bullet") {
|
|
688
|
+
para.setNumbering(getBulletNumId(targetLevel), targetLevel);
|
|
689
|
+
} else {
|
|
690
|
+
para.setNumbering(getNumId(targetLevel), targetLevel);
|
|
691
|
+
}
|
|
692
|
+
report.normalized++;
|
|
693
|
+
report.details.push({
|
|
694
|
+
originalText: text.substring(0, 50),
|
|
695
|
+
action: "normalized",
|
|
696
|
+
reason: `Updated numId for consistent numbering at level ${targetLevel}`,
|
|
697
|
+
});
|
|
698
|
+
}
|
|
699
|
+
} catch (err: unknown) {
|
|
700
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
701
|
+
report.errors.push(`Failed on "${text.substring(0, 30)}...": ${message}`);
|
|
702
|
+
report.details.push({
|
|
703
|
+
originalText: text.substring(0, 50),
|
|
704
|
+
action: "error",
|
|
705
|
+
reason: message,
|
|
706
|
+
});
|
|
707
|
+
}
|
|
708
|
+
}
|
|
709
|
+
|
|
710
|
+
// Ensure list items don't start at orphan levels (level 1+ without level 0 parent)
|
|
711
|
+
normalizeOrphanListLevelsInCell(cell);
|
|
712
|
+
|
|
713
|
+
return report;
|
|
714
|
+
}
|
|
715
|
+
|
|
716
|
+
/**
 * Run per-cell list normalization over every cell of a table and merge the
 * individual cell reports into a single report.
 *
 * Counters are summed, error and detail entries are concatenated in visiting
 * order (row by row, cell by cell), and `appliedCategory` ends up as the
 * category reported by the last cell whose category was not "none".
 *
 * @param table - Table whose rows and cells are visited.
 * @param options - Normalization options forwarded unchanged to each cell.
 * @param numberingManager - Numbering manager forwarded unchanged to each cell.
 * @returns The merged report for the whole table.
 */
export function normalizeListsInTable(
  table: Table,
  options: ListNormalizationOptions,
  numberingManager: NumberingManager
): ListNormalizationReport {
  const totals: ListNormalizationReport = {
    normalized: 0,
    skipped: 0,
    errors: [],
    appliedCategory: "none",
    details: [],
  };

  for (const tableRow of table.getRows()) {
    for (const tableCell of tableRow.getCells()) {
      const { normalized, skipped, errors, details, appliedCategory } =
        normalizeListsInCell(tableCell, options, numberingManager);

      totals.normalized += normalized;
      totals.skipped += skipped;
      totals.errors.push(...errors);
      totals.details.push(...details);

      // A later cell with a real category overrides whatever came before it.
      if (appliedCategory !== "none") {
        totals.appliedCategory = appliedCategory;
      }
    }
  }

  return totals;
}
|
|
749
|
+
|
|
750
|
+
/**
 * Re-base the list levels inside a table cell so the shallowest item sits at
 * level 0.
 *
 * The smallest level among all paragraphs that carry numbering is found
 * first. When that minimum is greater than 0, every numbered paragraph is
 * re-assigned to the same numId with its level reduced by the minimum, so the
 * relative nesting between items is preserved.
 *
 * @param cell - Cell whose paragraphs are inspected and possibly updated.
 * @returns How many paragraphs were re-levelled; 0 when the cell has no
 *   numbered paragraphs or its shallowest item is already at level 0.
 */
export function normalizeOrphanListLevelsInCell(cell: TableCell): number {
  const cellParagraphs = cell.getParagraphs();

  // Pass 1: shallowest level in use (stays Infinity if nothing is numbered).
  let shallowest = Infinity;
  for (const candidate of cellParagraphs) {
    const listInfo = candidate.getNumbering();
    if (listInfo) {
      shallowest = Math.min(shallowest, listInfo.level);
    }
  }

  if (shallowest === Infinity || shallowest === 0) {
    return 0;
  }

  // Pass 2: pull every numbered paragraph up by the shallowest level.
  let shiftedCount = 0;
  for (const candidate of cellParagraphs) {
    const listInfo = candidate.getNumbering();
    if (!listInfo) {
      continue;
    }
    candidate.setNumbering(listInfo.numId, listInfo.level - shallowest);
    shiftedCount += 1;
  }

  return shiftedCount;
}
|
|
790
|
+
|
|
791
|
+
/**
 * Apply orphan-level normalization to every cell of a table.
 *
 * @param table - Table whose cells are processed row by row.
 * @returns Sum of the per-cell counts returned by
 *   `normalizeOrphanListLevelsInCell`.
 */
export function normalizeOrphanListLevelsInTable(table: Table): number {
  let shiftedTotal = 0;

  for (const tableRow of table.getRows()) {
    for (const tableCell of tableRow.getCells()) {
      const shiftedInCell = normalizeOrphanListLevelsInCell(tableCell);
      shiftedTotal = shiftedTotal + shiftedInCell;
    }
  }

  return shiftedTotal;
}
|
|
805
|
+
|
|
806
|
+
// =============================================================================
|
|
807
|
+
// NUMBERING MANAGER HELPERS
|
|
808
|
+
// =============================================================================
|
|
809
|
+
|
|
810
|
+
/**
 * Find a reusable numbered-list numId, creating one when none exists.
 *
 * Numbering instances are scanned in the order the manager returns them. The
 * first instance whose abstract definition has a level 0 with a format that
 * `getListCategoryFromFormat` classifies as "numbered" wins. Instances with
 * no abstract definition or no level 0 are ignored.
 *
 * @param numberingManager - Manager that is queried and, if needed, asked to
 *   create a new numbered list.
 * @returns The numId of the matching instance, or of a newly created list.
 */
function getOrCreateNumberedListNumId(
  numberingManager: NumberingManager
): number {
  for (const candidate of numberingManager.getAllInstances()) {
    const definition = numberingManager.getAbstractNumbering(
      candidate.getAbstractNumId()
    );
    if (!definition) {
      continue;
    }

    const topLevel = definition.getLevel(0);
    if (!topLevel) {
      continue;
    }

    const topLevelFormat = topLevel.getFormat();
    if (getListCategoryFromFormat(topLevelFormat) === "numbered") {
      return candidate.getNumId();
    }
  }

  // Nothing suitable exists yet.
  return numberingManager.createNumberedList();
}
|
|
834
|
+
|
|
835
|
+
/**
 * Find a reusable bullet-list numId, creating one when none exists.
 *
 * Numbering instances are scanned in the order the manager returns them. The
 * first instance whose abstract definition has a level 0 with the format
 * "bullet" wins. Instances with no abstract definition or no level 0 are
 * ignored.
 *
 * @param numberingManager - Manager that is queried and, if needed, asked to
 *   create a new bullet list.
 * @returns The numId of the matching instance, or of a newly created list.
 */
function getOrCreateBulletListNumId(
  numberingManager: NumberingManager
): number {
  for (const candidate of numberingManager.getAllInstances()) {
    const definition = numberingManager.getAbstractNumbering(
      candidate.getAbstractNumId()
    );
    if (!definition) {
      continue;
    }

    const topLevel = definition.getLevel(0);
    if (!topLevel) {
      continue;
    }

    const topLevelFormat = topLevel.getFormat();
    if (topLevelFormat === "bullet") {
      return candidate.getNumId();
    }
  }

  // Nothing suitable exists yet.
  return numberingManager.createBulletList();
}
|
|
859
|
+
|
|
860
|
+
// =============================================================================
|
|
861
|
+
// PUBLIC API CLASS
|
|
862
|
+
// =============================================================================
|
|
863
|
+
|
|
864
|
+
/**
 * Main entry point for list normalization.
 *
 * Wraps the module-level analysis and normalization functions around a single
 * `NumberingManager`, and fills in defaults for any options the caller leaves
 * out before delegating.
 */
export class ListNormalizer {
  private numberingManager: NumberingManager;

  /**
   * @param numberingManager - Manager used to look up or create numbering
   *   definitions for every normalization call made through this instance.
   */
  constructor(numberingManager: NumberingManager) {
    this.numberingManager = numberingManager;
  }

  /** Analyze the list content of a single cell (delegates to `analyzeCellLists`). */
  analyzeCell(cell: TableCell): ListAnalysis {
    return analyzeCellLists(cell);
  }

  /** Analyze the list content of every cell in a table (delegates to `analyzeTableLists`). */
  analyzeTable(table: Table): Map<TableCell, ListAnalysis> {
    return analyzeTableLists(table);
  }

  /**
   * Normalize the lists in one cell.
   *
   * @param cell - Cell to normalize.
   * @param options - Partial options; missing fields receive defaults.
   * @returns The report produced for the cell.
   */
  normalizeCell(
    cell: TableCell,
    options: Partial<ListNormalizationOptions> = {}
  ): ListNormalizationReport {
    const fullOptions = this.resolveOptions(options);
    return normalizeListsInCell(cell, fullOptions, this.numberingManager);
  }

  /**
   * Normalize the lists in every cell of one table.
   *
   * @param table - Table to normalize.
   * @param options - Partial options; missing fields receive defaults.
   * @returns The merged report for the table.
   */
  normalizeTable(
    table: Table,
    options: Partial<ListNormalizationOptions> = {}
  ): ListNormalizationReport {
    const fullOptions = this.resolveOptions(options);
    return normalizeListsInTable(table, fullOptions, this.numberingManager);
  }

  /**
   * Normalize the lists in several tables and merge the per-table reports.
   *
   * Counters are summed, errors and details are concatenated in table order,
   * and `appliedCategory` is taken from the last table that reported a
   * category other than "none". An info message is logged when at least one
   * item was normalized.
   *
   * @param tables - Tables to normalize, processed in array order.
   * @param options - Partial options applied to every table.
   * @returns The merged report across all tables.
   */
  normalizeAllTables(
    tables: Table[],
    options: Partial<ListNormalizationOptions> = {}
  ): ListNormalizationReport {
    const aggregateReport: ListNormalizationReport = {
      normalized: 0,
      skipped: 0,
      errors: [],
      appliedCategory: "none",
      details: [],
    };

    for (const table of tables) {
      const tableReport = this.normalizeTable(table, options);
      aggregateReport.normalized += tableReport.normalized;
      aggregateReport.skipped += tableReport.skipped;
      aggregateReport.errors.push(...tableReport.errors);
      aggregateReport.details.push(...tableReport.details);

      if (tableReport.appliedCategory !== "none") {
        aggregateReport.appliedCategory = tableReport.appliedCategory;
      }
    }

    if (aggregateReport.normalized > 0) {
      logger.info(
        `List normalization complete: ${aggregateReport.normalized} items normalized`
      );
    }

    return aggregateReport;
  }

  /**
   * Expand partial options into a complete options object.
   *
   * Defaults: the numbered/bullet numIds fall back to an existing matching
   * list or a newly created one, `scope` to "cell", `forceMajority` and
   * `preserveIndentation` to false. `indentationLevels` and
   * `extraHangingIndentTwips` are passed through as given (possibly
   * undefined).
   *
   * @param partial - Caller-supplied subset of the options.
   * @returns Options with every defaulted field populated.
   */
  private resolveOptions(
    partial: Partial<ListNormalizationOptions>
  ): ListNormalizationOptions {
    return {
      // `??` short-circuits, so no list is looked up or created when the
      // caller supplies the numId explicitly.
      numberedStyleNumId:
        partial.numberedStyleNumId ??
        getOrCreateNumberedListNumId(this.numberingManager),
      bulletStyleNumId:
        partial.bulletStyleNumId ??
        getOrCreateBulletListNumId(this.numberingManager),
      scope: partial.scope ?? "cell",
      forceMajority: partial.forceMajority ?? false,
      preserveIndentation: partial.preserveIndentation ?? false,
      indentationLevels: partial.indentationLevels,
      // Forwarded so the per-cell normalizer, which reads this field when it
      // applies indentation settings, sees the caller's value instead of
      // always falling back to 0.
      extraHangingIndentTwips: partial.extraHangingIndentTwips,
    };
  }
}
|