docxmlater 10.3.6 → 10.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184) hide show
  1. package/README.md +158 -7
  2. package/dist/core/Document.d.ts +97 -3
  3. package/dist/core/Document.d.ts.map +1 -1
  4. package/dist/core/Document.js +727 -50
  5. package/dist/core/Document.js.map +1 -1
  6. package/dist/core/DocumentContent.d.ts.map +1 -1
  7. package/dist/core/DocumentContent.js +0 -8
  8. package/dist/core/DocumentContent.js.map +1 -1
  9. package/dist/core/DocumentGenerator.d.ts.map +1 -1
  10. package/dist/core/DocumentGenerator.js +9 -5
  11. package/dist/core/DocumentGenerator.js.map +1 -1
  12. package/dist/core/DocumentParser.d.ts.map +1 -1
  13. package/dist/core/DocumentParser.js +573 -101
  14. package/dist/core/DocumentParser.js.map +1 -1
  15. package/dist/core/RelationshipManager.d.ts.map +1 -1
  16. package/dist/core/RelationshipManager.js +4 -3
  17. package/dist/core/RelationshipManager.js.map +1 -1
  18. package/dist/elements/Bookmark.d.ts +7 -0
  19. package/dist/elements/Bookmark.d.ts.map +1 -1
  20. package/dist/elements/Bookmark.js +24 -4
  21. package/dist/elements/Bookmark.js.map +1 -1
  22. package/dist/elements/BookmarkManager.d.ts.map +1 -1
  23. package/dist/elements/BookmarkManager.js +4 -3
  24. package/dist/elements/BookmarkManager.js.map +1 -1
  25. package/dist/elements/CommonTypes.d.ts +2 -2
  26. package/dist/elements/CommonTypes.d.ts.map +1 -1
  27. package/dist/elements/CommonTypes.js +14 -1
  28. package/dist/elements/CommonTypes.js.map +1 -1
  29. package/dist/elements/Field.d.ts +1 -1
  30. package/dist/elements/Field.d.ts.map +1 -1
  31. package/dist/elements/Field.js +1 -1
  32. package/dist/elements/Field.js.map +1 -1
  33. package/dist/elements/Footer.d.ts +2 -0
  34. package/dist/elements/Footer.d.ts.map +1 -1
  35. package/dist/elements/Footer.js +6 -0
  36. package/dist/elements/Footer.js.map +1 -1
  37. package/dist/elements/Header.d.ts +2 -0
  38. package/dist/elements/Header.d.ts.map +1 -1
  39. package/dist/elements/Header.js +6 -0
  40. package/dist/elements/Header.js.map +1 -1
  41. package/dist/elements/Image.d.ts.map +1 -1
  42. package/dist/elements/Image.js +3 -0
  43. package/dist/elements/Image.js.map +1 -1
  44. package/dist/elements/Paragraph.d.ts +81 -1
  45. package/dist/elements/Paragraph.d.ts.map +1 -1
  46. package/dist/elements/Paragraph.js +515 -21
  47. package/dist/elements/Paragraph.js.map +1 -1
  48. package/dist/elements/Revision.d.ts +0 -1
  49. package/dist/elements/Revision.d.ts.map +1 -1
  50. package/dist/elements/Revision.js +0 -12
  51. package/dist/elements/Revision.js.map +1 -1
  52. package/dist/elements/RevisionManager.d.ts +0 -1
  53. package/dist/elements/RevisionManager.d.ts.map +1 -1
  54. package/dist/elements/RevisionManager.js +0 -2
  55. package/dist/elements/RevisionManager.js.map +1 -1
  56. package/dist/elements/Run.d.ts +16 -4
  57. package/dist/elements/Run.d.ts.map +1 -1
  58. package/dist/elements/Run.js +114 -22
  59. package/dist/elements/Run.js.map +1 -1
  60. package/dist/elements/Section.d.ts +7 -1
  61. package/dist/elements/Section.d.ts.map +1 -1
  62. package/dist/elements/Section.js +185 -4
  63. package/dist/elements/Section.js.map +1 -1
  64. package/dist/elements/Shape.js.map +1 -1
  65. package/dist/elements/Table.d.ts +30 -1
  66. package/dist/elements/Table.d.ts.map +1 -1
  67. package/dist/elements/Table.js +357 -40
  68. package/dist/elements/Table.js.map +1 -1
  69. package/dist/elements/TableCell.d.ts +3 -0
  70. package/dist/elements/TableCell.d.ts.map +1 -1
  71. package/dist/elements/TableCell.js +30 -3
  72. package/dist/elements/TableCell.js.map +1 -1
  73. package/dist/elements/TableGridChange.d.ts +0 -1
  74. package/dist/elements/TableGridChange.d.ts.map +1 -1
  75. package/dist/elements/TableGridChange.js +0 -10
  76. package/dist/elements/TableGridChange.js.map +1 -1
  77. package/dist/elements/TableRow.d.ts +4 -0
  78. package/dist/elements/TableRow.d.ts.map +1 -1
  79. package/dist/elements/TableRow.js +31 -3
  80. package/dist/elements/TableRow.js.map +1 -1
  81. package/dist/formatting/AbstractNumbering.d.ts +5 -0
  82. package/dist/formatting/AbstractNumbering.d.ts.map +1 -1
  83. package/dist/formatting/AbstractNumbering.js +22 -0
  84. package/dist/formatting/AbstractNumbering.js.map +1 -1
  85. package/dist/formatting/NumberingLevel.d.ts.map +1 -1
  86. package/dist/formatting/NumberingLevel.js +3 -3
  87. package/dist/formatting/NumberingLevel.js.map +1 -1
  88. package/dist/formatting/Style.d.ts +1 -0
  89. package/dist/formatting/Style.d.ts.map +1 -1
  90. package/dist/formatting/Style.js +25 -59
  91. package/dist/formatting/Style.js.map +1 -1
  92. package/dist/formatting/StylesManager.d.ts +1 -0
  93. package/dist/formatting/StylesManager.d.ts.map +1 -1
  94. package/dist/formatting/StylesManager.js +12 -0
  95. package/dist/formatting/StylesManager.js.map +1 -1
  96. package/dist/helpers/CleanupHelper.js.map +1 -1
  97. package/dist/images/ImageOptimizer.d.ts.map +1 -1
  98. package/dist/images/ImageOptimizer.js +0 -1
  99. package/dist/images/ImageOptimizer.js.map +1 -1
  100. package/dist/index.d.ts +1 -1
  101. package/dist/index.d.ts.map +1 -1
  102. package/dist/index.js.map +1 -1
  103. package/dist/managers/DrawingManager.d.ts.map +1 -1
  104. package/dist/managers/DrawingManager.js +4 -2
  105. package/dist/managers/DrawingManager.js.map +1 -1
  106. package/dist/types/formatting.d.ts +2 -2
  107. package/dist/types/formatting.d.ts.map +1 -1
  108. package/dist/types/formatting.js.map +1 -1
  109. package/dist/utils/ChangelogGenerator.d.ts +2 -2
  110. package/dist/utils/ChangelogGenerator.d.ts.map +1 -1
  111. package/dist/utils/ChangelogGenerator.js +4 -5
  112. package/dist/utils/ChangelogGenerator.js.map +1 -1
  113. package/dist/utils/InMemoryRevisionAcceptor.d.ts.map +1 -1
  114. package/dist/utils/InMemoryRevisionAcceptor.js +0 -1
  115. package/dist/utils/InMemoryRevisionAcceptor.js.map +1 -1
  116. package/dist/utils/RevisionAwareProcessor.d.ts +2 -2
  117. package/dist/utils/RevisionAwareProcessor.d.ts.map +1 -1
  118. package/dist/utils/RevisionAwareProcessor.js +2 -2
  119. package/dist/utils/RevisionAwareProcessor.js.map +1 -1
  120. package/dist/utils/SelectiveRevisionAcceptor.d.ts +0 -2
  121. package/dist/utils/SelectiveRevisionAcceptor.d.ts.map +1 -1
  122. package/dist/utils/SelectiveRevisionAcceptor.js +0 -26
  123. package/dist/utils/SelectiveRevisionAcceptor.js.map +1 -1
  124. package/dist/utils/ShadingResolver.d.ts.map +1 -1
  125. package/dist/utils/ShadingResolver.js.map +1 -1
  126. package/dist/utils/acceptRevisions.js +1 -1
  127. package/dist/utils/acceptRevisions.js.map +1 -1
  128. package/dist/utils/stripTrackedChanges.js +1 -1
  129. package/dist/utils/stripTrackedChanges.js.map +1 -1
  130. package/dist/utils/units.d.ts.map +1 -1
  131. package/dist/utils/units.js +1 -1
  132. package/dist/utils/units.js.map +1 -1
  133. package/dist/validation/RevisionAutoFixer.d.ts +2 -1
  134. package/dist/validation/RevisionAutoFixer.d.ts.map +1 -1
  135. package/dist/validation/RevisionAutoFixer.js.map +1 -1
  136. package/package.json +10 -1
  137. package/src/constants/CLAUDE.md +28 -0
  138. package/src/core/CLAUDE.md +4 -0
  139. package/src/core/Document.ts +1755 -85
  140. package/src/core/DocumentContent.ts +0 -11
  141. package/src/core/DocumentGenerator.ts +11 -12
  142. package/src/core/DocumentParser.ts +599 -138
  143. package/src/core/RelationshipManager.ts +6 -3
  144. package/src/elements/Bookmark.ts +39 -4
  145. package/src/elements/BookmarkManager.ts +4 -3
  146. package/src/elements/CLAUDE.md +18 -2
  147. package/src/elements/CommonTypes.ts +35 -8
  148. package/src/elements/Field.ts +1 -1
  149. package/src/elements/Footer.ts +23 -0
  150. package/src/elements/Header.ts +25 -0
  151. package/src/elements/Image.ts +5 -0
  152. package/src/elements/Paragraph.ts +1069 -41
  153. package/src/elements/Revision.ts +0 -19
  154. package/src/elements/RevisionManager.ts +1 -3
  155. package/src/elements/Run.ts +265 -35
  156. package/src/elements/Section.ts +214 -8
  157. package/src/elements/Shape.ts +1 -1
  158. package/src/elements/Table.ts +850 -61
  159. package/src/elements/TableCell.ts +84 -10
  160. package/src/elements/TableGridChange.ts +2 -16
  161. package/src/elements/TableRow.ts +94 -9
  162. package/src/formatting/AbstractNumbering.ts +42 -1
  163. package/src/formatting/CLAUDE.md +4 -0
  164. package/src/formatting/NumberingLevel.ts +11 -7
  165. package/src/formatting/Style.ts +39 -71
  166. package/src/formatting/StylesManager.ts +36 -0
  167. package/src/helpers/CleanupHelper.ts +1 -1
  168. package/src/images/ImageOptimizer.ts +0 -3
  169. package/src/index.ts +1 -1
  170. package/src/managers/DrawingManager.ts +5 -3
  171. package/src/tracking/CLAUDE.md +30 -0
  172. package/src/types/CLAUDE.md +39 -0
  173. package/src/types/formatting.ts +2 -2
  174. package/src/utils/CLAUDE.md +15 -0
  175. package/src/utils/ChangelogGenerator.ts +4 -5
  176. package/src/utils/InMemoryRevisionAcceptor.ts +0 -9
  177. package/src/utils/RevisionAwareProcessor.ts +2 -3
  178. package/src/utils/SelectiveRevisionAcceptor.ts +0 -39
  179. package/src/utils/ShadingResolver.ts +0 -1
  180. package/src/utils/acceptRevisions.ts +1 -1
  181. package/src/utils/stripTrackedChanges.ts +1 -1
  182. package/src/utils/units.ts +2 -1
  183. package/src/validation/CLAUDE.md +40 -0
  184. package/src/validation/RevisionAutoFixer.ts +2 -1
@@ -31,12 +31,13 @@ import {
31
31
  RunContent,
32
32
  RunFormatting,
33
33
  } from '../elements/Run';
34
- import { PageNumberFormat, Section, SectionProperties, SectionType } from '../elements/Section';
34
+ import { Section, SectionProperties, SectionType } from '../elements/Section';
35
35
  import { StructuredDocumentTag } from '../elements/StructuredDocumentTag';
36
36
  import { Table, TableBorder } from '../elements/Table';
37
37
  import { TableCell } from '../elements/TableCell';
38
38
  import { TableOfContents } from '../elements/TableOfContents';
39
39
  import { TableOfContentsElement } from '../elements/TableOfContentsElement';
40
+ import { TableGridChange } from '../elements/TableGridChange';
40
41
  import { TableRow } from '../elements/TableRow';
41
42
  import { AbstractNumbering } from '../formatting/AbstractNumbering';
42
43
  import { NumberingInstance } from '../formatting/NumberingInstance';
@@ -904,11 +905,15 @@ export class DocumentParser {
904
905
  }
905
906
  }
906
907
 
907
- // Parse w14:paraId if present
908
+ // Parse w14:paraId and w14:textId if present
908
909
  const paraId = pElement['w14:paraId'];
909
910
  if (paraId) {
910
911
  paragraph.formatting.paraId = paraId as string;
911
912
  }
913
+ const textId = pElement['w14:textId'];
914
+ if (textId) {
915
+ paragraph.formatting.textId = textId as string;
916
+ }
912
917
 
913
918
  // CRITICAL FIX: Preserve document order of paragraph children (runs, hyperlinks, fields)
914
919
  // When XMLParser.parseToObject groups multiple runs/hyperlinks, it creates arrays
@@ -1861,10 +1866,15 @@ export class DocumentParser {
1861
1866
 
1862
1867
  // Create bookmark with skipNormalization to preserve original name exactly
1863
1868
  // (Word allows special characters like = and . in bookmark names)
1869
+ // Parse optional column range for table bookmarks (ECMA-376 §17.16.5)
1870
+ const colFirstAttr = XMLParser.extractAttribute(bookmarkXml, 'w:colFirst');
1871
+ const colLastAttr = XMLParser.extractAttribute(bookmarkXml, 'w:colLast');
1864
1872
  const bookmark = new Bookmark({
1865
1873
  name: nameAttr,
1866
1874
  id: id,
1867
1875
  skipNormalization: true,
1876
+ colFirst: colFirstAttr ? parseInt(colFirstAttr, 10) : undefined,
1877
+ colLast: colLastAttr ? parseInt(colLastAttr, 10) : undefined,
1868
1878
  });
1869
1879
 
1870
1880
  // Register with BookmarkManager to enable hasBookmark() checks
@@ -1934,11 +1944,15 @@ export class DocumentParser {
1934
1944
  try {
1935
1945
  const paragraph = new Paragraph();
1936
1946
 
1937
- // Parse w14:paraId attribute from paragraph element (Word 2010+ requirement)
1947
+ // Parse w14:paraId and w14:textId attributes from paragraph element (Word 2010+)
1938
1948
  const paraId = paraObj['w14:paraId'];
1939
1949
  if (paraId) {
1940
1950
  paragraph.formatting.paraId = paraId;
1941
1951
  }
1952
+ const textId = paraObj['w14:textId'];
1953
+ if (textId) {
1954
+ paragraph.formatting.textId = textId;
1955
+ }
1942
1956
 
1943
1957
  // Parse paragraph properties
1944
1958
  this.parseParagraphPropertiesFromObject(paraObj['w:pPr'], paragraph);
@@ -2155,9 +2169,11 @@ export class DocumentParser {
2155
2169
  if (pPrObj['w:ind']) {
2156
2170
  const ind = pPrObj['w:ind'];
2157
2171
  // Use isExplicitlySet and safeParseInt for robust zero-value handling
2158
- if (isExplicitlySet(ind['@_w:left'])) paragraph.setLeftIndent(safeParseInt(ind['@_w:left']));
2159
- if (isExplicitlySet(ind['@_w:right']))
2160
- paragraph.setRightIndent(safeParseInt(ind['@_w:right']));
2172
+ // Per ECMA-376 §17.3.1.15: w:start/w:end are bidi-aware alternatives to w:left/w:right
2173
+ const leftVal = ind['@_w:start'] ?? ind['@_w:left'];
2174
+ const rightVal = ind['@_w:end'] ?? ind['@_w:right'];
2175
+ if (isExplicitlySet(leftVal)) paragraph.setLeftIndent(safeParseInt(leftVal));
2176
+ if (isExplicitlySet(rightVal)) paragraph.setRightIndent(safeParseInt(rightVal));
2161
2177
  if (isExplicitlySet(ind['@_w:firstLine']))
2162
2178
  paragraph.setFirstLineIndent(safeParseInt(ind['@_w:firstLine']));
2163
2179
  // Parse hanging indent per ECMA-376 Part 1 §17.3.1.17
@@ -2165,7 +2181,7 @@ export class DocumentParser {
2165
2181
  paragraph.setHangingIndent(safeParseInt(ind['@_w:hanging']));
2166
2182
  }
2167
2183
 
2168
- // Spacing
2184
+ // Spacing (ECMA-376 §17.3.1.33 — 8 attributes)
2169
2185
  if (pPrObj['w:spacing']) {
2170
2186
  const spacing = pPrObj['w:spacing'];
2171
2187
  // Use isExplicitlySet to properly handle 0 values (0 spacing is valid)
@@ -2176,18 +2192,39 @@ export class DocumentParser {
2176
2192
  if (isExplicitlySet(spacing['@_w:line'])) {
2177
2193
  paragraph.setLineSpacing(safeParseInt(spacing['@_w:line']), spacing['@_w:lineRule']);
2178
2194
  }
2195
+ // Parse extended spacing attributes — write directly to paragraph.formatting
2196
+ // (getFormatting() returns a shallow copy, so we must access the internal object)
2197
+ if (!paragraph.formatting.spacing) paragraph.formatting.spacing = {};
2198
+ if (isExplicitlySet(spacing['@_w:beforeLines']))
2199
+ paragraph.formatting.spacing.beforeLines = safeParseInt(spacing['@_w:beforeLines']);
2200
+ if (isExplicitlySet(spacing['@_w:afterLines']))
2201
+ paragraph.formatting.spacing.afterLines = safeParseInt(spacing['@_w:afterLines']);
2202
+ const beforeAuto = spacing['@_w:beforeAutospacing'];
2203
+ if (beforeAuto !== undefined)
2204
+ paragraph.formatting.spacing.beforeAutospacing =
2205
+ String(beforeAuto) === '1' || String(beforeAuto) === 'true';
2206
+ const afterAuto = spacing['@_w:afterAutospacing'];
2207
+ if (afterAuto !== undefined)
2208
+ paragraph.formatting.spacing.afterAutospacing =
2209
+ String(afterAuto) === '1' || String(afterAuto) === 'true';
2179
2210
  }
2180
2211
 
2181
- // Keep properties - parse pageBreakBefore FIRST, then apply keep properties
2182
- // This triggers automatic conflict resolution per ECMA-376 v0.28.2
2183
- if (pPrObj['w:pageBreakBefore']) paragraph.formatting.pageBreakBefore = true;
2184
-
2185
- // Keep properties - these will automatically clear pageBreakBefore if both are set
2186
- if (pPrObj['w:keepNext']) paragraph.setKeepNext(true);
2187
- if (pPrObj['w:keepLines']) paragraph.setKeepLines(true);
2212
+ // Keep properties — preserve explicit val="0" to override style inheritance
2213
+ // Parse pageBreakBefore FIRST, then keep properties (triggers automatic conflict resolution)
2214
+ if (pPrObj['w:pageBreakBefore'] !== undefined) {
2215
+ paragraph.formatting.pageBreakBefore = parseOoxmlBoolean(pPrObj['w:pageBreakBefore']);
2216
+ }
2217
+ if (pPrObj['w:keepNext'] !== undefined) {
2218
+ paragraph.setKeepNext(parseOoxmlBoolean(pPrObj['w:keepNext']));
2219
+ }
2220
+ if (pPrObj['w:keepLines'] !== undefined) {
2221
+ paragraph.setKeepLines(parseOoxmlBoolean(pPrObj['w:keepLines']));
2222
+ }
2188
2223
 
2189
2224
  // Contextual spacing
2190
- if (pPrObj['w:contextualSpacing']) paragraph.setContextualSpacing(true);
2225
+ if (pPrObj['w:contextualSpacing'] !== undefined) {
2226
+ paragraph.setContextualSpacing(parseOoxmlBoolean(pPrObj['w:contextualSpacing']));
2227
+ }
2191
2228
 
2192
2229
  // Numbering
2193
2230
  // Note: When track changes are present (w:pPrChange), XMLParser merges the
@@ -2304,8 +2341,8 @@ export class DocumentParser {
2304
2341
  }
2305
2342
 
2306
2343
  // Suppress line numbers per ECMA-376 Part 1 §17.3.1.34
2307
- if (pPrObj['w:suppressLineNumbers']) {
2308
- paragraph.setSuppressLineNumbers(true);
2344
+ if (pPrObj['w:suppressLineNumbers'] !== undefined) {
2345
+ paragraph.setSuppressLineNumbers(parseOoxmlBoolean(pPrObj['w:suppressLineNumbers']));
2309
2346
  }
2310
2347
 
2311
2348
  // Bidirectional layout per ECMA-376 Part 1 §17.3.1.6
@@ -2330,8 +2367,8 @@ export class DocumentParser {
2330
2367
  }
2331
2368
 
2332
2369
  // Mirror indents per ECMA-376 Part 1 §17.3.1.18
2333
- if (pPrObj['w:mirrorIndents']) {
2334
- paragraph.setMirrorIndents(true);
2370
+ if (pPrObj['w:mirrorIndents'] !== undefined) {
2371
+ paragraph.setMirrorIndents(parseOoxmlBoolean(pPrObj['w:mirrorIndents']));
2335
2372
  }
2336
2373
 
2337
2374
  // Auto-adjust right indent per ECMA-376 Part 1 §17.3.1.1
@@ -2384,8 +2421,8 @@ export class DocumentParser {
2384
2421
  }
2385
2422
 
2386
2423
  // Suppress automatic hyphenation per ECMA-376 Part 1 §17.3.1.33
2387
- if (pPrObj['w:suppressAutoHyphens']) {
2388
- paragraph.setSuppressAutoHyphens(true);
2424
+ if (pPrObj['w:suppressAutoHyphens'] !== undefined) {
2425
+ paragraph.setSuppressAutoHyphens(parseOoxmlBoolean(pPrObj['w:suppressAutoHyphens']));
2389
2426
  }
2390
2427
 
2391
2428
  // CJK paragraph properties per ECMA-376 Part 1
@@ -2409,8 +2446,8 @@ export class DocumentParser {
2409
2446
  }
2410
2447
 
2411
2448
  // Suppress text frame overlap per ECMA-376 Part 1 §17.3.1.34
2412
- if (pPrObj['w:suppressOverlap']) {
2413
- paragraph.setSuppressOverlap(true);
2449
+ if (pPrObj['w:suppressOverlap'] !== undefined) {
2450
+ paragraph.setSuppressOverlap(parseOoxmlBoolean(pPrObj['w:suppressOverlap']));
2414
2451
  }
2415
2452
 
2416
2453
  // Textbox tight wrap per ECMA-376 Part 1 §17.3.1.37
@@ -2477,13 +2514,14 @@ export class DocumentParser {
2477
2514
  }
2478
2515
 
2479
2516
  // Parse previous indentation
2517
+ // Per ECMA-376 §17.3.1.15: w:start/w:end are bidi-aware alternatives to w:left/w:right
2480
2518
  if (prevPPr['w:ind']) {
2481
2519
  const ind = prevPPr['w:ind'];
2482
2520
  previousProperties.indentation = {};
2483
- if (ind['@_w:left'] !== undefined)
2484
- previousProperties.indentation.left = parseInt(ind['@_w:left'], 10);
2485
- if (ind['@_w:right'] !== undefined)
2486
- previousProperties.indentation.right = parseInt(ind['@_w:right'], 10);
2521
+ const leftVal = ind['@_w:start'] ?? ind['@_w:left'];
2522
+ const rightVal = ind['@_w:end'] ?? ind['@_w:right'];
2523
+ if (leftVal !== undefined) previousProperties.indentation.left = parseInt(leftVal, 10);
2524
+ if (rightVal !== undefined) previousProperties.indentation.right = parseInt(rightVal, 10);
2487
2525
  if (ind['@_w:firstLine'] !== undefined)
2488
2526
  previousProperties.indentation.firstLine = parseInt(ind['@_w:firstLine'], 10);
2489
2527
  if (ind['@_w:hanging'] !== undefined)
@@ -2495,7 +2533,7 @@ export class DocumentParser {
2495
2533
  previousProperties.alignment = String(prevPPr['w:jc']['@_w:val']);
2496
2534
  }
2497
2535
 
2498
- // Parse previous spacing
2536
+ // Parse previous spacing (all 8 CT_Spacing attributes per ECMA-376 §17.3.1.33)
2499
2537
  if (prevPPr['w:spacing']) {
2500
2538
  const spacing = prevPPr['w:spacing'];
2501
2539
  previousProperties.spacing = {};
@@ -2507,6 +2545,18 @@ export class DocumentParser {
2507
2545
  previousProperties.spacing.line = parseInt(spacing['@_w:line'], 10);
2508
2546
  if (spacing['@_w:lineRule'])
2509
2547
  previousProperties.spacing.lineRule = String(spacing['@_w:lineRule']);
2548
+ if (spacing['@_w:beforeLines'] !== undefined)
2549
+ previousProperties.spacing.beforeLines = parseInt(spacing['@_w:beforeLines'], 10);
2550
+ if (spacing['@_w:afterLines'] !== undefined)
2551
+ previousProperties.spacing.afterLines = parseInt(spacing['@_w:afterLines'], 10);
2552
+ const beforeAuto = spacing['@_w:beforeAutospacing'];
2553
+ if (beforeAuto !== undefined)
2554
+ previousProperties.spacing.beforeAutospacing =
2555
+ String(beforeAuto) === '1' || String(beforeAuto) === 'true';
2556
+ const afterAuto = spacing['@_w:afterAutospacing'];
2557
+ if (afterAuto !== undefined)
2558
+ previousProperties.spacing.afterAutospacing =
2559
+ String(afterAuto) === '1' || String(afterAuto) === 'true';
2510
2560
  }
2511
2561
 
2512
2562
  // Parse previous keepNext/keepLines/pageBreakBefore
@@ -4001,7 +4051,13 @@ export class DocumentParser {
4001
4051
  const brElements = toArray(runObj['w:br']);
4002
4052
  const brElement = brElements[elementIndex] || brElements[0];
4003
4053
  const breakType = brElement?.['@_w:type'] as BreakType | undefined;
4004
- content.push({ type: 'break', breakType });
4054
+ const breakClear = brElement?.['@_w:clear'] as
4055
+ | 'none'
4056
+ | 'left'
4057
+ | 'right'
4058
+ | 'all'
4059
+ | undefined;
4060
+ content.push({ type: 'break', breakType, breakClear });
4005
4061
  break;
4006
4062
  }
4007
4063
 
@@ -4210,7 +4266,13 @@ export class DocumentParser {
4210
4266
  if (runObj['w:br'] !== undefined) {
4211
4267
  const brElement = runObj['w:br'];
4212
4268
  const breakType = brElement?.['@_w:type'] as BreakType | undefined;
4213
- content.push({ type: 'break', breakType });
4269
+ const breakClear = brElement?.['@_w:clear'] as
4270
+ | 'none'
4271
+ | 'left'
4272
+ | 'right'
4273
+ | 'all'
4274
+ | undefined;
4275
+ content.push({ type: 'break', breakType, breakClear });
4214
4276
  }
4215
4277
 
4216
4278
  if (runObj['w:cr'] !== undefined) {
@@ -4453,22 +4515,11 @@ export class DocumentParser {
4453
4515
  }
4454
4516
  }
4455
4517
 
4456
- // Handle external hyperlinks with anchor fragments
4457
- // Microsoft Word can store URLs with the base in relationships and fragment in w:anchor
4458
- // Example: rels has "https://example.com/", anchor has "!/view?docid=abc-123"
4459
- // Combined: "https://example.com/#!/view?docid=abc-123"
4460
- // This is common for single-page applications with hash-based routing (theSource, etc.)
4461
- let finalAnchor = anchor;
4462
- let finalRelationshipId = relationshipId;
4463
- if (url && anchor) {
4464
- // Combine URL and anchor for external hyperlinks with fragments
4465
- url = url + '#' + anchor;
4466
- finalAnchor = undefined; // Clear anchor since it's now part of URL
4467
- // Clear relationshipId since the relationship points to the old base URL
4468
- // On save, a new relationship will be created with the combined URL
4469
- finalRelationshipId = undefined;
4470
- defaultLogger.debug(`[DocumentParser] Combined external URL with anchor fragment: ${url}`);
4471
- }
4518
+ // Per ECMA-376 §17.16.22, a hyperlink can have BOTH r:id (external URL) and w:anchor
4519
+ // (bookmark) simultaneously — e.g., linking to a bookmark in an external document.
4520
+ // Preserve both attributes as-is; the serializer supports writing both.
4521
+ const finalAnchor = anchor;
4522
+ const finalRelationshipId = relationshipId;
4472
4523
 
4473
4524
  // Skip hyperlinks that have no destination (neither URL nor anchor nor relationship ID)
4474
4525
  // This can happen with malformed HYPERLINK field codes or corrupted documents
@@ -4780,29 +4831,19 @@ export class DocumentParser {
4780
4831
  if (val) run.setEmphasis(val);
4781
4832
  }
4782
4833
 
4783
- // Parse outline text effect (w:outline) per ECMA-376 Part 1 §17.3.2.23
4784
- if (rPrObj['w:outline']) run.setOutline(true);
4785
-
4786
- // Parse shadow text effect (w:shadow) per ECMA-376 Part 1 §17.3.2.32
4787
- if (rPrObj['w:shadow']) run.setShadow(true);
4788
-
4789
- // Parse emboss text effect (w:emboss) per ECMA-376 Part 1 §17.3.2.13
4790
- if (rPrObj['w:emboss']) run.setEmboss(true);
4791
-
4792
- // Parse imprint text effect (w:imprint) per ECMA-376 Part 1 §17.3.2.18
4793
- if (rPrObj['w:imprint']) run.setImprint(true);
4794
-
4795
- // Parse no proofing (w:noProof) per ECMA-376 Part 1 §17.3.2.21
4796
- if (rPrObj['w:noProof']) run.setNoProof(true);
4797
-
4798
- // Parse snap to grid (w:snapToGrid) per ECMA-376 Part 1 §17.3.2.35
4799
- if (rPrObj['w:snapToGrid']) run.setSnapToGrid(true);
4800
-
4801
- // Parse vanish/hidden (w:vanish) per ECMA-376 Part 1 §17.3.2.42
4802
- if (rPrObj['w:vanish']) run.setVanish(true);
4803
-
4804
- // Parse special vanish (w:specVanish) per ECMA-376 Part 1 §17.3.2.36
4805
- if (rPrObj['w:specVanish']) run.setSpecVanish(true);
4834
+ // Parse boolean text effects — use parseOoxmlBoolean to correctly handle w:val="0"/"false"
4835
+ // Per ECMA-376, <w:xxx/> or <w:xxx w:val="1"/> = true; <w:xxx w:val="0"/> = false (explicit off)
4836
+ if (parseOoxmlBoolean(rPrObj['w:outline'])) run.setOutline(true);
4837
+ if (parseOoxmlBoolean(rPrObj['w:shadow'])) run.setShadow(true);
4838
+ if (parseOoxmlBoolean(rPrObj['w:emboss'])) run.setEmboss(true);
4839
+ if (parseOoxmlBoolean(rPrObj['w:imprint'])) run.setImprint(true);
4840
+ if (parseOoxmlBoolean(rPrObj['w:noProof'])) run.setNoProof(true);
4841
+ // snapToGrid: default when absent is true (§17.3.2.34), so explicit val="0" must be preserved
4842
+ if (rPrObj['w:snapToGrid'] !== undefined) {
4843
+ run.setSnapToGrid(parseOoxmlBoolean(rPrObj['w:snapToGrid']));
4844
+ }
4845
+ if (parseOoxmlBoolean(rPrObj['w:vanish'])) run.setVanish(true);
4846
+ if (parseOoxmlBoolean(rPrObj['w:specVanish'])) run.setSpecVanish(true);
4806
4847
 
4807
4848
  // Boolean properties - use parseOoxmlBoolean helper
4808
4849
  // Per ECMA-376: <w:b/> or <w:b w:val="1"/> or <w:b w:val="true"/> means true
@@ -4811,16 +4852,20 @@ export class DocumentParser {
4811
4852
  // Parse RTL text (w:rtl) per ECMA-376 Part 1 §17.3.2.30
4812
4853
  if (parseOoxmlBoolean(rPrObj['w:rtl'])) run.setRTL(true);
4813
4854
 
4814
- if (parseOoxmlBoolean(rPrObj['w:b'])) run.setBold(true);
4815
- if (parseOoxmlBoolean(rPrObj['w:bCs'])) run.setComplexScriptBold(true);
4816
- if (parseOoxmlBoolean(rPrObj['w:i'])) run.setItalic(true);
4817
- if (parseOoxmlBoolean(rPrObj['w:iCs'])) run.setComplexScriptItalic(true);
4818
- if (parseOoxmlBoolean(rPrObj['w:strike'])) run.setStrike(true);
4819
- if (parseOoxmlBoolean(rPrObj['w:dstrike'])) {
4820
- (run as any).formatting.dstrike = true;
4821
- }
4822
- if (parseOoxmlBoolean(rPrObj['w:smallCaps'])) run.setSmallCaps(true);
4823
- if (parseOoxmlBoolean(rPrObj['w:caps'])) run.setAllCaps(true);
4855
+ // b, bCs, i, iCs: preserve explicit val="0" to override style-inherited formatting
4856
+ if (rPrObj['w:b'] !== undefined) run.setBold(parseOoxmlBoolean(rPrObj['w:b']));
4857
+ if (rPrObj['w:bCs'] !== undefined) run.setComplexScriptBold(parseOoxmlBoolean(rPrObj['w:bCs']));
4858
+ if (rPrObj['w:i'] !== undefined) run.setItalic(parseOoxmlBoolean(rPrObj['w:i']));
4859
+ if (rPrObj['w:iCs'] !== undefined)
4860
+ run.setComplexScriptItalic(parseOoxmlBoolean(rPrObj['w:iCs']));
4861
+ // strike, dstrike, smallCaps, caps: preserve explicit val="0" to override style-inherited formatting
4862
+ if (rPrObj['w:strike'] !== undefined) run.setStrike(parseOoxmlBoolean(rPrObj['w:strike']));
4863
+ if (rPrObj['w:dstrike'] !== undefined) {
4864
+ (run as any).formatting.dstrike = parseOoxmlBoolean(rPrObj['w:dstrike']);
4865
+ }
4866
+ if (rPrObj['w:smallCaps'] !== undefined)
4867
+ run.setSmallCaps(parseOoxmlBoolean(rPrObj['w:smallCaps']));
4868
+ if (rPrObj['w:caps'] !== undefined) run.setAllCaps(parseOoxmlBoolean(rPrObj['w:caps']));
4824
4869
 
4825
4870
  // Parse complex script flag (w:cs) per ECMA-376 Part 1 §17.3.2.7
4826
4871
  if (parseOoxmlBoolean(rPrObj['w:cs'])) run.setComplexScript(true);
@@ -4870,10 +4915,21 @@ export class DocumentParser {
4870
4915
  if (val) run.setKerning(parseInt(val, 10));
4871
4916
  }
4872
4917
 
4873
- // Parse language (w:lang) per ECMA-376 Part 1 §17.3.2.20
4918
+ // Parse language (w:lang) per ECMA-376 Part 1 §17.3.2.20 (CT_Language)
4874
4919
  if (rPrObj['w:lang']) {
4875
- const val = rPrObj['w:lang']['@_w:val'];
4876
- if (val) run.setLanguage(val);
4920
+ const langObj = rPrObj['w:lang'];
4921
+ const val = langObj['@_w:val'];
4922
+ const eastAsia = langObj['@_w:eastAsia'];
4923
+ const bidi = langObj['@_w:bidi'];
4924
+ if (eastAsia || bidi) {
4925
+ run.setLanguage({
4926
+ val: val ? String(val) : undefined,
4927
+ eastAsia: eastAsia ? String(eastAsia) : undefined,
4928
+ bidi: bidi ? String(bidi) : undefined,
4929
+ });
4930
+ } else if (val) {
4931
+ run.setLanguage(String(val));
4932
+ }
4877
4933
  }
4878
4934
 
4879
4935
  // Parse East Asian layout (w:eastAsianLayout) per ECMA-376 Part 1 §17.3.2.10
@@ -4907,7 +4963,8 @@ export class DocumentParser {
4907
4963
  if (rPrObj['w:vertAlign']) {
4908
4964
  const val = rPrObj['w:vertAlign']['@_w:val'];
4909
4965
  if (val === 'subscript') run.setSubscript(true);
4910
- if (val === 'superscript') run.setSuperscript(true);
4966
+ else if (val === 'superscript') run.setSuperscript(true);
4967
+ else if (val === 'baseline') (run as any).formatting.vertAlignBaseline = true;
4911
4968
  }
4912
4969
 
4913
4970
  if (rPrObj['w:rFonts']) {
@@ -4946,10 +5003,15 @@ export class DocumentParser {
4946
5003
  if (rPrObj['w:color']) {
4947
5004
  const colorObj = rPrObj['w:color'];
4948
5005
  const colorVal = colorObj['@_w:val'];
4949
- // Skip special OOXML values like "auto" (automatic/inherit from style)
4950
- // "auto" is a valid OOXML color that means inherit - not a hex color
4951
- if (colorVal && colorVal !== 'auto') {
4952
- run.setColor(colorVal);
5006
+ // Per ECMA-376 §17.18.6, w:val can be a hex color OR the special value "auto"
5007
+ // "auto" means use the automatic/window text color — must be preserved for round-trip
5008
+ if (colorVal) {
5009
+ if (colorVal === 'auto') {
5010
+ // Bypass normalizeColor() which rejects non-hex values
5011
+ (run as any).formatting.color = 'auto';
5012
+ } else {
5013
+ run.setColor(colorVal);
5014
+ }
4953
5015
  }
4954
5016
  // Parse theme color attributes per ECMA-376 Part 1 Section 17.3.2.6
4955
5017
  if (colorObj['@_w:themeColor']) {
@@ -5046,7 +5108,7 @@ export class DocumentParser {
5046
5108
  if (prevRPr['w:color']) {
5047
5109
  const colorObj = prevRPr['w:color'];
5048
5110
  const colorVal = colorObj['@_w:val'];
5049
- if (colorVal && colorVal !== 'auto') {
5111
+ if (colorVal) {
5050
5112
  prevProps.color = colorVal;
5051
5113
  }
5052
5114
  // Parse theme color attributes
@@ -5066,11 +5128,12 @@ export class DocumentParser {
5066
5128
  prevProps.highlight = prevRPr['w:highlight']['@_w:val'];
5067
5129
  }
5068
5130
 
5069
- // Parse previous subscript/superscript
5131
+ // Parse previous subscript/superscript/baseline per ECMA-376 §17.18.96
5070
5132
  if (prevRPr['w:vertAlign']) {
5071
5133
  const val = prevRPr['w:vertAlign']['@_w:val'];
5072
5134
  if (val === 'subscript') prevProps.subscript = true;
5073
- if (val === 'superscript') prevProps.superscript = true;
5135
+ else if (val === 'superscript') prevProps.superscript = true;
5136
+ else if (val === 'baseline') prevProps.vertAlignBaseline = true;
5074
5137
  }
5075
5138
 
5076
5139
  // Parse previous smallCaps/allCaps
@@ -5173,10 +5236,19 @@ export class DocumentParser {
5173
5236
  }
5174
5237
  }
5175
5238
 
5176
- // Parse language (w:lang @w:val)
5239
+ // Parse language (w:lang) per ECMA-376 CT_Language (w:val, w:eastAsia, w:bidi)
5177
5240
  if (prevRPr['w:lang']) {
5178
- const langVal = prevRPr['w:lang']['@_w:val'];
5179
- if (langVal) {
5241
+ const langObj = prevRPr['w:lang'];
5242
+ const langVal = langObj['@_w:val'];
5243
+ const langEastAsia = langObj['@_w:eastAsia'];
5244
+ const langBidi = langObj['@_w:bidi'];
5245
+ if (langEastAsia || langBidi) {
5246
+ prevProps.language = {
5247
+ val: langVal ? String(langVal) : undefined,
5248
+ eastAsia: langEastAsia ? String(langEastAsia) : undefined,
5249
+ bidi: langBidi ? String(langBidi) : undefined,
5250
+ };
5251
+ } else if (langVal) {
5180
5252
  prevProps.language = String(langVal);
5181
5253
  }
5182
5254
  }
@@ -6052,6 +6124,25 @@ export class DocumentParser {
6052
6124
  }
6053
6125
  }
6054
6126
 
6127
+ // Parse table grid change (w:tblGridChange) per ECMA-376 §17.13.5.35
6128
+ if (tableObj['w:tblGrid']?.['w:tblGridChange']) {
6129
+ const changeObj = tableObj['w:tblGrid']['w:tblGridChange'];
6130
+ const prevGridCols = changeObj['w:tblGrid']?.['w:gridCol'];
6131
+ if (prevGridCols) {
6132
+ const prevArray = Array.isArray(prevGridCols) ? prevGridCols : [prevGridCols];
6133
+ const prevWidths = prevArray.map((col: any) => ({
6134
+ width: isExplicitlySet(col['@_w:w']) ? safeParseInt(col['@_w:w'], 2880) : 2880,
6135
+ }));
6136
+ const gridChange = TableGridChange.create(
6137
+ safeParseInt(changeObj['@_w:id'], 0),
6138
+ prevWidths,
6139
+ changeObj['@_w:author'] || undefined,
6140
+ changeObj['@_w:date'] ? new Date(changeObj['@_w:date']) : undefined
6141
+ );
6142
+ table.setTblGridChange(gridChange);
6143
+ }
6144
+ }
6145
+
6055
6146
  // Parse table rows (w:tr)
6056
6147
  const rows = tableObj['w:tr'];
6057
6148
  const rowChildren = Array.isArray(rows) ? rows : rows ? [rows] : [];
@@ -6158,7 +6249,7 @@ export class DocumentParser {
6158
6249
  table.setTblLook(look['@_w:val']);
6159
6250
  } else {
6160
6251
  // Individual attribute format - construct hex value
6161
- // Per ECMA-376: bit 0=firstRow, 1=lastRow, 2=firstCol, 3=lastCol, 4=noHBand, 5=noVBand
6252
+ // Per ECMA-376 §17.4.57: bit5=firstRow, bit6=lastRow, bit7=firstCol, bit8=lastCol, bit9=noHBand, bit10=noVBand
6162
6253
  let value = 0;
6163
6254
  if (look['@_w:firstRow'] === '1') value |= 0x0020;
6164
6255
  if (look['@_w:lastRow'] === '1') value |= 0x0040;
@@ -6249,9 +6340,14 @@ export class DocumentParser {
6249
6340
  if (tblPrObj['w:tblInd']) {
6250
6341
  const indentVal = safeParseInt(tblPrObj['w:tblInd']['@_w:w'], 0);
6251
6342
  table.setIndent(indentVal);
6343
+ const indentType = tblPrObj['w:tblInd']['@_w:type'];
6344
+ if (indentType) {
6345
+ table.setIndentType(indentType as import('../elements/Table').TableWidthType);
6346
+ }
6252
6347
  }
6253
6348
 
6254
6349
  // Parse table cell margins (w:tblCellMar) per ECMA-376 Part 1 §17.4.42
6350
+ // Supports both legacy w:left/w:right and bidi-aware w:start/w:end (w:start takes precedence)
6255
6351
  if (tblPrObj['w:tblCellMar']) {
6256
6352
  const cellMar = tblPrObj['w:tblCellMar'];
6257
6353
  const margins: { top?: number; bottom?: number; left?: number; right?: number } = {};
@@ -6264,12 +6360,14 @@ export class DocumentParser {
6264
6360
  const w = cellMar['w:bottom']['@_w:w'];
6265
6361
  if (w !== undefined) margins.bottom = parseInt(w, 10);
6266
6362
  }
6267
- if (cellMar['w:left']) {
6268
- const w = cellMar['w:left']['@_w:w'];
6363
+ const leftSource = cellMar['w:start'] || cellMar['w:left'];
6364
+ if (leftSource) {
6365
+ const w = leftSource['@_w:w'];
6269
6366
  if (w !== undefined) margins.left = parseInt(w, 10);
6270
6367
  }
6271
- if (cellMar['w:right']) {
6272
- const w = cellMar['w:right']['@_w:w'];
6368
+ const rightSource = cellMar['w:end'] || cellMar['w:right'];
6369
+ if (rightSource) {
6370
+ const w = rightSource['@_w:w'];
6273
6371
  if (w !== undefined) margins.right = parseInt(w, 10);
6274
6372
  }
6275
6373
 
@@ -6413,11 +6511,21 @@ export class DocumentParser {
6413
6511
  if (!trPrObj) return;
6414
6512
 
6415
6513
  // Parse row height (w:trHeight) per ECMA-376 Part 1 §17.4.81
6514
+ // Per §17.18.33 (ST_HeightRule), when w:hRule is absent the default is "auto"
6416
6515
  if (trPrObj['w:trHeight']) {
6417
6516
  const heightVal = parseInt(trPrObj['w:trHeight']['@_w:val'] || '0', 10);
6418
- const heightRule = trPrObj['w:trHeight']['@_w:hRule'] || 'atLeast';
6517
+ const heightRule = trPrObj['w:trHeight']['@_w:hRule'];
6419
6518
  if (heightVal > 0) {
6420
- row.setHeight(heightVal, heightRule);
6519
+ // Set height without defaulting hRule — setHeight defaults to 'atLeast'
6520
+ // so we set height first, then override the rule only if explicitly present
6521
+ row.setHeight(heightVal);
6522
+ if (heightRule) {
6523
+ row.setHeightRule(heightRule);
6524
+ } else {
6525
+ // When w:hRule is absent, clear the defaulted rule so the generator omits it,
6526
+ // preserving round-trip fidelity (absent = "auto" per ECMA-376 §17.18.33)
6527
+ row.setHeightRule(undefined);
6528
+ }
6421
6529
  }
6422
6530
  }
6423
6531
 
@@ -6657,6 +6765,7 @@ export class DocumentParser {
6657
6765
  }
6658
6766
 
6659
6767
  // Parse cell margins (w:tcMar) per ECMA-376 Part 1 §17.4.43
6768
+ // Supports both legacy w:left/w:right and bidi-aware w:start/w:end (w:start takes precedence)
6660
6769
  if (tcPr['w:tcMar']) {
6661
6770
  const tcMar = tcPr['w:tcMar'];
6662
6771
  const margins: any = {};
@@ -6667,11 +6776,13 @@ export class DocumentParser {
6667
6776
  if (tcMar['w:bottom']) {
6668
6777
  margins.bottom = parseInt(tcMar['w:bottom']['@_w:w'] || '0', 10);
6669
6778
  }
6670
- if (tcMar['w:left']) {
6671
- margins.left = parseInt(tcMar['w:left']['@_w:w'] || '0', 10);
6779
+ const leftSrc = tcMar['w:start'] || tcMar['w:left'];
6780
+ if (leftSrc) {
6781
+ margins.left = parseInt(leftSrc['@_w:w'] || '0', 10);
6672
6782
  }
6673
- if (tcMar['w:right']) {
6674
- margins.right = parseInt(tcMar['w:right']['@_w:w'] || '0', 10);
6783
+ const rightSrc = tcMar['w:end'] || tcMar['w:right'];
6784
+ if (rightSrc) {
6785
+ margins.right = parseInt(rightSrc['@_w:w'] || '0', 10);
6675
6786
  }
6676
6787
 
6677
6788
  if (Object.keys(margins).length > 0) {
@@ -7599,7 +7710,7 @@ export class DocumentParser {
7599
7710
  */
7600
7711
  private parseTOCFromSDTContent(
7601
7712
  content: any[],
7602
- properties: any,
7713
+ _properties: any,
7603
7714
  sdtContent: any
7604
7715
  ): TableOfContents | null {
7605
7716
  try {
@@ -8154,12 +8265,14 @@ export class DocumentParser {
8154
8265
  const width = XMLParser.extractAttribute(pgSz, 'w:w');
8155
8266
  const height = XMLParser.extractAttribute(pgSz, 'w:h');
8156
8267
  const orient = XMLParser.extractAttribute(pgSz, 'w:orient');
8268
+ const code = XMLParser.extractAttribute(pgSz, 'w:code');
8157
8269
 
8158
8270
  if (width && height) {
8159
8271
  sectionProps.pageSize = {
8160
8272
  width: parseInt(width, 10),
8161
8273
  height: parseInt(height, 10),
8162
8274
  orientation: orient === 'landscape' ? 'landscape' : 'portrait',
8275
+ code: code ? parseInt(code, 10) : undefined,
8163
8276
  };
8164
8277
  }
8165
8278
  }
@@ -8252,14 +8365,23 @@ export class DocumentParser {
8252
8365
  const equalWidth = XMLParser.extractAttribute(cols, 'w:equalWidth');
8253
8366
  const sep = XMLParser.extractAttribute(cols, 'w:sep');
8254
8367
 
8255
- // Extract individual column widths
8368
+ // Extract individual column widths and per-column spacing (CT_Column: w:w, w:space)
8256
8369
  const colElements = XMLParser.extractElements(cols, 'w:col');
8257
8370
  const columnWidths: number[] = [];
8371
+ const columnSpaces: number[] = [];
8372
+ let hasColumnSpaces = false;
8258
8373
  for (const col of colElements) {
8259
8374
  const width = XMLParser.extractAttribute(col, 'w:w');
8260
8375
  if (width) {
8261
8376
  columnWidths.push(parseInt(width.toString(), 10));
8262
8377
  }
8378
+ const colSpace = XMLParser.extractAttribute(col, 'w:space');
8379
+ if (colSpace) {
8380
+ columnSpaces.push(parseInt(colSpace.toString(), 10));
8381
+ hasColumnSpaces = true;
8382
+ } else {
8383
+ columnSpaces.push(0);
8384
+ }
8263
8385
  }
8264
8386
 
8265
8387
  // Helper to handle boolean conversion (XMLParser may return string or number)
@@ -8272,6 +8394,7 @@ export class DocumentParser {
8272
8394
  equalWidth: equalWidth ? toBool(equalWidth) : undefined,
8273
8395
  separator: sep ? toBool(sep) : undefined,
8274
8396
  columnWidths: columnWidths.length > 0 ? columnWidths : undefined,
8397
+ columnSpaces: hasColumnSpaces ? columnSpaces : undefined,
8275
8398
  };
8276
8399
  }
8277
8400
  }
@@ -8732,13 +8855,18 @@ export class DocumentParser {
8732
8855
  }
8733
8856
  }
8734
8857
 
8735
- // Parse spacing (w:spacing)
8858
+ // Parse spacing (w:spacing) — all 8 CT_Spacing attributes per ECMA-376 §17.3.1.33
8736
8859
  const spacingElement = XMLParser.extractSelfClosingTag(pPrXml, 'w:spacing');
8737
8860
  if (spacingElement) {
8738
- const before = XMLParser.extractAttribute(`<w:spacing${spacingElement}`, 'w:before');
8739
- const after = XMLParser.extractAttribute(`<w:spacing${spacingElement}`, 'w:after');
8740
- const line = XMLParser.extractAttribute(`<w:spacing${spacingElement}`, 'w:line');
8741
- const lineRule = XMLParser.extractAttribute(`<w:spacing${spacingElement}`, 'w:lineRule');
8861
+ const spacingTag = `<w:spacing${spacingElement}`;
8862
+ const before = XMLParser.extractAttribute(spacingTag, 'w:before');
8863
+ const after = XMLParser.extractAttribute(spacingTag, 'w:after');
8864
+ const line = XMLParser.extractAttribute(spacingTag, 'w:line');
8865
+ const lineRule = XMLParser.extractAttribute(spacingTag, 'w:lineRule');
8866
+ const beforeLines = XMLParser.extractAttribute(spacingTag, 'w:beforeLines');
8867
+ const afterLines = XMLParser.extractAttribute(spacingTag, 'w:afterLines');
8868
+ const beforeAutosp = XMLParser.extractAttribute(spacingTag, 'w:beforeAutospacing');
8869
+ const afterAutosp = XMLParser.extractAttribute(spacingTag, 'w:afterAutospacing');
8742
8870
 
8743
8871
  // Validate lineRule
8744
8872
  let validatedLineRule: 'auto' | 'exact' | 'atLeast' | undefined;
@@ -8755,20 +8883,33 @@ export class DocumentParser {
8755
8883
  // If lineRule exists without line, use default 240 twips
8756
8884
  line: line ? parseInt(line, 10) : validatedLineRule ? 240 : undefined,
8757
8885
  lineRule: validatedLineRule,
8886
+ beforeLines: beforeLines ? parseInt(beforeLines, 10) : undefined,
8887
+ afterLines: afterLines ? parseInt(afterLines, 10) : undefined,
8888
+ beforeAutospacing: beforeAutosp
8889
+ ? beforeAutosp === '1' || beforeAutosp === 'true'
8890
+ : undefined,
8891
+ afterAutospacing: afterAutosp ? afterAutosp === '1' || afterAutosp === 'true' : undefined,
8758
8892
  };
8759
8893
  }
8760
8894
 
8761
8895
  // Parse indentation (w:ind)
8896
+ // Per ECMA-376 §17.3.1.12: w:start/w:end are bidi-aware alternatives to w:left/w:right
8762
8897
  const indElement = XMLParser.extractSelfClosingTag(pPrXml, 'w:ind');
8763
8898
  if (indElement) {
8764
- const left = XMLParser.extractAttribute(`<w:ind${indElement}`, 'w:left');
8765
- const right = XMLParser.extractAttribute(`<w:ind${indElement}`, 'w:right');
8766
- const firstLine = XMLParser.extractAttribute(`<w:ind${indElement}`, 'w:firstLine');
8767
- const hanging = XMLParser.extractAttribute(`<w:ind${indElement}`, 'w:hanging');
8899
+ const indTag = `<w:ind${indElement}`;
8900
+ const start = XMLParser.extractAttribute(indTag, 'w:start');
8901
+ const left = XMLParser.extractAttribute(indTag, 'w:left');
8902
+ const end = XMLParser.extractAttribute(indTag, 'w:end');
8903
+ const right = XMLParser.extractAttribute(indTag, 'w:right');
8904
+ const firstLine = XMLParser.extractAttribute(indTag, 'w:firstLine');
8905
+ const hanging = XMLParser.extractAttribute(indTag, 'w:hanging');
8906
+
8907
+ const leftVal = start || left;
8908
+ const rightVal = end || right;
8768
8909
 
8769
8910
  formatting.indentation = {
8770
- left: left ? parseInt(left, 10) : undefined,
8771
- right: right ? parseInt(right, 10) : undefined,
8911
+ left: leftVal ? parseInt(leftVal, 10) : undefined,
8912
+ right: rightVal ? parseInt(rightVal, 10) : undefined,
8772
8913
  firstLine: firstLine ? parseInt(firstLine, 10) : undefined,
8773
8914
  hanging: hanging ? parseInt(hanging, 10) : undefined,
8774
8915
  };
@@ -8804,6 +8945,54 @@ export class DocumentParser {
8804
8945
  }
8805
8946
  }
8806
8947
 
8948
+ // Parse paragraph borders (w:pBdr) per ECMA-376 Part 1 §17.3.1.24
8949
+ const pBdrXml = XMLParser.extractBetweenTags(pPrXml, '<w:pBdr>', '</w:pBdr>');
8950
+ if (pBdrXml) {
8951
+ const borders: any = {};
8952
+ const borderTypes = ['top', 'left', 'bottom', 'right', 'between', 'bar'];
8953
+ for (const type of borderTypes) {
8954
+ if (pBdrXml.includes(`<w:${type}`)) {
8955
+ const tag = XMLParser.extractSelfClosingTag(pBdrXml, `w:${type}`);
8956
+ if (tag) {
8957
+ const bTag = `<w:${type}${tag}`;
8958
+ const style = XMLParser.extractAttribute(bTag, 'w:val');
8959
+ const size = XMLParser.extractAttribute(bTag, 'w:sz');
8960
+ const space = XMLParser.extractAttribute(bTag, 'w:space');
8961
+ const color = XMLParser.extractAttribute(bTag, 'w:color');
8962
+ const border: any = {};
8963
+ if (style) border.style = style;
8964
+ if (size) border.size = parseInt(size, 10);
8965
+ if (space) border.space = parseInt(space, 10);
8966
+ if (color) border.color = color;
8967
+ if (Object.keys(border).length > 0) borders[type] = border;
8968
+ }
8969
+ }
8970
+ }
8971
+ if (Object.keys(borders).length > 0) formatting.borders = borders;
8972
+ }
8973
+
8974
+ // Parse tab stops (w:tabs) per ECMA-376 Part 1 §17.3.1.38
8975
+ const tabsXml = XMLParser.extractBetweenTags(pPrXml, '<w:tabs>', '</w:tabs>');
8976
+ if (tabsXml) {
8977
+ const tabs: any[] = [];
8978
+ // Extract all w:tab elements
8979
+ const tabRegex = /<w:tab\s[^>]*\/>/g;
8980
+ let tabMatch;
8981
+ while ((tabMatch = tabRegex.exec(tabsXml)) !== null) {
8982
+ const tabTag = tabMatch[0];
8983
+ const pos = XMLParser.extractAttribute(tabTag, 'w:pos');
8984
+ const val = XMLParser.extractAttribute(tabTag, 'w:val');
8985
+ const leader = XMLParser.extractAttribute(tabTag, 'w:leader');
8986
+ if (pos) {
8987
+ const tab: any = { position: parseInt(pos, 10) };
8988
+ if (val) tab.val = val;
8989
+ if (leader) tab.leader = leader;
8990
+ tabs.push(tab);
8991
+ }
8992
+ }
8993
+ if (tabs.length > 0) formatting.tabs = tabs;
8994
+ }
8995
+
8807
8996
  // Parse shading (w:shd) per ECMA-376 Part 1 §17.3.1.32
8808
8997
  const shading = this.parseShadingFromXml(pPrXml);
8809
8998
  if (shading) {
@@ -8838,10 +9027,11 @@ export class DocumentParser {
8838
9027
  formatting.allCaps = true;
8839
9028
  }
8840
9029
 
8841
- // Parse underline - use extractSelfClosingTag for accuracy
9030
+ // Parse underline (w:u) — all attributes per ECMA-376 §17.3.2.40
8842
9031
  const uElement = XMLParser.extractSelfClosingTag(rPrXml, 'w:u');
8843
9032
  if (uElement) {
8844
- const uVal = XMLParser.extractAttribute(`<w:u${uElement}`, 'w:val');
9033
+ const uTag = `<w:u${uElement}`;
9034
+ const uVal = XMLParser.extractAttribute(uTag, 'w:val');
8845
9035
  if (
8846
9036
  uVal === 'single' ||
8847
9037
  uVal === 'double' ||
@@ -8854,9 +9044,19 @@ export class DocumentParser {
8854
9044
  } else {
8855
9045
  formatting.underline = true;
8856
9046
  }
9047
+ const uColor = XMLParser.extractAttribute(uTag, 'w:color');
9048
+ if (uColor) formatting.underlineColor = uColor;
9049
+ const uThemeColor = XMLParser.extractAttribute(uTag, 'w:themeColor');
9050
+ if (uThemeColor) {
9051
+ formatting.underlineThemeColor = uThemeColor as import('../elements/Run').ThemeColorValue;
9052
+ }
9053
+ const uThemeTint = XMLParser.extractAttribute(uTag, 'w:themeTint');
9054
+ if (uThemeTint) formatting.underlineThemeTint = parseInt(uThemeTint, 16);
9055
+ const uThemeShade = XMLParser.extractAttribute(uTag, 'w:themeShade');
9056
+ if (uThemeShade) formatting.underlineThemeShade = parseInt(uThemeShade, 16);
8857
9057
  }
8858
9058
 
8859
- // Parse subscript/superscript - use extractSelfClosingTag
9059
+ // Parse subscript/superscript/baseline per ECMA-376 §17.18.96 (ST_VerticalAlignRun)
8860
9060
  const vertAlignElement = XMLParser.extractSelfClosingTag(rPrXml, 'w:vertAlign');
8861
9061
  if (vertAlignElement) {
8862
9062
  const val = XMLParser.extractAttribute(`<w:vertAlign${vertAlignElement}`, 'w:val');
@@ -8864,16 +9064,33 @@ export class DocumentParser {
8864
9064
  formatting.subscript = true;
8865
9065
  } else if (val === 'superscript') {
8866
9066
  formatting.superscript = true;
9067
+ } else if (val === 'baseline') {
9068
+ formatting.vertAlignBaseline = true;
8867
9069
  }
8868
9070
  }
8869
9071
 
8870
- // Parse font (w:rFonts) - use extractSelfClosingTag
9072
+ // Parse font (w:rFonts) — all attributes per ECMA-376 §17.3.2.26
8871
9073
  const rFontsElement = XMLParser.extractSelfClosingTag(rPrXml, 'w:rFonts');
8872
9074
  if (rFontsElement) {
8873
- const ascii = XMLParser.extractAttribute(`<w:rFonts${rFontsElement}`, 'w:ascii');
8874
- if (ascii) {
8875
- formatting.font = ascii;
8876
- }
9075
+ const rFontsTag = `<w:rFonts${rFontsElement}`;
9076
+ const ascii = XMLParser.extractAttribute(rFontsTag, 'w:ascii');
9077
+ if (ascii) formatting.font = ascii;
9078
+ const hAnsi = XMLParser.extractAttribute(rFontsTag, 'w:hAnsi');
9079
+ if (hAnsi) formatting.fontHAnsi = hAnsi;
9080
+ const eastAsia = XMLParser.extractAttribute(rFontsTag, 'w:eastAsia');
9081
+ if (eastAsia) formatting.fontEastAsia = eastAsia;
9082
+ const cs = XMLParser.extractAttribute(rFontsTag, 'w:cs');
9083
+ if (cs) formatting.fontCs = cs;
9084
+ const hint = XMLParser.extractAttribute(rFontsTag, 'w:hint');
9085
+ if (hint) formatting.fontHint = hint;
9086
+ const asciiTheme = XMLParser.extractAttribute(rFontsTag, 'w:asciiTheme');
9087
+ if (asciiTheme) formatting.fontAsciiTheme = asciiTheme;
9088
+ const hAnsiTheme = XMLParser.extractAttribute(rFontsTag, 'w:hAnsiTheme');
9089
+ if (hAnsiTheme) formatting.fontHAnsiTheme = hAnsiTheme;
9090
+ const eastAsiaTheme = XMLParser.extractAttribute(rFontsTag, 'w:eastAsiaTheme');
9091
+ if (eastAsiaTheme) formatting.fontEastAsiaTheme = eastAsiaTheme;
9092
+ const cstheme = XMLParser.extractAttribute(rFontsTag, 'w:cstheme');
9093
+ if (cstheme) formatting.fontCsTheme = cstheme;
8877
9094
  }
8878
9095
 
8879
9096
  // Parse size (w:sz) - size is in half-points
@@ -8886,14 +9103,38 @@ export class DocumentParser {
8886
9103
  }
8887
9104
  }
8888
9105
 
8889
- // Parse color (w:color)
8890
- // Use extractSelfClosingTag to avoid matching other tags
9106
+ // Parse complex script size (w:szCs) per ECMA-376 §17.3.2.39
9107
+ const szCsElement = XMLParser.extractSelfClosingTag(rPrXml, 'w:szCs');
9108
+ if (szCsElement) {
9109
+ const val = XMLParser.extractAttribute(`<w:szCs${szCsElement}`, 'w:val');
9110
+ if (val) {
9111
+ const szCsVal = halfPointsToPoints(parseInt(val, 10));
9112
+ if (formatting.size === undefined || szCsVal !== formatting.size) {
9113
+ formatting.sizeCs = szCsVal;
9114
+ }
9115
+ }
9116
+ }
9117
+
9118
+ // Parse color (w:color) — all attributes per ECMA-376 §17.3.2.6
8891
9119
  const colorElement = XMLParser.extractSelfClosingTag(rPrXml, 'w:color');
8892
9120
  if (colorElement) {
8893
- const val = XMLParser.extractAttribute(`<w:color${colorElement}`, 'w:val');
9121
+ const colorTag = `<w:color${colorElement}`;
9122
+ const val = XMLParser.extractAttribute(colorTag, 'w:val');
8894
9123
  if (val && val !== 'auto') {
8895
9124
  formatting.color = val;
8896
9125
  }
9126
+ const themeColor = XMLParser.extractAttribute(colorTag, 'w:themeColor');
9127
+ if (themeColor) {
9128
+ formatting.themeColor = themeColor as import('../elements/Run').ThemeColorValue;
9129
+ }
9130
+ const themeTint = XMLParser.extractAttribute(colorTag, 'w:themeTint');
9131
+ if (themeTint) {
9132
+ formatting.themeTint = parseInt(themeTint, 16);
9133
+ }
9134
+ const themeShade = XMLParser.extractAttribute(colorTag, 'w:themeShade');
9135
+ if (themeShade) {
9136
+ formatting.themeShade = parseInt(themeShade, 16);
9137
+ }
8897
9138
  }
8898
9139
 
8899
9140
  // Parse highlight (w:highlight) - use extractSelfClosingTag
@@ -8918,6 +9159,7 @@ export class DocumentParser {
8918
9159
  'lightGray',
8919
9160
  'black',
8920
9161
  'white',
9162
+ 'none',
8921
9163
  ];
8922
9164
  if (validHighlights.includes(val)) {
8923
9165
  formatting.highlight = val as
@@ -8936,7 +9178,8 @@ export class DocumentParser {
8936
9178
  | 'darkGray'
8937
9179
  | 'lightGray'
8938
9180
  | 'black'
8939
- | 'white';
9181
+ | 'white'
9182
+ | 'none';
8940
9183
  }
8941
9184
  }
8942
9185
  }
@@ -9014,14 +9257,19 @@ export class DocumentParser {
9014
9257
  ): import('../formatting/Style').TableStyleFormatting {
9015
9258
  const formatting: import('../formatting/Style').TableStyleFormatting = {};
9016
9259
 
9017
- // Parse indent
9260
+ // Parse indent (w:tblInd) — preserve w:type per ECMA-376 ST_TblWidth
9018
9261
  if (tblPrXml.includes('<w:tblInd')) {
9019
9262
  const tag = XMLParser.extractSelfClosingTag(tblPrXml, 'w:tblInd');
9020
9263
  if (tag) {
9021
- const w = XMLParser.extractAttribute(`<w:tblInd${tag}`, 'w:w');
9264
+ const tblIndTag = `<w:tblInd${tag}`;
9265
+ const w = XMLParser.extractAttribute(tblIndTag, 'w:w');
9022
9266
  if (w) {
9023
9267
  formatting.indent = parseInt(w, 10);
9024
9268
  }
9269
+ const type = XMLParser.extractAttribute(tblIndTag, 'w:type');
9270
+ if (type) {
9271
+ formatting.indentType = type as import('../elements/Table').TableWidthType;
9272
+ }
9025
9273
  }
9026
9274
  }
9027
9275
 
@@ -9331,19 +9579,43 @@ export class DocumentParser {
9331
9579
  ): import('../formatting/Style').CellMargins | undefined {
9332
9580
  const margins: import('../formatting/Style').CellMargins = {};
9333
9581
 
9334
- const marginTypes = ['top', 'bottom', 'left', 'right'];
9335
- for (const type of marginTypes) {
9582
+ // Parse top and bottom directly
9583
+ for (const type of ['top', 'bottom'] as const) {
9336
9584
  if (marginXml.includes(`<w:${type}`)) {
9337
9585
  const tag = XMLParser.extractSelfClosingTag(marginXml, `w:${type}`);
9338
9586
  if (tag) {
9339
9587
  const w = XMLParser.extractAttribute(`<w:${type}${tag}`, 'w:w');
9340
9588
  if (w) {
9341
- margins[type as keyof import('../formatting/Style').CellMargins] = parseInt(w, 10);
9589
+ margins[type] = parseInt(w, 10);
9342
9590
  }
9343
9591
  }
9344
9592
  }
9345
9593
  }
9346
9594
 
9595
+ // Parse left/right with bidi-aware w:start/w:end fallback (ECMA-376 §17.4.42/§17.4.43)
9596
+ // w:start takes precedence over w:left; w:end takes precedence over w:right
9597
+ const leftTag = marginXml.includes('<w:start')
9598
+ ? XMLParser.extractSelfClosingTag(marginXml, 'w:start')
9599
+ : XMLParser.extractSelfClosingTag(marginXml, 'w:left');
9600
+ if (leftTag) {
9601
+ const tagName = marginXml.includes('<w:start') ? 'w:start' : 'w:left';
9602
+ const w = XMLParser.extractAttribute(`<${tagName}${leftTag}`, 'w:w');
9603
+ if (w) {
9604
+ margins.left = parseInt(w, 10);
9605
+ }
9606
+ }
9607
+
9608
+ const rightTag = marginXml.includes('<w:end')
9609
+ ? XMLParser.extractSelfClosingTag(marginXml, 'w:end')
9610
+ : XMLParser.extractSelfClosingTag(marginXml, 'w:right');
9611
+ if (rightTag) {
9612
+ const tagName = marginXml.includes('<w:end') ? 'w:end' : 'w:right';
9613
+ const w = XMLParser.extractAttribute(`<${tagName}${rightTag}`, 'w:w');
9614
+ if (w) {
9615
+ margins.right = parseInt(w, 10);
9616
+ }
9617
+ }
9618
+
9347
9619
  return Object.keys(margins).length > 0 ? margins : undefined;
9348
9620
  }
9349
9621
 
@@ -9854,6 +10126,40 @@ export class DocumentParser {
9854
10126
  if (propsObj['w:tblStyle']) {
9855
10127
  result.style = propsObj['w:tblStyle']['@_w:val'] || '';
9856
10128
  }
10129
+ // tblpPr (floating table position)
10130
+ if (propsObj['w:tblpPr']) {
10131
+ const tblpPr = propsObj['w:tblpPr'];
10132
+ const pos: any = {};
10133
+ if (tblpPr['@_w:tblpX']) pos.x = parseInt(tblpPr['@_w:tblpX'], 10);
10134
+ if (tblpPr['@_w:tblpY']) pos.y = parseInt(tblpPr['@_w:tblpY'], 10);
10135
+ if (tblpPr['@_w:horzAnchor']) pos.horizontalAnchor = tblpPr['@_w:horzAnchor'];
10136
+ if (tblpPr['@_w:vertAnchor']) pos.verticalAnchor = tblpPr['@_w:vertAnchor'];
10137
+ if (tblpPr['@_w:leftFromText']) pos.leftFromText = parseInt(tblpPr['@_w:leftFromText'], 10);
10138
+ if (tblpPr['@_w:rightFromText'])
10139
+ pos.rightFromText = parseInt(tblpPr['@_w:rightFromText'], 10);
10140
+ if (tblpPr['@_w:topFromText']) pos.topFromText = parseInt(tblpPr['@_w:topFromText'], 10);
10141
+ if (tblpPr['@_w:bottomFromText'])
10142
+ pos.bottomFromText = parseInt(tblpPr['@_w:bottomFromText'], 10);
10143
+ if (Object.keys(pos).length > 0) result.position = pos;
10144
+ }
10145
+ if (propsObj['w:tblOverlap']) {
10146
+ result.overlap = propsObj['w:tblOverlap']['@_w:val'];
10147
+ }
10148
+ if (propsObj['w:bidiVisual']) {
10149
+ result.bidiVisual = true;
10150
+ }
10151
+ if (propsObj['w:tblStyleRowBandSize']) {
10152
+ result.tblStyleRowBandSize = parseInt(
10153
+ propsObj['w:tblStyleRowBandSize']['@_w:val'] || '1',
10154
+ 10
10155
+ );
10156
+ }
10157
+ if (propsObj['w:tblStyleColBandSize']) {
10158
+ result.tblStyleColBandSize = parseInt(
10159
+ propsObj['w:tblStyleColBandSize']['@_w:val'] || '1',
10160
+ 10
10161
+ );
10162
+ }
9857
10163
  if (propsObj['w:tblW']) {
9858
10164
  result.width = parseInt(propsObj['w:tblW']['@_w:w'] || '0', 10);
9859
10165
  result.widthType = propsObj['w:tblW']['@_w:type'] || 'dxa';
@@ -9863,9 +10169,24 @@ export class DocumentParser {
9863
10169
  }
9864
10170
  if (propsObj['w:tblInd']) {
9865
10171
  result.indent = parseInt(propsObj['w:tblInd']['@_w:w'] || '0', 10);
10172
+ const indType = propsObj['w:tblInd']['@_w:type'];
10173
+ if (indType) result.indentType = indType;
9866
10174
  }
9867
10175
  if (propsObj['w:tblCellSpacing']) {
9868
10176
  result.cellSpacing = parseInt(propsObj['w:tblCellSpacing']['@_w:w'] || '0', 10);
10177
+ const csType = propsObj['w:tblCellSpacing']['@_w:type'];
10178
+ if (csType) result.cellSpacingType = csType;
10179
+ }
10180
+ if (propsObj['w:tblCellMar']) {
10181
+ const cellMar = propsObj['w:tblCellMar'];
10182
+ const margins: any = {};
10183
+ if (cellMar['w:top']) margins.top = parseInt(cellMar['w:top']['@_w:w'] || '0', 10);
10184
+ if (cellMar['w:bottom']) margins.bottom = parseInt(cellMar['w:bottom']['@_w:w'] || '0', 10);
10185
+ const leftSrc = cellMar['w:start'] || cellMar['w:left'];
10186
+ if (leftSrc) margins.left = parseInt(leftSrc['@_w:w'] || '0', 10);
10187
+ const rightSrc = cellMar['w:end'] || cellMar['w:right'];
10188
+ if (rightSrc) margins.right = parseInt(rightSrc['@_w:w'] || '0', 10);
10189
+ if (Object.keys(margins).length > 0) result.cellMargins = margins;
9869
10190
  }
9870
10191
  if (propsObj['w:tblBorders']) {
9871
10192
  const borders: any = {};
@@ -9877,8 +10198,40 @@ export class DocumentParser {
9877
10198
  }
9878
10199
  if (Object.keys(borders).length > 0) result.borders = borders;
9879
10200
  }
10201
+ if (propsObj['w:tblLook']) {
10202
+ const look = propsObj['w:tblLook'];
10203
+ result.tblLook = look['@_w:val'] || '0000';
10204
+ }
10205
+ if (propsObj['w:tblCaption']) {
10206
+ result.caption = propsObj['w:tblCaption']['@_w:val'];
10207
+ }
10208
+ if (propsObj['w:tblDescription']) {
10209
+ result.description = propsObj['w:tblDescription']['@_w:val'];
10210
+ }
9880
10211
 
9881
- // Row-level properties (w:trPr context)
10212
+ // Row-level properties (w:trPr context) — all CT_TrPr elements
10213
+ if (propsObj['w:cnfStyle']) {
10214
+ result.cnfStyle = propsObj['w:cnfStyle']['@_w:val'];
10215
+ }
10216
+ if (propsObj['w:divId']) {
10217
+ result.divId = propsObj['w:divId']['@_w:val'];
10218
+ }
10219
+ if (propsObj['w:gridBefore']) {
10220
+ result.gridBefore = parseInt(propsObj['w:gridBefore']['@_w:val'] || '0', 10);
10221
+ }
10222
+ if (propsObj['w:gridAfter']) {
10223
+ result.gridAfter = parseInt(propsObj['w:gridAfter']['@_w:val'] || '0', 10);
10224
+ }
10225
+ if (propsObj['w:wBefore']) {
10226
+ result.wBefore = parseInt(propsObj['w:wBefore']['@_w:w'] || '0', 10);
10227
+ const wbType = propsObj['w:wBefore']['@_w:type'];
10228
+ if (wbType) result.wBeforeType = wbType;
10229
+ }
10230
+ if (propsObj['w:wAfter']) {
10231
+ result.wAfter = parseInt(propsObj['w:wAfter']['@_w:w'] || '0', 10);
10232
+ const waType = propsObj['w:wAfter']['@_w:type'];
10233
+ if (waType) result.wAfterType = waType;
10234
+ }
9882
10235
  if (propsObj['w:trHeight']) {
9883
10236
  result.height = parseInt(propsObj['w:trHeight']['@_w:val'] || '0', 10);
9884
10237
  const rule = propsObj['w:trHeight']['@_w:hRule'];
@@ -9894,13 +10247,19 @@ export class DocumentParser {
9894
10247
  result.hidden = true;
9895
10248
  }
9896
10249
 
9897
- // Cell-level properties (w:tcPr context)
10250
+ // Cell-level properties (w:tcPr context) — all CT_TcPr elements
9898
10251
  if (propsObj['w:tcW']) {
9899
10252
  result.width = parseInt(propsObj['w:tcW']['@_w:w'] || '0', 10);
9900
10253
  result.widthType = propsObj['w:tcW']['@_w:type'] || 'dxa';
9901
10254
  }
9902
- if (propsObj['w:vAlign']) {
9903
- result.verticalAlignment = propsObj['w:vAlign']['@_w:val'];
10255
+ if (propsObj['w:gridSpan']) {
10256
+ result.columnSpan = parseInt(propsObj['w:gridSpan']['@_w:val'] || '1', 10);
10257
+ }
10258
+ if (propsObj['w:hMerge']) {
10259
+ result.hMerge = propsObj['w:hMerge']['@_w:val'] || 'continue';
10260
+ }
10261
+ if (propsObj['w:vMerge']) {
10262
+ result.vMerge = propsObj['w:vMerge']['@_w:val'] || 'continue';
9904
10263
  }
9905
10264
  if (propsObj['w:tcBorders']) {
9906
10265
  const borders: any = {};
@@ -9912,6 +10271,35 @@ export class DocumentParser {
9912
10271
  }
9913
10272
  if (Object.keys(borders).length > 0) result.borders = borders;
9914
10273
  }
10274
+ if (propsObj['w:noWrap']) {
10275
+ result.noWrap = true;
10276
+ }
10277
+ if (propsObj['w:tcMar']) {
10278
+ const tcMar = propsObj['w:tcMar'];
10279
+ const margins: any = {};
10280
+ if (tcMar['w:top']) margins.top = parseInt(tcMar['w:top']['@_w:w'] || '0', 10);
10281
+ if (tcMar['w:bottom']) margins.bottom = parseInt(tcMar['w:bottom']['@_w:w'] || '0', 10);
10282
+ const leftSrc = tcMar['w:start'] || tcMar['w:left'];
10283
+ if (leftSrc) margins.left = parseInt(leftSrc['@_w:w'] || '0', 10);
10284
+ const rightSrc = tcMar['w:end'] || tcMar['w:right'];
10285
+ if (rightSrc) margins.right = parseInt(rightSrc['@_w:w'] || '0', 10);
10286
+ if (Object.keys(margins).length > 0) result.margins = margins;
10287
+ }
10288
+ if (propsObj['w:textDirection']) {
10289
+ result.textDirection = propsObj['w:textDirection']['@_w:val'];
10290
+ }
10291
+ if (propsObj['w:tcFitText']) {
10292
+ result.fitText = true;
10293
+ }
10294
+ if (propsObj['w:vAlign']) {
10295
+ result.verticalAlignment = propsObj['w:vAlign']['@_w:val'];
10296
+ }
10297
+ if (propsObj['w:hideMark']) {
10298
+ result.hideMark = true;
10299
+ }
10300
+ if (propsObj['w:cnfStyle']) {
10301
+ result.cnfStyle = propsObj['w:cnfStyle']['@_w:val'];
10302
+ }
9915
10303
 
9916
10304
  // Shared properties (appear in multiple contexts)
9917
10305
  if (propsObj['w:jc']) {
@@ -9944,11 +10332,13 @@ export class DocumentParser {
9944
10332
  const width = XMLParser.extractAttribute(pgSz, 'w:w');
9945
10333
  const height = XMLParser.extractAttribute(pgSz, 'w:h');
9946
10334
  const orient = XMLParser.extractAttribute(pgSz, 'w:orient');
10335
+ const code = XMLParser.extractAttribute(pgSz, 'w:code');
9947
10336
  if (width || height) {
9948
10337
  result.pageSize = {
9949
10338
  width: width ? parseInt(width, 10) : undefined,
9950
10339
  height: height ? parseInt(height, 10) : undefined,
9951
10340
  orientation: orient === 'landscape' ? 'landscape' : 'portrait',
10341
+ code: code ? parseInt(code, 10) : undefined,
9952
10342
  };
9953
10343
  }
9954
10344
  }
@@ -9980,6 +10370,34 @@ export class DocumentParser {
9980
10370
  if (val) result.type = val;
9981
10371
  }
9982
10372
 
10373
+ // Line numbering
10374
+ const lnNumElements = XMLParser.extractElements(sectPrXml, 'w:lnNumType');
10375
+ if (lnNumElements.length > 0 && lnNumElements[0]) {
10376
+ const ln = lnNumElements[0];
10377
+ const lnObj: any = {};
10378
+ const countBy = XMLParser.extractAttribute(ln, 'w:countBy');
10379
+ if (countBy) lnObj.countBy = parseInt(countBy, 10);
10380
+ const start = XMLParser.extractAttribute(ln, 'w:start');
10381
+ if (start) lnObj.start = parseInt(start, 10);
10382
+ const restart = XMLParser.extractAttribute(ln, 'w:restart');
10383
+ if (restart) lnObj.restart = restart;
10384
+ const distance = XMLParser.extractAttribute(ln, 'w:distance');
10385
+ if (distance) lnObj.distance = parseInt(distance, 10);
10386
+ if (Object.keys(lnObj).length > 0) result.lineNumbering = lnObj;
10387
+ }
10388
+
10389
+ // Page numbering
10390
+ const pgNumElements = XMLParser.extractElements(sectPrXml, 'w:pgNumType');
10391
+ if (pgNumElements.length > 0 && pgNumElements[0]) {
10392
+ const pn = pgNumElements[0];
10393
+ const pnObj: any = {};
10394
+ const pnStart = XMLParser.extractAttribute(pn, 'w:start');
10395
+ if (pnStart) pnObj.start = parseInt(pnStart, 10);
10396
+ const fmt = XMLParser.extractAttribute(pn, 'w:fmt');
10397
+ if (fmt) pnObj.format = fmt;
10398
+ if (Object.keys(pnObj).length > 0) result.pageNumbering = pnObj;
10399
+ }
10400
+
9983
10401
  // Columns
9984
10402
  const colsElements = XMLParser.extractElements(sectPrXml, 'w:cols');
9985
10403
  if (colsElements.length > 0 && colsElements[0]) {
@@ -9994,6 +10412,49 @@ export class DocumentParser {
9994
10412
  }
9995
10413
  }
9996
10414
 
10415
+ // Form protection
10416
+ if (sectPrXml.includes('<w:formProt')) result.formProt = true;
10417
+
10418
+ // Vertical alignment
10419
+ const vAlignElements = XMLParser.extractElements(sectPrXml, 'w:vAlign');
10420
+ if (vAlignElements.length > 0 && vAlignElements[0]) {
10421
+ const val = XMLParser.extractAttribute(vAlignElements[0], 'w:val');
10422
+ if (val) result.verticalAlignment = val;
10423
+ }
10424
+
10425
+ // Suppress endnotes
10426
+ if (sectPrXml.includes('<w:noEndnote')) result.noEndnote = true;
10427
+
10428
+ // Title page
10429
+ if (sectPrXml.includes('<w:titlePg')) result.titlePage = true;
10430
+
10431
+ // Text direction
10432
+ const textDirElements = XMLParser.extractElements(sectPrXml, 'w:textDirection');
10433
+ if (textDirElements.length > 0 && textDirElements[0]) {
10434
+ const val = XMLParser.extractAttribute(textDirElements[0], 'w:val');
10435
+ if (val) result.textDirection = val;
10436
+ }
10437
+
10438
+ // Bidi section
10439
+ if (sectPrXml.includes('<w:bidi')) result.bidi = true;
10440
+
10441
+ // RTL gutter
10442
+ if (sectPrXml.includes('<w:rtlGutter')) result.rtlGutter = true;
10443
+
10444
+ // Document grid
10445
+ const docGridElements = XMLParser.extractElements(sectPrXml, 'w:docGrid');
10446
+ if (docGridElements.length > 0 && docGridElements[0]) {
10447
+ const dg = docGridElements[0];
10448
+ const dgObj: any = {};
10449
+ const dgType = XMLParser.extractAttribute(dg, 'w:type');
10450
+ if (dgType) dgObj.type = dgType;
10451
+ const linePitch = XMLParser.extractAttribute(dg, 'w:linePitch');
10452
+ if (linePitch) dgObj.linePitch = parseInt(linePitch, 10);
10453
+ const charSpace = XMLParser.extractAttribute(dg, 'w:charSpace');
10454
+ if (charSpace) dgObj.charSpace = parseInt(charSpace, 10);
10455
+ if (Object.keys(dgObj).length > 0) result.docGrid = dgObj;
10456
+ }
10457
+
9997
10458
  return result;
9998
10459
  }
9999
10460
  }