docxmlater 10.3.5 → 10.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. package/README.md +158 -7
  2. package/dist/core/Document.d.ts +102 -3
  3. package/dist/core/Document.d.ts.map +1 -1
  4. package/dist/core/Document.js +775 -50
  5. package/dist/core/Document.js.map +1 -1
  6. package/dist/core/DocumentContent.d.ts.map +1 -1
  7. package/dist/core/DocumentContent.js +0 -8
  8. package/dist/core/DocumentContent.js.map +1 -1
  9. package/dist/core/DocumentGenerator.d.ts.map +1 -1
  10. package/dist/core/DocumentGenerator.js +9 -5
  11. package/dist/core/DocumentGenerator.js.map +1 -1
  12. package/dist/core/DocumentParser.d.ts.map +1 -1
  13. package/dist/core/DocumentParser.js +588 -102
  14. package/dist/core/DocumentParser.js.map +1 -1
  15. package/dist/core/RelationshipManager.d.ts.map +1 -1
  16. package/dist/core/RelationshipManager.js +4 -3
  17. package/dist/core/RelationshipManager.js.map +1 -1
  18. package/dist/elements/Bookmark.d.ts +7 -0
  19. package/dist/elements/Bookmark.d.ts.map +1 -1
  20. package/dist/elements/Bookmark.js +24 -4
  21. package/dist/elements/Bookmark.js.map +1 -1
  22. package/dist/elements/BookmarkManager.d.ts.map +1 -1
  23. package/dist/elements/BookmarkManager.js +4 -3
  24. package/dist/elements/BookmarkManager.js.map +1 -1
  25. package/dist/elements/CommonTypes.d.ts +2 -2
  26. package/dist/elements/CommonTypes.d.ts.map +1 -1
  27. package/dist/elements/CommonTypes.js +14 -1
  28. package/dist/elements/CommonTypes.js.map +1 -1
  29. package/dist/elements/Field.d.ts +1 -1
  30. package/dist/elements/Field.d.ts.map +1 -1
  31. package/dist/elements/Field.js +1 -1
  32. package/dist/elements/Field.js.map +1 -1
  33. package/dist/elements/Footer.d.ts +2 -0
  34. package/dist/elements/Footer.d.ts.map +1 -1
  35. package/dist/elements/Footer.js +6 -0
  36. package/dist/elements/Footer.js.map +1 -1
  37. package/dist/elements/Header.d.ts +2 -0
  38. package/dist/elements/Header.d.ts.map +1 -1
  39. package/dist/elements/Header.js +6 -0
  40. package/dist/elements/Header.js.map +1 -1
  41. package/dist/elements/Image.d.ts +1 -0
  42. package/dist/elements/Image.d.ts.map +1 -1
  43. package/dist/elements/Image.js +17 -2
  44. package/dist/elements/Image.js.map +1 -1
  45. package/dist/elements/Paragraph.d.ts +81 -1
  46. package/dist/elements/Paragraph.d.ts.map +1 -1
  47. package/dist/elements/Paragraph.js +515 -21
  48. package/dist/elements/Paragraph.js.map +1 -1
  49. package/dist/elements/Revision.d.ts +0 -1
  50. package/dist/elements/Revision.d.ts.map +1 -1
  51. package/dist/elements/Revision.js +0 -12
  52. package/dist/elements/Revision.js.map +1 -1
  53. package/dist/elements/RevisionManager.d.ts +0 -1
  54. package/dist/elements/RevisionManager.d.ts.map +1 -1
  55. package/dist/elements/RevisionManager.js +0 -2
  56. package/dist/elements/RevisionManager.js.map +1 -1
  57. package/dist/elements/Run.d.ts +16 -4
  58. package/dist/elements/Run.d.ts.map +1 -1
  59. package/dist/elements/Run.js +114 -22
  60. package/dist/elements/Run.js.map +1 -1
  61. package/dist/elements/Section.d.ts +7 -1
  62. package/dist/elements/Section.d.ts.map +1 -1
  63. package/dist/elements/Section.js +185 -4
  64. package/dist/elements/Section.js.map +1 -1
  65. package/dist/elements/Shape.js.map +1 -1
  66. package/dist/elements/Table.d.ts +30 -1
  67. package/dist/elements/Table.d.ts.map +1 -1
  68. package/dist/elements/Table.js +357 -40
  69. package/dist/elements/Table.js.map +1 -1
  70. package/dist/elements/TableCell.d.ts +3 -0
  71. package/dist/elements/TableCell.d.ts.map +1 -1
  72. package/dist/elements/TableCell.js +30 -3
  73. package/dist/elements/TableCell.js.map +1 -1
  74. package/dist/elements/TableGridChange.d.ts +0 -1
  75. package/dist/elements/TableGridChange.d.ts.map +1 -1
  76. package/dist/elements/TableGridChange.js +0 -10
  77. package/dist/elements/TableGridChange.js.map +1 -1
  78. package/dist/elements/TableRow.d.ts +4 -0
  79. package/dist/elements/TableRow.d.ts.map +1 -1
  80. package/dist/elements/TableRow.js +31 -3
  81. package/dist/elements/TableRow.js.map +1 -1
  82. package/dist/formatting/AbstractNumbering.d.ts +5 -0
  83. package/dist/formatting/AbstractNumbering.d.ts.map +1 -1
  84. package/dist/formatting/AbstractNumbering.js +22 -0
  85. package/dist/formatting/AbstractNumbering.js.map +1 -1
  86. package/dist/formatting/NumberingLevel.d.ts.map +1 -1
  87. package/dist/formatting/NumberingLevel.js +3 -3
  88. package/dist/formatting/NumberingLevel.js.map +1 -1
  89. package/dist/formatting/Style.d.ts +1 -0
  90. package/dist/formatting/Style.d.ts.map +1 -1
  91. package/dist/formatting/Style.js +25 -59
  92. package/dist/formatting/Style.js.map +1 -1
  93. package/dist/formatting/StylesManager.d.ts +1 -0
  94. package/dist/formatting/StylesManager.d.ts.map +1 -1
  95. package/dist/formatting/StylesManager.js +12 -0
  96. package/dist/formatting/StylesManager.js.map +1 -1
  97. package/dist/helpers/CleanupHelper.js.map +1 -1
  98. package/dist/images/ImageOptimizer.d.ts.map +1 -1
  99. package/dist/images/ImageOptimizer.js +0 -1
  100. package/dist/images/ImageOptimizer.js.map +1 -1
  101. package/dist/index.d.ts +1 -1
  102. package/dist/index.d.ts.map +1 -1
  103. package/dist/index.js.map +1 -1
  104. package/dist/managers/DrawingManager.d.ts.map +1 -1
  105. package/dist/managers/DrawingManager.js +4 -2
  106. package/dist/managers/DrawingManager.js.map +1 -1
  107. package/dist/types/formatting.d.ts +2 -2
  108. package/dist/types/formatting.d.ts.map +1 -1
  109. package/dist/types/formatting.js.map +1 -1
  110. package/dist/utils/ChangelogGenerator.d.ts +2 -2
  111. package/dist/utils/ChangelogGenerator.d.ts.map +1 -1
  112. package/dist/utils/ChangelogGenerator.js +4 -5
  113. package/dist/utils/ChangelogGenerator.js.map +1 -1
  114. package/dist/utils/InMemoryRevisionAcceptor.d.ts.map +1 -1
  115. package/dist/utils/InMemoryRevisionAcceptor.js +0 -1
  116. package/dist/utils/InMemoryRevisionAcceptor.js.map +1 -1
  117. package/dist/utils/RevisionAwareProcessor.d.ts +2 -2
  118. package/dist/utils/RevisionAwareProcessor.d.ts.map +1 -1
  119. package/dist/utils/RevisionAwareProcessor.js +2 -2
  120. package/dist/utils/RevisionAwareProcessor.js.map +1 -1
  121. package/dist/utils/SelectiveRevisionAcceptor.d.ts +0 -2
  122. package/dist/utils/SelectiveRevisionAcceptor.d.ts.map +1 -1
  123. package/dist/utils/SelectiveRevisionAcceptor.js +0 -26
  124. package/dist/utils/SelectiveRevisionAcceptor.js.map +1 -1
  125. package/dist/utils/ShadingResolver.d.ts.map +1 -1
  126. package/dist/utils/ShadingResolver.js.map +1 -1
  127. package/dist/utils/acceptRevisions.js +1 -1
  128. package/dist/utils/acceptRevisions.js.map +1 -1
  129. package/dist/utils/stripTrackedChanges.js +1 -1
  130. package/dist/utils/stripTrackedChanges.js.map +1 -1
  131. package/dist/utils/units.d.ts.map +1 -1
  132. package/dist/utils/units.js +1 -1
  133. package/dist/utils/units.js.map +1 -1
  134. package/dist/validation/RevisionAutoFixer.d.ts +2 -1
  135. package/dist/validation/RevisionAutoFixer.d.ts.map +1 -1
  136. package/dist/validation/RevisionAutoFixer.js.map +1 -1
  137. package/package.json +10 -1
  138. package/src/constants/CLAUDE.md +28 -0
  139. package/src/core/CLAUDE.md +4 -0
  140. package/src/core/Document.ts +1888 -137
  141. package/src/core/DocumentContent.ts +0 -11
  142. package/src/core/DocumentGenerator.ts +11 -12
  143. package/src/core/DocumentParser.ts +620 -139
  144. package/src/core/RelationshipManager.ts +6 -3
  145. package/src/elements/Bookmark.ts +39 -4
  146. package/src/elements/BookmarkManager.ts +4 -3
  147. package/src/elements/CLAUDE.md +18 -2
  148. package/src/elements/CommonTypes.ts +35 -8
  149. package/src/elements/Field.ts +1 -1
  150. package/src/elements/Footer.ts +23 -0
  151. package/src/elements/Header.ts +25 -0
  152. package/src/elements/Image.ts +28 -5
  153. package/src/elements/Paragraph.ts +1069 -41
  154. package/src/elements/Revision.ts +0 -19
  155. package/src/elements/RevisionManager.ts +1 -3
  156. package/src/elements/Run.ts +265 -35
  157. package/src/elements/Section.ts +214 -8
  158. package/src/elements/Shape.ts +1 -1
  159. package/src/elements/Table.ts +850 -61
  160. package/src/elements/TableCell.ts +84 -10
  161. package/src/elements/TableGridChange.ts +2 -16
  162. package/src/elements/TableRow.ts +94 -9
  163. package/src/formatting/AbstractNumbering.ts +42 -1
  164. package/src/formatting/CLAUDE.md +4 -0
  165. package/src/formatting/NumberingLevel.ts +11 -7
  166. package/src/formatting/Style.ts +39 -71
  167. package/src/formatting/StylesManager.ts +36 -0
  168. package/src/helpers/CleanupHelper.ts +1 -1
  169. package/src/images/ImageOptimizer.ts +0 -3
  170. package/src/index.ts +1 -1
  171. package/src/managers/DrawingManager.ts +5 -3
  172. package/src/tracking/CLAUDE.md +30 -0
  173. package/src/types/CLAUDE.md +39 -0
  174. package/src/types/formatting.ts +2 -2
  175. package/src/utils/CLAUDE.md +15 -0
  176. package/src/utils/ChangelogGenerator.ts +4 -5
  177. package/src/utils/InMemoryRevisionAcceptor.ts +0 -9
  178. package/src/utils/RevisionAwareProcessor.ts +2 -3
  179. package/src/utils/SelectiveRevisionAcceptor.ts +0 -39
  180. package/src/utils/ShadingResolver.ts +0 -1
  181. package/src/utils/acceptRevisions.ts +1 -1
  182. package/src/utils/stripTrackedChanges.ts +1 -1
  183. package/src/utils/units.ts +2 -1
  184. package/src/validation/CLAUDE.md +40 -0
  185. package/src/validation/RevisionAutoFixer.ts +2 -1
@@ -31,12 +31,13 @@ import {
31
31
  RunContent,
32
32
  RunFormatting,
33
33
  } from '../elements/Run';
34
- import { PageNumberFormat, Section, SectionProperties, SectionType } from '../elements/Section';
34
+ import { Section, SectionProperties, SectionType } from '../elements/Section';
35
35
  import { StructuredDocumentTag } from '../elements/StructuredDocumentTag';
36
36
  import { Table, TableBorder } from '../elements/Table';
37
37
  import { TableCell } from '../elements/TableCell';
38
38
  import { TableOfContents } from '../elements/TableOfContents';
39
39
  import { TableOfContentsElement } from '../elements/TableOfContentsElement';
40
+ import { TableGridChange } from '../elements/TableGridChange';
40
41
  import { TableRow } from '../elements/TableRow';
41
42
  import { AbstractNumbering } from '../formatting/AbstractNumbering';
42
43
  import { NumberingInstance } from '../formatting/NumberingInstance';
@@ -904,11 +905,15 @@ export class DocumentParser {
904
905
  }
905
906
  }
906
907
 
907
- // Parse w14:paraId if present
908
+ // Parse w14:paraId and w14:textId if present
908
909
  const paraId = pElement['w14:paraId'];
909
910
  if (paraId) {
910
911
  paragraph.formatting.paraId = paraId as string;
911
912
  }
913
+ const textId = pElement['w14:textId'];
914
+ if (textId) {
915
+ paragraph.formatting.textId = textId as string;
916
+ }
912
917
 
913
918
  // CRITICAL FIX: Preserve document order of paragraph children (runs, hyperlinks, fields)
914
919
  // When XMLParser.parseToObject groups multiple runs/hyperlinks, it creates arrays
@@ -1861,10 +1866,15 @@ export class DocumentParser {
1861
1866
 
1862
1867
  // Create bookmark with skipNormalization to preserve original name exactly
1863
1868
  // (Word allows special characters like = and . in bookmark names)
1869
+ // Parse optional column range for table bookmarks (ECMA-376 §17.16.5)
1870
+ const colFirstAttr = XMLParser.extractAttribute(bookmarkXml, 'w:colFirst');
1871
+ const colLastAttr = XMLParser.extractAttribute(bookmarkXml, 'w:colLast');
1864
1872
  const bookmark = new Bookmark({
1865
1873
  name: nameAttr,
1866
1874
  id: id,
1867
1875
  skipNormalization: true,
1876
+ colFirst: colFirstAttr ? parseInt(colFirstAttr, 10) : undefined,
1877
+ colLast: colLastAttr ? parseInt(colLastAttr, 10) : undefined,
1868
1878
  });
1869
1879
 
1870
1880
  // Register with BookmarkManager to enable hasBookmark() checks
@@ -1934,11 +1944,15 @@ export class DocumentParser {
1934
1944
  try {
1935
1945
  const paragraph = new Paragraph();
1936
1946
 
1937
- // Parse w14:paraId attribute from paragraph element (Word 2010+ requirement)
1947
+ // Parse w14:paraId and w14:textId attributes from paragraph element (Word 2010+)
1938
1948
  const paraId = paraObj['w14:paraId'];
1939
1949
  if (paraId) {
1940
1950
  paragraph.formatting.paraId = paraId;
1941
1951
  }
1952
+ const textId = paraObj['w14:textId'];
1953
+ if (textId) {
1954
+ paragraph.formatting.textId = textId;
1955
+ }
1942
1956
 
1943
1957
  // Parse paragraph properties
1944
1958
  this.parseParagraphPropertiesFromObject(paraObj['w:pPr'], paragraph);
@@ -2155,9 +2169,11 @@ export class DocumentParser {
2155
2169
  if (pPrObj['w:ind']) {
2156
2170
  const ind = pPrObj['w:ind'];
2157
2171
  // Use isExplicitlySet and safeParseInt for robust zero-value handling
2158
- if (isExplicitlySet(ind['@_w:left'])) paragraph.setLeftIndent(safeParseInt(ind['@_w:left']));
2159
- if (isExplicitlySet(ind['@_w:right']))
2160
- paragraph.setRightIndent(safeParseInt(ind['@_w:right']));
2172
+ // Per ECMA-376 §17.3.1.15: w:start/w:end are bidi-aware alternatives to w:left/w:right
2173
+ const leftVal = ind['@_w:start'] ?? ind['@_w:left'];
2174
+ const rightVal = ind['@_w:end'] ?? ind['@_w:right'];
2175
+ if (isExplicitlySet(leftVal)) paragraph.setLeftIndent(safeParseInt(leftVal));
2176
+ if (isExplicitlySet(rightVal)) paragraph.setRightIndent(safeParseInt(rightVal));
2161
2177
  if (isExplicitlySet(ind['@_w:firstLine']))
2162
2178
  paragraph.setFirstLineIndent(safeParseInt(ind['@_w:firstLine']));
2163
2179
  // Parse hanging indent per ECMA-376 Part 1 §17.3.1.17
@@ -2165,7 +2181,7 @@ export class DocumentParser {
2165
2181
  paragraph.setHangingIndent(safeParseInt(ind['@_w:hanging']));
2166
2182
  }
2167
2183
 
2168
- // Spacing
2184
+ // Spacing (ECMA-376 §17.3.1.33 — 8 attributes)
2169
2185
  if (pPrObj['w:spacing']) {
2170
2186
  const spacing = pPrObj['w:spacing'];
2171
2187
  // Use isExplicitlySet to properly handle 0 values (0 spacing is valid)
@@ -2176,18 +2192,39 @@ export class DocumentParser {
2176
2192
  if (isExplicitlySet(spacing['@_w:line'])) {
2177
2193
  paragraph.setLineSpacing(safeParseInt(spacing['@_w:line']), spacing['@_w:lineRule']);
2178
2194
  }
2195
+ // Parse extended spacing attributes — write directly to paragraph.formatting
2196
+ // (getFormatting() returns a shallow copy, so we must access the internal object)
2197
+ if (!paragraph.formatting.spacing) paragraph.formatting.spacing = {};
2198
+ if (isExplicitlySet(spacing['@_w:beforeLines']))
2199
+ paragraph.formatting.spacing.beforeLines = safeParseInt(spacing['@_w:beforeLines']);
2200
+ if (isExplicitlySet(spacing['@_w:afterLines']))
2201
+ paragraph.formatting.spacing.afterLines = safeParseInt(spacing['@_w:afterLines']);
2202
+ const beforeAuto = spacing['@_w:beforeAutospacing'];
2203
+ if (beforeAuto !== undefined)
2204
+ paragraph.formatting.spacing.beforeAutospacing =
2205
+ String(beforeAuto) === '1' || String(beforeAuto) === 'true';
2206
+ const afterAuto = spacing['@_w:afterAutospacing'];
2207
+ if (afterAuto !== undefined)
2208
+ paragraph.formatting.spacing.afterAutospacing =
2209
+ String(afterAuto) === '1' || String(afterAuto) === 'true';
2179
2210
  }
2180
2211
 
2181
- // Keep properties - parse pageBreakBefore FIRST, then apply keep properties
2182
- // This triggers automatic conflict resolution per ECMA-376 v0.28.2
2183
- if (pPrObj['w:pageBreakBefore']) paragraph.formatting.pageBreakBefore = true;
2184
-
2185
- // Keep properties - these will automatically clear pageBreakBefore if both are set
2186
- if (pPrObj['w:keepNext']) paragraph.setKeepNext(true);
2187
- if (pPrObj['w:keepLines']) paragraph.setKeepLines(true);
2212
+ // Keep properties preserve explicit val="0" to override style inheritance
2213
+ // Parse pageBreakBefore FIRST, then keep properties (triggers automatic conflict resolution)
2214
+ if (pPrObj['w:pageBreakBefore'] !== undefined) {
2215
+ paragraph.formatting.pageBreakBefore = parseOoxmlBoolean(pPrObj['w:pageBreakBefore']);
2216
+ }
2217
+ if (pPrObj['w:keepNext'] !== undefined) {
2218
+ paragraph.setKeepNext(parseOoxmlBoolean(pPrObj['w:keepNext']));
2219
+ }
2220
+ if (pPrObj['w:keepLines'] !== undefined) {
2221
+ paragraph.setKeepLines(parseOoxmlBoolean(pPrObj['w:keepLines']));
2222
+ }
2188
2223
 
2189
2224
  // Contextual spacing
2190
- if (pPrObj['w:contextualSpacing']) paragraph.setContextualSpacing(true);
2225
+ if (pPrObj['w:contextualSpacing'] !== undefined) {
2226
+ paragraph.setContextualSpacing(parseOoxmlBoolean(pPrObj['w:contextualSpacing']));
2227
+ }
2191
2228
 
2192
2229
  // Numbering
2193
2230
  // Note: When track changes are present (w:pPrChange), XMLParser merges the
@@ -2304,8 +2341,8 @@ export class DocumentParser {
2304
2341
  }
2305
2342
 
2306
2343
  // Suppress line numbers per ECMA-376 Part 1 §17.3.1.34
2307
- if (pPrObj['w:suppressLineNumbers']) {
2308
- paragraph.setSuppressLineNumbers(true);
2344
+ if (pPrObj['w:suppressLineNumbers'] !== undefined) {
2345
+ paragraph.setSuppressLineNumbers(parseOoxmlBoolean(pPrObj['w:suppressLineNumbers']));
2309
2346
  }
2310
2347
 
2311
2348
  // Bidirectional layout per ECMA-376 Part 1 §17.3.1.6
@@ -2330,8 +2367,8 @@ export class DocumentParser {
2330
2367
  }
2331
2368
 
2332
2369
  // Mirror indents per ECMA-376 Part 1 §17.3.1.18
2333
- if (pPrObj['w:mirrorIndents']) {
2334
- paragraph.setMirrorIndents(true);
2370
+ if (pPrObj['w:mirrorIndents'] !== undefined) {
2371
+ paragraph.setMirrorIndents(parseOoxmlBoolean(pPrObj['w:mirrorIndents']));
2335
2372
  }
2336
2373
 
2337
2374
  // Auto-adjust right indent per ECMA-376 Part 1 §17.3.1.1
@@ -2384,8 +2421,8 @@ export class DocumentParser {
2384
2421
  }
2385
2422
 
2386
2423
  // Suppress automatic hyphenation per ECMA-376 Part 1 §17.3.1.33
2387
- if (pPrObj['w:suppressAutoHyphens']) {
2388
- paragraph.setSuppressAutoHyphens(true);
2424
+ if (pPrObj['w:suppressAutoHyphens'] !== undefined) {
2425
+ paragraph.setSuppressAutoHyphens(parseOoxmlBoolean(pPrObj['w:suppressAutoHyphens']));
2389
2426
  }
2390
2427
 
2391
2428
  // CJK paragraph properties per ECMA-376 Part 1
@@ -2409,8 +2446,8 @@ export class DocumentParser {
2409
2446
  }
2410
2447
 
2411
2448
  // Suppress text frame overlap per ECMA-376 Part 1 §17.3.1.34
2412
- if (pPrObj['w:suppressOverlap']) {
2413
- paragraph.setSuppressOverlap(true);
2449
+ if (pPrObj['w:suppressOverlap'] !== undefined) {
2450
+ paragraph.setSuppressOverlap(parseOoxmlBoolean(pPrObj['w:suppressOverlap']));
2414
2451
  }
2415
2452
 
2416
2453
  // Textbox tight wrap per ECMA-376 Part 1 §17.3.1.37
@@ -2477,13 +2514,14 @@ export class DocumentParser {
2477
2514
  }
2478
2515
 
2479
2516
  // Parse previous indentation
2517
+ // Per ECMA-376 §17.3.1.15: w:start/w:end are bidi-aware alternatives to w:left/w:right
2480
2518
  if (prevPPr['w:ind']) {
2481
2519
  const ind = prevPPr['w:ind'];
2482
2520
  previousProperties.indentation = {};
2483
- if (ind['@_w:left'] !== undefined)
2484
- previousProperties.indentation.left = parseInt(ind['@_w:left'], 10);
2485
- if (ind['@_w:right'] !== undefined)
2486
- previousProperties.indentation.right = parseInt(ind['@_w:right'], 10);
2521
+ const leftVal = ind['@_w:start'] ?? ind['@_w:left'];
2522
+ const rightVal = ind['@_w:end'] ?? ind['@_w:right'];
2523
+ if (leftVal !== undefined) previousProperties.indentation.left = parseInt(leftVal, 10);
2524
+ if (rightVal !== undefined) previousProperties.indentation.right = parseInt(rightVal, 10);
2487
2525
  if (ind['@_w:firstLine'] !== undefined)
2488
2526
  previousProperties.indentation.firstLine = parseInt(ind['@_w:firstLine'], 10);
2489
2527
  if (ind['@_w:hanging'] !== undefined)
@@ -2495,7 +2533,7 @@ export class DocumentParser {
2495
2533
  previousProperties.alignment = String(prevPPr['w:jc']['@_w:val']);
2496
2534
  }
2497
2535
 
2498
- // Parse previous spacing
2536
+ // Parse previous spacing (all 8 CT_Spacing attributes per ECMA-376 §17.3.1.33)
2499
2537
  if (prevPPr['w:spacing']) {
2500
2538
  const spacing = prevPPr['w:spacing'];
2501
2539
  previousProperties.spacing = {};
@@ -2507,6 +2545,18 @@ export class DocumentParser {
2507
2545
  previousProperties.spacing.line = parseInt(spacing['@_w:line'], 10);
2508
2546
  if (spacing['@_w:lineRule'])
2509
2547
  previousProperties.spacing.lineRule = String(spacing['@_w:lineRule']);
2548
+ if (spacing['@_w:beforeLines'] !== undefined)
2549
+ previousProperties.spacing.beforeLines = parseInt(spacing['@_w:beforeLines'], 10);
2550
+ if (spacing['@_w:afterLines'] !== undefined)
2551
+ previousProperties.spacing.afterLines = parseInt(spacing['@_w:afterLines'], 10);
2552
+ const beforeAuto = spacing['@_w:beforeAutospacing'];
2553
+ if (beforeAuto !== undefined)
2554
+ previousProperties.spacing.beforeAutospacing =
2555
+ String(beforeAuto) === '1' || String(beforeAuto) === 'true';
2556
+ const afterAuto = spacing['@_w:afterAutospacing'];
2557
+ if (afterAuto !== undefined)
2558
+ previousProperties.spacing.afterAutospacing =
2559
+ String(afterAuto) === '1' || String(afterAuto) === 'true';
2510
2560
  }
2511
2561
 
2512
2562
  // Parse previous keepNext/keepLines/pageBreakBefore
@@ -4001,7 +4051,13 @@ export class DocumentParser {
4001
4051
  const brElements = toArray(runObj['w:br']);
4002
4052
  const brElement = brElements[elementIndex] || brElements[0];
4003
4053
  const breakType = brElement?.['@_w:type'] as BreakType | undefined;
4004
- content.push({ type: 'break', breakType });
4054
+ const breakClear = brElement?.['@_w:clear'] as
4055
+ | 'none'
4056
+ | 'left'
4057
+ | 'right'
4058
+ | 'all'
4059
+ | undefined;
4060
+ content.push({ type: 'break', breakType, breakClear });
4005
4061
  break;
4006
4062
  }
4007
4063
 
@@ -4210,7 +4266,13 @@ export class DocumentParser {
4210
4266
  if (runObj['w:br'] !== undefined) {
4211
4267
  const brElement = runObj['w:br'];
4212
4268
  const breakType = brElement?.['@_w:type'] as BreakType | undefined;
4213
- content.push({ type: 'break', breakType });
4269
+ const breakClear = brElement?.['@_w:clear'] as
4270
+ | 'none'
4271
+ | 'left'
4272
+ | 'right'
4273
+ | 'all'
4274
+ | undefined;
4275
+ content.push({ type: 'break', breakType, breakClear });
4214
4276
  }
4215
4277
 
4216
4278
  if (runObj['w:cr'] !== undefined) {
@@ -4453,22 +4515,11 @@ export class DocumentParser {
4453
4515
  }
4454
4516
  }
4455
4517
 
4456
- // Handle external hyperlinks with anchor fragments
4457
- // Microsoft Word can store URLs with the base in relationships and fragment in w:anchor
4458
- // Example: rels has "https://example.com/", anchor has "!/view?docid=abc-123"
4459
- // Combined: "https://example.com/#!/view?docid=abc-123"
4460
- // This is common for single-page applications with hash-based routing (theSource, etc.)
4461
- let finalAnchor = anchor;
4462
- let finalRelationshipId = relationshipId;
4463
- if (url && anchor) {
4464
- // Combine URL and anchor for external hyperlinks with fragments
4465
- url = url + '#' + anchor;
4466
- finalAnchor = undefined; // Clear anchor since it's now part of URL
4467
- // Clear relationshipId since the relationship points to the old base URL
4468
- // On save, a new relationship will be created with the combined URL
4469
- finalRelationshipId = undefined;
4470
- defaultLogger.debug(`[DocumentParser] Combined external URL with anchor fragment: ${url}`);
4471
- }
4518
+ // Per ECMA-376 §17.16.22, a hyperlink can have BOTH r:id (external URL) and w:anchor
4519
+ // (bookmark) simultaneously — e.g., linking to a bookmark in an external document.
4520
+ // Preserve both attributes as-is; the serializer supports writing both.
4521
+ const finalAnchor = anchor;
4522
+ const finalRelationshipId = relationshipId;
4472
4523
 
4473
4524
  // Skip hyperlinks that have no destination (neither URL nor anchor nor relationship ID)
4474
4525
  // This can happen with malformed HYPERLINK field codes or corrupted documents
@@ -4780,29 +4831,19 @@ export class DocumentParser {
4780
4831
  if (val) run.setEmphasis(val);
4781
4832
  }
4782
4833
 
4783
- // Parse outline text effect (w:outline) per ECMA-376 Part 1 §17.3.2.23
4784
- if (rPrObj['w:outline']) run.setOutline(true);
4785
-
4786
- // Parse shadow text effect (w:shadow) per ECMA-376 Part 1 §17.3.2.32
4787
- if (rPrObj['w:shadow']) run.setShadow(true);
4788
-
4789
- // Parse emboss text effect (w:emboss) per ECMA-376 Part 1 §17.3.2.13
4790
- if (rPrObj['w:emboss']) run.setEmboss(true);
4791
-
4792
- // Parse imprint text effect (w:imprint) per ECMA-376 Part 1 §17.3.2.18
4793
- if (rPrObj['w:imprint']) run.setImprint(true);
4794
-
4795
- // Parse no proofing (w:noProof) per ECMA-376 Part 1 §17.3.2.21
4796
- if (rPrObj['w:noProof']) run.setNoProof(true);
4797
-
4798
- // Parse snap to grid (w:snapToGrid) per ECMA-376 Part 1 §17.3.2.35
4799
- if (rPrObj['w:snapToGrid']) run.setSnapToGrid(true);
4800
-
4801
- // Parse vanish/hidden (w:vanish) per ECMA-376 Part 1 §17.3.2.42
4802
- if (rPrObj['w:vanish']) run.setVanish(true);
4803
-
4804
- // Parse special vanish (w:specVanish) per ECMA-376 Part 1 §17.3.2.36
4805
- if (rPrObj['w:specVanish']) run.setSpecVanish(true);
4834
+ // Parse boolean text effects — use parseOoxmlBoolean to correctly handle w:val="0"/"false"
4835
+ // Per ECMA-376, <w:xxx/> or <w:xxx w:val="1"/> = true; <w:xxx w:val="0"/> = false (explicit off)
4836
+ if (parseOoxmlBoolean(rPrObj['w:outline'])) run.setOutline(true);
4837
+ if (parseOoxmlBoolean(rPrObj['w:shadow'])) run.setShadow(true);
4838
+ if (parseOoxmlBoolean(rPrObj['w:emboss'])) run.setEmboss(true);
4839
+ if (parseOoxmlBoolean(rPrObj['w:imprint'])) run.setImprint(true);
4840
+ if (parseOoxmlBoolean(rPrObj['w:noProof'])) run.setNoProof(true);
4841
+ // snapToGrid: default when absent is true (§17.3.2.34), so explicit val="0" must be preserved
4842
+ if (rPrObj['w:snapToGrid'] !== undefined) {
4843
+ run.setSnapToGrid(parseOoxmlBoolean(rPrObj['w:snapToGrid']));
4844
+ }
4845
+ if (parseOoxmlBoolean(rPrObj['w:vanish'])) run.setVanish(true);
4846
+ if (parseOoxmlBoolean(rPrObj['w:specVanish'])) run.setSpecVanish(true);
4806
4847
 
4807
4848
  // Boolean properties - use parseOoxmlBoolean helper
4808
4849
  // Per ECMA-376: <w:b/> or <w:b w:val="1"/> or <w:b w:val="true"/> means true
@@ -4811,16 +4852,20 @@ export class DocumentParser {
4811
4852
  // Parse RTL text (w:rtl) per ECMA-376 Part 1 §17.3.2.30
4812
4853
  if (parseOoxmlBoolean(rPrObj['w:rtl'])) run.setRTL(true);
4813
4854
 
4814
- if (parseOoxmlBoolean(rPrObj['w:b'])) run.setBold(true);
4815
- if (parseOoxmlBoolean(rPrObj['w:bCs'])) run.setComplexScriptBold(true);
4816
- if (parseOoxmlBoolean(rPrObj['w:i'])) run.setItalic(true);
4817
- if (parseOoxmlBoolean(rPrObj['w:iCs'])) run.setComplexScriptItalic(true);
4818
- if (parseOoxmlBoolean(rPrObj['w:strike'])) run.setStrike(true);
4819
- if (parseOoxmlBoolean(rPrObj['w:dstrike'])) {
4820
- (run as any).formatting.dstrike = true;
4821
- }
4822
- if (parseOoxmlBoolean(rPrObj['w:smallCaps'])) run.setSmallCaps(true);
4823
- if (parseOoxmlBoolean(rPrObj['w:caps'])) run.setAllCaps(true);
4855
+ // b, bCs, i, iCs: preserve explicit val="0" to override style-inherited formatting
4856
+ if (rPrObj['w:b'] !== undefined) run.setBold(parseOoxmlBoolean(rPrObj['w:b']));
4857
+ if (rPrObj['w:bCs'] !== undefined) run.setComplexScriptBold(parseOoxmlBoolean(rPrObj['w:bCs']));
4858
+ if (rPrObj['w:i'] !== undefined) run.setItalic(parseOoxmlBoolean(rPrObj['w:i']));
4859
+ if (rPrObj['w:iCs'] !== undefined)
4860
+ run.setComplexScriptItalic(parseOoxmlBoolean(rPrObj['w:iCs']));
4861
+ // strike, dstrike, smallCaps, caps: preserve explicit val="0" to override style-inherited formatting
4862
+ if (rPrObj['w:strike'] !== undefined) run.setStrike(parseOoxmlBoolean(rPrObj['w:strike']));
4863
+ if (rPrObj['w:dstrike'] !== undefined) {
4864
+ (run as any).formatting.dstrike = parseOoxmlBoolean(rPrObj['w:dstrike']);
4865
+ }
4866
+ if (rPrObj['w:smallCaps'] !== undefined)
4867
+ run.setSmallCaps(parseOoxmlBoolean(rPrObj['w:smallCaps']));
4868
+ if (rPrObj['w:caps'] !== undefined) run.setAllCaps(parseOoxmlBoolean(rPrObj['w:caps']));
4824
4869
 
4825
4870
  // Parse complex script flag (w:cs) per ECMA-376 Part 1 §17.3.2.7
4826
4871
  if (parseOoxmlBoolean(rPrObj['w:cs'])) run.setComplexScript(true);
@@ -4870,10 +4915,21 @@ export class DocumentParser {
4870
4915
  if (val) run.setKerning(parseInt(val, 10));
4871
4916
  }
4872
4917
 
4873
- // Parse language (w:lang) per ECMA-376 Part 1 §17.3.2.20
4918
+ // Parse language (w:lang) per ECMA-376 Part 1 §17.3.2.20 (CT_Language)
4874
4919
  if (rPrObj['w:lang']) {
4875
- const val = rPrObj['w:lang']['@_w:val'];
4876
- if (val) run.setLanguage(val);
4920
+ const langObj = rPrObj['w:lang'];
4921
+ const val = langObj['@_w:val'];
4922
+ const eastAsia = langObj['@_w:eastAsia'];
4923
+ const bidi = langObj['@_w:bidi'];
4924
+ if (eastAsia || bidi) {
4925
+ run.setLanguage({
4926
+ val: val ? String(val) : undefined,
4927
+ eastAsia: eastAsia ? String(eastAsia) : undefined,
4928
+ bidi: bidi ? String(bidi) : undefined,
4929
+ });
4930
+ } else if (val) {
4931
+ run.setLanguage(String(val));
4932
+ }
4877
4933
  }
4878
4934
 
4879
4935
  // Parse East Asian layout (w:eastAsianLayout) per ECMA-376 Part 1 §17.3.2.10
@@ -4907,7 +4963,8 @@ export class DocumentParser {
4907
4963
  if (rPrObj['w:vertAlign']) {
4908
4964
  const val = rPrObj['w:vertAlign']['@_w:val'];
4909
4965
  if (val === 'subscript') run.setSubscript(true);
4910
- if (val === 'superscript') run.setSuperscript(true);
4966
+ else if (val === 'superscript') run.setSuperscript(true);
4967
+ else if (val === 'baseline') (run as any).formatting.vertAlignBaseline = true;
4911
4968
  }
4912
4969
 
4913
4970
  if (rPrObj['w:rFonts']) {
@@ -4946,10 +5003,15 @@ export class DocumentParser {
4946
5003
  if (rPrObj['w:color']) {
4947
5004
  const colorObj = rPrObj['w:color'];
4948
5005
  const colorVal = colorObj['@_w:val'];
4949
- // Skip special OOXML values like "auto" (automatic/inherit from style)
4950
- // "auto" is a valid OOXML color that means inherit - not a hex color
4951
- if (colorVal && colorVal !== 'auto') {
4952
- run.setColor(colorVal);
5006
+ // Per ECMA-376 §17.18.6, w:val can be a hex color OR the special value "auto"
5007
+ // "auto" means use the automatic/window text color — must be preserved for round-trip
5008
+ if (colorVal) {
5009
+ if (colorVal === 'auto') {
5010
+ // Bypass normalizeColor() which rejects non-hex values
5011
+ (run as any).formatting.color = 'auto';
5012
+ } else {
5013
+ run.setColor(colorVal);
5014
+ }
4953
5015
  }
4954
5016
  // Parse theme color attributes per ECMA-376 Part 1 Section 17.3.2.6
4955
5017
  if (colorObj['@_w:themeColor']) {
@@ -5046,7 +5108,7 @@ export class DocumentParser {
5046
5108
  if (prevRPr['w:color']) {
5047
5109
  const colorObj = prevRPr['w:color'];
5048
5110
  const colorVal = colorObj['@_w:val'];
5049
- if (colorVal && colorVal !== 'auto') {
5111
+ if (colorVal) {
5050
5112
  prevProps.color = colorVal;
5051
5113
  }
5052
5114
  // Parse theme color attributes
@@ -5066,11 +5128,12 @@ export class DocumentParser {
5066
5128
  prevProps.highlight = prevRPr['w:highlight']['@_w:val'];
5067
5129
  }
5068
5130
 
5069
- // Parse previous subscript/superscript
5131
+ // Parse previous subscript/superscript/baseline per ECMA-376 §17.18.96
5070
5132
  if (prevRPr['w:vertAlign']) {
5071
5133
  const val = prevRPr['w:vertAlign']['@_w:val'];
5072
5134
  if (val === 'subscript') prevProps.subscript = true;
5073
- if (val === 'superscript') prevProps.superscript = true;
5135
+ else if (val === 'superscript') prevProps.superscript = true;
5136
+ else if (val === 'baseline') prevProps.vertAlignBaseline = true;
5074
5137
  }
5075
5138
 
5076
5139
  // Parse previous smallCaps/allCaps
@@ -5173,10 +5236,19 @@ export class DocumentParser {
5173
5236
  }
5174
5237
  }
5175
5238
 
5176
- // Parse language (w:lang @w:val)
5239
+ // Parse language (w:lang) per ECMA-376 CT_Language (w:val, w:eastAsia, w:bidi)
5177
5240
  if (prevRPr['w:lang']) {
5178
- const langVal = prevRPr['w:lang']['@_w:val'];
5179
- if (langVal) {
5241
+ const langObj = prevRPr['w:lang'];
5242
+ const langVal = langObj['@_w:val'];
5243
+ const langEastAsia = langObj['@_w:eastAsia'];
5244
+ const langBidi = langObj['@_w:bidi'];
5245
+ if (langEastAsia || langBidi) {
5246
+ prevProps.language = {
5247
+ val: langVal ? String(langVal) : undefined,
5248
+ eastAsia: langEastAsia ? String(langEastAsia) : undefined,
5249
+ bidi: langBidi ? String(langBidi) : undefined,
5250
+ };
5251
+ } else if (langVal) {
5180
5252
  prevProps.language = String(langVal);
5181
5253
  }
5182
5254
  }
@@ -5471,6 +5543,8 @@ export class DocumentParser {
5471
5543
  // --- Group A & C: Parse pic:spPr (shape properties) ---
5472
5544
  const spPrObj = picPicObj['pic:spPr'];
5473
5545
  let border: any = undefined;
5546
+ let zeroWidthLnXml: string | null = null;
5547
+ let hasSpPrNoFill = false;
5474
5548
  let rotation = 0;
5475
5549
  let flipH = false;
5476
5550
  let flipV = false;
@@ -5493,7 +5567,7 @@ export class DocumentParser {
5493
5567
  if (lnObj) {
5494
5568
  const widthEmu = parseInt(lnObj['@_w'] || '0', 10);
5495
5569
  if (widthEmu > 0) {
5496
- border = { width: widthEmu / 12700 } as any;
5570
+ border = { width: widthEmu / 12700, _fromParsed: true } as any;
5497
5571
  // Parse additional a:ln attributes
5498
5572
  if (lnObj['@_cap']) border.cap = String(lnObj['@_cap']);
5499
5573
  if (lnObj['@_cmpd']) border.compound = String(lnObj['@_cmpd']);
@@ -5550,9 +5624,17 @@ export class DocumentParser {
5550
5624
  if (lnObj['a:tailEnd']['@_len'])
5551
5625
  border.tailEnd.length = String(lnObj['a:tailEnd']['@_len']);
5552
5626
  }
5627
+ } else {
5628
+ // Zero-width or absent-width a:ln: preserve as raw XML (BUG 8 fix)
5629
+ zeroWidthLnXml = this.objectToXml({ 'a:ln': lnObj });
5553
5630
  }
5554
5631
  }
5555
5632
 
5633
+ // Detect spPr-level a:noFill (preserve independently from border)
5634
+ if (spPrObj['a:noFill']) {
5635
+ hasSpPrNoFill = true;
5636
+ }
5637
+
5556
5638
  // Parse rotation and flip from a:xfrm
5557
5639
  const xfrmObj = spPrObj['a:xfrm'];
5558
5640
  if (xfrmObj?.['@_rot']) {
@@ -5677,6 +5759,16 @@ export class DocumentParser {
5677
5759
  svgRelationshipId,
5678
5760
  });
5679
5761
 
5762
+ // Preserve zero-width a:ln as raw passthrough (BUG 8 fix)
5763
+ if (zeroWidthLnXml) {
5764
+ image._setRawPassthrough('zero-width-ln', zeroWidthLnXml);
5765
+ }
5766
+
5767
+ // Preserve spPr-level a:noFill independently from border (Bug C fix)
5768
+ if (hasSpPrNoFill) {
5769
+ image._setRawPassthrough('spPr-noFill', '<a:noFill/>');
5770
+ }
5771
+
5680
5772
  // --- Group B: Collect raw passthrough for unmodeled XML subtrees ---
5681
5773
  // Blip effects (children of a:blip that aren't modeled)
5682
5774
  const blipEffectsRaw = this.collectUnmodeledChildren(blipObj, [
@@ -6032,6 +6124,25 @@ export class DocumentParser {
6032
6124
  }
6033
6125
  }
6034
6126
 
6127
+ // Parse table grid change (w:tblGridChange) per ECMA-376 §17.13.5.35
6128
+ if (tableObj['w:tblGrid']?.['w:tblGridChange']) {
6129
+ const changeObj = tableObj['w:tblGrid']['w:tblGridChange'];
6130
+ const prevGridCols = changeObj['w:tblGrid']?.['w:gridCol'];
6131
+ if (prevGridCols) {
6132
+ const prevArray = Array.isArray(prevGridCols) ? prevGridCols : [prevGridCols];
6133
+ const prevWidths = prevArray.map((col: any) => ({
6134
+ width: isExplicitlySet(col['@_w:w']) ? safeParseInt(col['@_w:w'], 2880) : 2880,
6135
+ }));
6136
+ const gridChange = TableGridChange.create(
6137
+ safeParseInt(changeObj['@_w:id'], 0),
6138
+ prevWidths,
6139
+ changeObj['@_w:author'] || undefined,
6140
+ changeObj['@_w:date'] ? new Date(changeObj['@_w:date']) : undefined
6141
+ );
6142
+ table.setTblGridChange(gridChange);
6143
+ }
6144
+ }
6145
+
6035
6146
  // Parse table rows (w:tr)
6036
6147
  const rows = tableObj['w:tr'];
6037
6148
  const rowChildren = Array.isArray(rows) ? rows : rows ? [rows] : [];
@@ -6138,7 +6249,7 @@ export class DocumentParser {
6138
6249
  table.setTblLook(look['@_w:val']);
6139
6250
  } else {
6140
6251
  // Individual attribute format - construct hex value
6141
- // Per ECMA-376: bit 0=firstRow, 1=lastRow, 2=firstCol, 3=lastCol, 4=noHBand, 5=noVBand
6252
+ // Per ECMA-376 §17.4.57: bit5=firstRow, bit6=lastRow, bit7=firstCol, bit8=lastCol, bit9=noHBand, bit10=noVBand
6142
6253
  let value = 0;
6143
6254
  if (look['@_w:firstRow'] === '1') value |= 0x0020;
6144
6255
  if (look['@_w:lastRow'] === '1') value |= 0x0040;
@@ -6229,9 +6340,14 @@ export class DocumentParser {
6229
6340
  if (tblPrObj['w:tblInd']) {
6230
6341
  const indentVal = safeParseInt(tblPrObj['w:tblInd']['@_w:w'], 0);
6231
6342
  table.setIndent(indentVal);
6343
+ const indentType = tblPrObj['w:tblInd']['@_w:type'];
6344
+ if (indentType) {
6345
+ table.setIndentType(indentType as import('../elements/Table').TableWidthType);
6346
+ }
6232
6347
  }
6233
6348
 
6234
6349
  // Parse table cell margins (w:tblCellMar) per ECMA-376 Part 1 §17.4.42
6350
+ // Supports both legacy w:left/w:right and bidi-aware w:start/w:end (w:start takes precedence)
6235
6351
  if (tblPrObj['w:tblCellMar']) {
6236
6352
  const cellMar = tblPrObj['w:tblCellMar'];
6237
6353
  const margins: { top?: number; bottom?: number; left?: number; right?: number } = {};
@@ -6244,12 +6360,14 @@ export class DocumentParser {
6244
6360
  const w = cellMar['w:bottom']['@_w:w'];
6245
6361
  if (w !== undefined) margins.bottom = parseInt(w, 10);
6246
6362
  }
6247
- if (cellMar['w:left']) {
6248
- const w = cellMar['w:left']['@_w:w'];
6363
+ const leftSource = cellMar['w:start'] || cellMar['w:left'];
6364
+ if (leftSource) {
6365
+ const w = leftSource['@_w:w'];
6249
6366
  if (w !== undefined) margins.left = parseInt(w, 10);
6250
6367
  }
6251
- if (cellMar['w:right']) {
6252
- const w = cellMar['w:right']['@_w:w'];
6368
+ const rightSource = cellMar['w:end'] || cellMar['w:right'];
6369
+ if (rightSource) {
6370
+ const w = rightSource['@_w:w'];
6253
6371
  if (w !== undefined) margins.right = parseInt(w, 10);
6254
6372
  }
6255
6373
 
@@ -6393,11 +6511,21 @@ export class DocumentParser {
6393
6511
  if (!trPrObj) return;
6394
6512
 
6395
6513
  // Parse row height (w:trHeight) per ECMA-376 Part 1 §17.4.81
6514
+ // Per §17.18.33 (ST_HeightRule), when w:hRule is absent the default is "auto"
6396
6515
  if (trPrObj['w:trHeight']) {
6397
6516
  const heightVal = parseInt(trPrObj['w:trHeight']['@_w:val'] || '0', 10);
6398
- const heightRule = trPrObj['w:trHeight']['@_w:hRule'] || 'atLeast';
6517
+ const heightRule = trPrObj['w:trHeight']['@_w:hRule'];
6399
6518
  if (heightVal > 0) {
6400
- row.setHeight(heightVal, heightRule);
6519
+ // Set height without defaulting hRule — setHeight defaults to 'atLeast'
6520
+ // so we set height first, then override the rule only if explicitly present
6521
+ row.setHeight(heightVal);
6522
+ if (heightRule) {
6523
+ row.setHeightRule(heightRule);
6524
+ } else {
6525
+ // When w:hRule is absent, clear the defaulted rule so the generator omits it,
6526
+ // preserving round-trip fidelity (absent = "auto" per ECMA-376 §17.18.33)
6527
+ row.setHeightRule(undefined);
6528
+ }
6401
6529
  }
6402
6530
  }
6403
6531
 
@@ -6637,6 +6765,7 @@ export class DocumentParser {
6637
6765
  }
6638
6766
 
6639
6767
  // Parse cell margins (w:tcMar) per ECMA-376 Part 1 §17.4.43
6768
+ // Supports both legacy w:left/w:right and bidi-aware w:start/w:end (w:start takes precedence)
6640
6769
  if (tcPr['w:tcMar']) {
6641
6770
  const tcMar = tcPr['w:tcMar'];
6642
6771
  const margins: any = {};
@@ -6647,11 +6776,13 @@ export class DocumentParser {
6647
6776
  if (tcMar['w:bottom']) {
6648
6777
  margins.bottom = parseInt(tcMar['w:bottom']['@_w:w'] || '0', 10);
6649
6778
  }
6650
- if (tcMar['w:left']) {
6651
- margins.left = parseInt(tcMar['w:left']['@_w:w'] || '0', 10);
6779
+ const leftSrc = tcMar['w:start'] || tcMar['w:left'];
6780
+ if (leftSrc) {
6781
+ margins.left = parseInt(leftSrc['@_w:w'] || '0', 10);
6652
6782
  }
6653
- if (tcMar['w:right']) {
6654
- margins.right = parseInt(tcMar['w:right']['@_w:w'] || '0', 10);
6783
+ const rightSrc = tcMar['w:end'] || tcMar['w:right'];
6784
+ if (rightSrc) {
6785
+ margins.right = parseInt(rightSrc['@_w:w'] || '0', 10);
6655
6786
  }
6656
6787
 
6657
6788
  if (Object.keys(margins).length > 0) {
@@ -7579,7 +7710,7 @@ export class DocumentParser {
7579
7710
  */
7580
7711
  private parseTOCFromSDTContent(
7581
7712
  content: any[],
7582
- properties: any,
7713
+ _properties: any,
7583
7714
  sdtContent: any
7584
7715
  ): TableOfContents | null {
7585
7716
  try {
@@ -8134,12 +8265,14 @@ export class DocumentParser {
8134
8265
  const width = XMLParser.extractAttribute(pgSz, 'w:w');
8135
8266
  const height = XMLParser.extractAttribute(pgSz, 'w:h');
8136
8267
  const orient = XMLParser.extractAttribute(pgSz, 'w:orient');
8268
+ const code = XMLParser.extractAttribute(pgSz, 'w:code');
8137
8269
 
8138
8270
  if (width && height) {
8139
8271
  sectionProps.pageSize = {
8140
8272
  width: parseInt(width, 10),
8141
8273
  height: parseInt(height, 10),
8142
8274
  orientation: orient === 'landscape' ? 'landscape' : 'portrait',
8275
+ code: code ? parseInt(code, 10) : undefined,
8143
8276
  };
8144
8277
  }
8145
8278
  }
@@ -8232,14 +8365,23 @@ export class DocumentParser {
8232
8365
  const equalWidth = XMLParser.extractAttribute(cols, 'w:equalWidth');
8233
8366
  const sep = XMLParser.extractAttribute(cols, 'w:sep');
8234
8367
 
8235
- // Extract individual column widths
8368
+ // Extract individual column widths and per-column spacing (CT_Column: w:w, w:space)
8236
8369
  const colElements = XMLParser.extractElements(cols, 'w:col');
8237
8370
  const columnWidths: number[] = [];
8371
+ const columnSpaces: number[] = [];
8372
+ let hasColumnSpaces = false;
8238
8373
  for (const col of colElements) {
8239
8374
  const width = XMLParser.extractAttribute(col, 'w:w');
8240
8375
  if (width) {
8241
8376
  columnWidths.push(parseInt(width.toString(), 10));
8242
8377
  }
8378
+ const colSpace = XMLParser.extractAttribute(col, 'w:space');
8379
+ if (colSpace) {
8380
+ columnSpaces.push(parseInt(colSpace.toString(), 10));
8381
+ hasColumnSpaces = true;
8382
+ } else {
8383
+ columnSpaces.push(0);
8384
+ }
8243
8385
  }
8244
8386
 
8245
8387
  // Helper to handle boolean conversion (XMLParser may return string or number)
@@ -8252,6 +8394,7 @@ export class DocumentParser {
8252
8394
  equalWidth: equalWidth ? toBool(equalWidth) : undefined,
8253
8395
  separator: sep ? toBool(sep) : undefined,
8254
8396
  columnWidths: columnWidths.length > 0 ? columnWidths : undefined,
8397
+ columnSpaces: hasColumnSpaces ? columnSpaces : undefined,
8255
8398
  };
8256
8399
  }
8257
8400
  }
@@ -8712,13 +8855,18 @@ export class DocumentParser {
8712
8855
  }
8713
8856
  }
8714
8857
 
8715
- // Parse spacing (w:spacing)
8858
+ // Parse spacing (w:spacing) — all 8 CT_Spacing attributes per ECMA-376 §17.3.1.33
8716
8859
  const spacingElement = XMLParser.extractSelfClosingTag(pPrXml, 'w:spacing');
8717
8860
  if (spacingElement) {
8718
- const before = XMLParser.extractAttribute(`<w:spacing${spacingElement}`, 'w:before');
8719
- const after = XMLParser.extractAttribute(`<w:spacing${spacingElement}`, 'w:after');
8720
- const line = XMLParser.extractAttribute(`<w:spacing${spacingElement}`, 'w:line');
8721
- const lineRule = XMLParser.extractAttribute(`<w:spacing${spacingElement}`, 'w:lineRule');
8861
+ const spacingTag = `<w:spacing${spacingElement}`;
8862
+ const before = XMLParser.extractAttribute(spacingTag, 'w:before');
8863
+ const after = XMLParser.extractAttribute(spacingTag, 'w:after');
8864
+ const line = XMLParser.extractAttribute(spacingTag, 'w:line');
8865
+ const lineRule = XMLParser.extractAttribute(spacingTag, 'w:lineRule');
8866
+ const beforeLines = XMLParser.extractAttribute(spacingTag, 'w:beforeLines');
8867
+ const afterLines = XMLParser.extractAttribute(spacingTag, 'w:afterLines');
8868
+ const beforeAutosp = XMLParser.extractAttribute(spacingTag, 'w:beforeAutospacing');
8869
+ const afterAutosp = XMLParser.extractAttribute(spacingTag, 'w:afterAutospacing');
8722
8870
 
8723
8871
  // Validate lineRule
8724
8872
  let validatedLineRule: 'auto' | 'exact' | 'atLeast' | undefined;
@@ -8735,20 +8883,33 @@ export class DocumentParser {
8735
8883
  // If lineRule exists without line, use default 240 twips
8736
8884
  line: line ? parseInt(line, 10) : validatedLineRule ? 240 : undefined,
8737
8885
  lineRule: validatedLineRule,
8886
+ beforeLines: beforeLines ? parseInt(beforeLines, 10) : undefined,
8887
+ afterLines: afterLines ? parseInt(afterLines, 10) : undefined,
8888
+ beforeAutospacing: beforeAutosp
8889
+ ? beforeAutosp === '1' || beforeAutosp === 'true'
8890
+ : undefined,
8891
+ afterAutospacing: afterAutosp ? afterAutosp === '1' || afterAutosp === 'true' : undefined,
8738
8892
  };
8739
8893
  }
8740
8894
 
8741
8895
  // Parse indentation (w:ind)
8896
+ // Per ECMA-376 §17.3.1.15: w:start/w:end are bidi-aware alternatives to w:left/w:right
8742
8897
  const indElement = XMLParser.extractSelfClosingTag(pPrXml, 'w:ind');
8743
8898
  if (indElement) {
8744
- const left = XMLParser.extractAttribute(`<w:ind${indElement}`, 'w:left');
8745
- const right = XMLParser.extractAttribute(`<w:ind${indElement}`, 'w:right');
8746
- const firstLine = XMLParser.extractAttribute(`<w:ind${indElement}`, 'w:firstLine');
8747
- const hanging = XMLParser.extractAttribute(`<w:ind${indElement}`, 'w:hanging');
8899
+ const indTag = `<w:ind${indElement}`;
8900
+ const start = XMLParser.extractAttribute(indTag, 'w:start');
8901
+ const left = XMLParser.extractAttribute(indTag, 'w:left');
8902
+ const end = XMLParser.extractAttribute(indTag, 'w:end');
8903
+ const right = XMLParser.extractAttribute(indTag, 'w:right');
8904
+ const firstLine = XMLParser.extractAttribute(indTag, 'w:firstLine');
8905
+ const hanging = XMLParser.extractAttribute(indTag, 'w:hanging');
8906
+
8907
+ const leftVal = start || left;
8908
+ const rightVal = end || right;
8748
8909
 
8749
8910
  formatting.indentation = {
8750
- left: left ? parseInt(left, 10) : undefined,
8751
- right: right ? parseInt(right, 10) : undefined,
8911
+ left: leftVal ? parseInt(leftVal, 10) : undefined,
8912
+ right: rightVal ? parseInt(rightVal, 10) : undefined,
8752
8913
  firstLine: firstLine ? parseInt(firstLine, 10) : undefined,
8753
8914
  hanging: hanging ? parseInt(hanging, 10) : undefined,
8754
8915
  };
@@ -8784,6 +8945,54 @@ export class DocumentParser {
8784
8945
  }
8785
8946
  }
8786
8947
 
8948
+ // Parse paragraph borders (w:pBdr) per ECMA-376 Part 1 §17.3.1.24
8949
+ const pBdrXml = XMLParser.extractBetweenTags(pPrXml, '<w:pBdr>', '</w:pBdr>');
8950
+ if (pBdrXml) {
8951
+ const borders: any = {};
8952
+ const borderTypes = ['top', 'left', 'bottom', 'right', 'between', 'bar'];
8953
+ for (const type of borderTypes) {
8954
+ if (pBdrXml.includes(`<w:${type}`)) {
8955
+ const tag = XMLParser.extractSelfClosingTag(pBdrXml, `w:${type}`);
8956
+ if (tag) {
8957
+ const bTag = `<w:${type}${tag}`;
8958
+ const style = XMLParser.extractAttribute(bTag, 'w:val');
8959
+ const size = XMLParser.extractAttribute(bTag, 'w:sz');
8960
+ const space = XMLParser.extractAttribute(bTag, 'w:space');
8961
+ const color = XMLParser.extractAttribute(bTag, 'w:color');
8962
+ const border: any = {};
8963
+ if (style) border.style = style;
8964
+ if (size) border.size = parseInt(size, 10);
8965
+ if (space) border.space = parseInt(space, 10);
8966
+ if (color) border.color = color;
8967
+ if (Object.keys(border).length > 0) borders[type] = border;
8968
+ }
8969
+ }
8970
+ }
8971
+ if (Object.keys(borders).length > 0) formatting.borders = borders;
8972
+ }
8973
+
8974
+ // Parse tab stops (w:tabs) per ECMA-376 Part 1 §17.3.1.38
8975
+ const tabsXml = XMLParser.extractBetweenTags(pPrXml, '<w:tabs>', '</w:tabs>');
8976
+ if (tabsXml) {
8977
+ const tabs: any[] = [];
8978
+ // Extract all w:tab elements
8979
+ const tabRegex = /<w:tab\s[^>]*\/>/g;
8980
+ let tabMatch;
8981
+ while ((tabMatch = tabRegex.exec(tabsXml)) !== null) {
8982
+ const tabTag = tabMatch[0];
8983
+ const pos = XMLParser.extractAttribute(tabTag, 'w:pos');
8984
+ const val = XMLParser.extractAttribute(tabTag, 'w:val');
8985
+ const leader = XMLParser.extractAttribute(tabTag, 'w:leader');
8986
+ if (pos) {
8987
+ const tab: any = { position: parseInt(pos, 10) };
8988
+ if (val) tab.val = val;
8989
+ if (leader) tab.leader = leader;
8990
+ tabs.push(tab);
8991
+ }
8992
+ }
8993
+ if (tabs.length > 0) formatting.tabs = tabs;
8994
+ }
8995
+
8787
8996
  // Parse shading (w:shd) per ECMA-376 Part 1 §17.3.1.32
8788
8997
  const shading = this.parseShadingFromXml(pPrXml);
8789
8998
  if (shading) {
@@ -8818,10 +9027,11 @@ export class DocumentParser {
8818
9027
  formatting.allCaps = true;
8819
9028
  }
8820
9029
 
8821
- // Parse underline - use extractSelfClosingTag for accuracy
9030
+ // Parse underline (w:u) — all attributes per ECMA-376 §17.3.2.40
8822
9031
  const uElement = XMLParser.extractSelfClosingTag(rPrXml, 'w:u');
8823
9032
  if (uElement) {
8824
- const uVal = XMLParser.extractAttribute(`<w:u${uElement}`, 'w:val');
9033
+ const uTag = `<w:u${uElement}`;
9034
+ const uVal = XMLParser.extractAttribute(uTag, 'w:val');
8825
9035
  if (
8826
9036
  uVal === 'single' ||
8827
9037
  uVal === 'double' ||
@@ -8834,9 +9044,19 @@ export class DocumentParser {
8834
9044
  } else {
8835
9045
  formatting.underline = true;
8836
9046
  }
9047
+ const uColor = XMLParser.extractAttribute(uTag, 'w:color');
9048
+ if (uColor) formatting.underlineColor = uColor;
9049
+ const uThemeColor = XMLParser.extractAttribute(uTag, 'w:themeColor');
9050
+ if (uThemeColor) {
9051
+ formatting.underlineThemeColor = uThemeColor as import('../elements/Run').ThemeColorValue;
9052
+ }
9053
+ const uThemeTint = XMLParser.extractAttribute(uTag, 'w:themeTint');
9054
+ if (uThemeTint) formatting.underlineThemeTint = parseInt(uThemeTint, 16);
9055
+ const uThemeShade = XMLParser.extractAttribute(uTag, 'w:themeShade');
9056
+ if (uThemeShade) formatting.underlineThemeShade = parseInt(uThemeShade, 16);
8837
9057
  }
8838
9058
 
8839
- // Parse subscript/superscript - use extractSelfClosingTag
9059
+ // Parse subscript/superscript/baseline per ECMA-376 §17.18.96 (ST_VerticalAlignRun)
8840
9060
  const vertAlignElement = XMLParser.extractSelfClosingTag(rPrXml, 'w:vertAlign');
8841
9061
  if (vertAlignElement) {
8842
9062
  const val = XMLParser.extractAttribute(`<w:vertAlign${vertAlignElement}`, 'w:val');
@@ -8844,16 +9064,33 @@ export class DocumentParser {
8844
9064
  formatting.subscript = true;
8845
9065
  } else if (val === 'superscript') {
8846
9066
  formatting.superscript = true;
9067
+ } else if (val === 'baseline') {
9068
+ formatting.vertAlignBaseline = true;
8847
9069
  }
8848
9070
  }
8849
9071
 
8850
- // Parse font (w:rFonts) - use extractSelfClosingTag
9072
+ // Parse font (w:rFonts) — all attributes per ECMA-376 §17.3.2.26
8851
9073
  const rFontsElement = XMLParser.extractSelfClosingTag(rPrXml, 'w:rFonts');
8852
9074
  if (rFontsElement) {
8853
- const ascii = XMLParser.extractAttribute(`<w:rFonts${rFontsElement}`, 'w:ascii');
8854
- if (ascii) {
8855
- formatting.font = ascii;
8856
- }
9075
+ const rFontsTag = `<w:rFonts${rFontsElement}`;
9076
+ const ascii = XMLParser.extractAttribute(rFontsTag, 'w:ascii');
9077
+ if (ascii) formatting.font = ascii;
9078
+ const hAnsi = XMLParser.extractAttribute(rFontsTag, 'w:hAnsi');
9079
+ if (hAnsi) formatting.fontHAnsi = hAnsi;
9080
+ const eastAsia = XMLParser.extractAttribute(rFontsTag, 'w:eastAsia');
9081
+ if (eastAsia) formatting.fontEastAsia = eastAsia;
9082
+ const cs = XMLParser.extractAttribute(rFontsTag, 'w:cs');
9083
+ if (cs) formatting.fontCs = cs;
9084
+ const hint = XMLParser.extractAttribute(rFontsTag, 'w:hint');
9085
+ if (hint) formatting.fontHint = hint;
9086
+ const asciiTheme = XMLParser.extractAttribute(rFontsTag, 'w:asciiTheme');
9087
+ if (asciiTheme) formatting.fontAsciiTheme = asciiTheme;
9088
+ const hAnsiTheme = XMLParser.extractAttribute(rFontsTag, 'w:hAnsiTheme');
9089
+ if (hAnsiTheme) formatting.fontHAnsiTheme = hAnsiTheme;
9090
+ const eastAsiaTheme = XMLParser.extractAttribute(rFontsTag, 'w:eastAsiaTheme');
9091
+ if (eastAsiaTheme) formatting.fontEastAsiaTheme = eastAsiaTheme;
9092
+ const cstheme = XMLParser.extractAttribute(rFontsTag, 'w:cstheme');
9093
+ if (cstheme) formatting.fontCsTheme = cstheme;
8857
9094
  }
8858
9095
 
8859
9096
  // Parse size (w:sz) - size is in half-points
@@ -8866,14 +9103,38 @@ export class DocumentParser {
8866
9103
  }
8867
9104
  }
8868
9105
 
8869
- // Parse color (w:color)
8870
- // Use extractSelfClosingTag to avoid matching other tags
9106
+ // Parse complex script size (w:szCs) per ECMA-376 §17.3.2.39
9107
+ const szCsElement = XMLParser.extractSelfClosingTag(rPrXml, 'w:szCs');
9108
+ if (szCsElement) {
9109
+ const val = XMLParser.extractAttribute(`<w:szCs${szCsElement}`, 'w:val');
9110
+ if (val) {
9111
+ const szCsVal = halfPointsToPoints(parseInt(val, 10));
9112
+ if (formatting.size === undefined || szCsVal !== formatting.size) {
9113
+ formatting.sizeCs = szCsVal;
9114
+ }
9115
+ }
9116
+ }
9117
+
9118
+ // Parse color (w:color) — all attributes per ECMA-376 §17.3.2.6
8871
9119
  const colorElement = XMLParser.extractSelfClosingTag(rPrXml, 'w:color');
8872
9120
  if (colorElement) {
8873
- const val = XMLParser.extractAttribute(`<w:color${colorElement}`, 'w:val');
9121
+ const colorTag = `<w:color${colorElement}`;
9122
+ const val = XMLParser.extractAttribute(colorTag, 'w:val');
8874
9123
  if (val && val !== 'auto') {
8875
9124
  formatting.color = val;
8876
9125
  }
9126
+ const themeColor = XMLParser.extractAttribute(colorTag, 'w:themeColor');
9127
+ if (themeColor) {
9128
+ formatting.themeColor = themeColor as import('../elements/Run').ThemeColorValue;
9129
+ }
9130
+ const themeTint = XMLParser.extractAttribute(colorTag, 'w:themeTint');
9131
+ if (themeTint) {
9132
+ formatting.themeTint = parseInt(themeTint, 16);
9133
+ }
9134
+ const themeShade = XMLParser.extractAttribute(colorTag, 'w:themeShade');
9135
+ if (themeShade) {
9136
+ formatting.themeShade = parseInt(themeShade, 16);
9137
+ }
8877
9138
  }
8878
9139
 
8879
9140
  // Parse highlight (w:highlight) - use extractSelfClosingTag
@@ -8898,6 +9159,7 @@ export class DocumentParser {
8898
9159
  'lightGray',
8899
9160
  'black',
8900
9161
  'white',
9162
+ 'none',
8901
9163
  ];
8902
9164
  if (validHighlights.includes(val)) {
8903
9165
  formatting.highlight = val as
@@ -8916,7 +9178,8 @@ export class DocumentParser {
8916
9178
  | 'darkGray'
8917
9179
  | 'lightGray'
8918
9180
  | 'black'
8919
- | 'white';
9181
+ | 'white'
9182
+ | 'none';
8920
9183
  }
8921
9184
  }
8922
9185
  }
@@ -8994,14 +9257,19 @@ export class DocumentParser {
8994
9257
  ): import('../formatting/Style').TableStyleFormatting {
8995
9258
  const formatting: import('../formatting/Style').TableStyleFormatting = {};
8996
9259
 
8997
- // Parse indent
9260
+ // Parse indent (w:tblInd) — preserve w:type per ECMA-376 ST_TblWidth
8998
9261
  if (tblPrXml.includes('<w:tblInd')) {
8999
9262
  const tag = XMLParser.extractSelfClosingTag(tblPrXml, 'w:tblInd');
9000
9263
  if (tag) {
9001
- const w = XMLParser.extractAttribute(`<w:tblInd${tag}`, 'w:w');
9264
+ const tblIndTag = `<w:tblInd${tag}`;
9265
+ const w = XMLParser.extractAttribute(tblIndTag, 'w:w');
9002
9266
  if (w) {
9003
9267
  formatting.indent = parseInt(w, 10);
9004
9268
  }
9269
+ const type = XMLParser.extractAttribute(tblIndTag, 'w:type');
9270
+ if (type) {
9271
+ formatting.indentType = type as import('../elements/Table').TableWidthType;
9272
+ }
9005
9273
  }
9006
9274
  }
9007
9275
 
@@ -9311,19 +9579,43 @@ export class DocumentParser {
9311
9579
  ): import('../formatting/Style').CellMargins | undefined {
9312
9580
  const margins: import('../formatting/Style').CellMargins = {};
9313
9581
 
9314
- const marginTypes = ['top', 'bottom', 'left', 'right'];
9315
- for (const type of marginTypes) {
9582
+ // Parse top and bottom directly
9583
+ for (const type of ['top', 'bottom'] as const) {
9316
9584
  if (marginXml.includes(`<w:${type}`)) {
9317
9585
  const tag = XMLParser.extractSelfClosingTag(marginXml, `w:${type}`);
9318
9586
  if (tag) {
9319
9587
  const w = XMLParser.extractAttribute(`<w:${type}${tag}`, 'w:w');
9320
9588
  if (w) {
9321
- margins[type as keyof import('../formatting/Style').CellMargins] = parseInt(w, 10);
9589
+ margins[type] = parseInt(w, 10);
9322
9590
  }
9323
9591
  }
9324
9592
  }
9325
9593
  }
9326
9594
 
9595
+ // Parse left/right with bidi-aware w:start/w:end fallback (ECMA-376 §17.4.42/§17.4.43)
9596
+ // w:start takes precedence over w:left; w:end takes precedence over w:right
9597
+ const leftTag = marginXml.includes('<w:start')
9598
+ ? XMLParser.extractSelfClosingTag(marginXml, 'w:start')
9599
+ : XMLParser.extractSelfClosingTag(marginXml, 'w:left');
9600
+ if (leftTag) {
9601
+ const tagName = marginXml.includes('<w:start') ? 'w:start' : 'w:left';
9602
+ const w = XMLParser.extractAttribute(`<${tagName}${leftTag}`, 'w:w');
9603
+ if (w) {
9604
+ margins.left = parseInt(w, 10);
9605
+ }
9606
+ }
9607
+
9608
+ const rightTag = marginXml.includes('<w:end')
9609
+ ? XMLParser.extractSelfClosingTag(marginXml, 'w:end')
9610
+ : XMLParser.extractSelfClosingTag(marginXml, 'w:right');
9611
+ if (rightTag) {
9612
+ const tagName = marginXml.includes('<w:end') ? 'w:end' : 'w:right';
9613
+ const w = XMLParser.extractAttribute(`<${tagName}${rightTag}`, 'w:w');
9614
+ if (w) {
9615
+ margins.right = parseInt(w, 10);
9616
+ }
9617
+ }
9618
+
9327
9619
  return Object.keys(margins).length > 0 ? margins : undefined;
9328
9620
  }
9329
9621
 
@@ -9834,6 +10126,40 @@ export class DocumentParser {
9834
10126
  if (propsObj['w:tblStyle']) {
9835
10127
  result.style = propsObj['w:tblStyle']['@_w:val'] || '';
9836
10128
  }
10129
+ // tblpPr (floating table position)
10130
+ if (propsObj['w:tblpPr']) {
10131
+ const tblpPr = propsObj['w:tblpPr'];
10132
+ const pos: any = {};
10133
+ if (tblpPr['@_w:tblpX']) pos.x = parseInt(tblpPr['@_w:tblpX'], 10);
10134
+ if (tblpPr['@_w:tblpY']) pos.y = parseInt(tblpPr['@_w:tblpY'], 10);
10135
+ if (tblpPr['@_w:horzAnchor']) pos.horizontalAnchor = tblpPr['@_w:horzAnchor'];
10136
+ if (tblpPr['@_w:vertAnchor']) pos.verticalAnchor = tblpPr['@_w:vertAnchor'];
10137
+ if (tblpPr['@_w:leftFromText']) pos.leftFromText = parseInt(tblpPr['@_w:leftFromText'], 10);
10138
+ if (tblpPr['@_w:rightFromText'])
10139
+ pos.rightFromText = parseInt(tblpPr['@_w:rightFromText'], 10);
10140
+ if (tblpPr['@_w:topFromText']) pos.topFromText = parseInt(tblpPr['@_w:topFromText'], 10);
10141
+ if (tblpPr['@_w:bottomFromText'])
10142
+ pos.bottomFromText = parseInt(tblpPr['@_w:bottomFromText'], 10);
10143
+ if (Object.keys(pos).length > 0) result.position = pos;
10144
+ }
10145
+ if (propsObj['w:tblOverlap']) {
10146
+ result.overlap = propsObj['w:tblOverlap']['@_w:val'];
10147
+ }
10148
+ if (propsObj['w:bidiVisual']) {
10149
+ result.bidiVisual = true;
10150
+ }
10151
+ if (propsObj['w:tblStyleRowBandSize']) {
10152
+ result.tblStyleRowBandSize = parseInt(
10153
+ propsObj['w:tblStyleRowBandSize']['@_w:val'] || '1',
10154
+ 10
10155
+ );
10156
+ }
10157
+ if (propsObj['w:tblStyleColBandSize']) {
10158
+ result.tblStyleColBandSize = parseInt(
10159
+ propsObj['w:tblStyleColBandSize']['@_w:val'] || '1',
10160
+ 10
10161
+ );
10162
+ }
9837
10163
  if (propsObj['w:tblW']) {
9838
10164
  result.width = parseInt(propsObj['w:tblW']['@_w:w'] || '0', 10);
9839
10165
  result.widthType = propsObj['w:tblW']['@_w:type'] || 'dxa';
@@ -9843,9 +10169,24 @@ export class DocumentParser {
9843
10169
  }
9844
10170
  if (propsObj['w:tblInd']) {
9845
10171
  result.indent = parseInt(propsObj['w:tblInd']['@_w:w'] || '0', 10);
10172
+ const indType = propsObj['w:tblInd']['@_w:type'];
10173
+ if (indType) result.indentType = indType;
9846
10174
  }
9847
10175
  if (propsObj['w:tblCellSpacing']) {
9848
10176
  result.cellSpacing = parseInt(propsObj['w:tblCellSpacing']['@_w:w'] || '0', 10);
10177
+ const csType = propsObj['w:tblCellSpacing']['@_w:type'];
10178
+ if (csType) result.cellSpacingType = csType;
10179
+ }
10180
+ if (propsObj['w:tblCellMar']) {
10181
+ const cellMar = propsObj['w:tblCellMar'];
10182
+ const margins: any = {};
10183
+ if (cellMar['w:top']) margins.top = parseInt(cellMar['w:top']['@_w:w'] || '0', 10);
10184
+ if (cellMar['w:bottom']) margins.bottom = parseInt(cellMar['w:bottom']['@_w:w'] || '0', 10);
10185
+ const leftSrc = cellMar['w:start'] || cellMar['w:left'];
10186
+ if (leftSrc) margins.left = parseInt(leftSrc['@_w:w'] || '0', 10);
10187
+ const rightSrc = cellMar['w:end'] || cellMar['w:right'];
10188
+ if (rightSrc) margins.right = parseInt(rightSrc['@_w:w'] || '0', 10);
10189
+ if (Object.keys(margins).length > 0) result.cellMargins = margins;
9849
10190
  }
9850
10191
  if (propsObj['w:tblBorders']) {
9851
10192
  const borders: any = {};
@@ -9857,8 +10198,40 @@ export class DocumentParser {
9857
10198
  }
9858
10199
  if (Object.keys(borders).length > 0) result.borders = borders;
9859
10200
  }
10201
+ if (propsObj['w:tblLook']) {
10202
+ const look = propsObj['w:tblLook'];
10203
+ result.tblLook = look['@_w:val'] || '0000';
10204
+ }
10205
+ if (propsObj['w:tblCaption']) {
10206
+ result.caption = propsObj['w:tblCaption']['@_w:val'];
10207
+ }
10208
+ if (propsObj['w:tblDescription']) {
10209
+ result.description = propsObj['w:tblDescription']['@_w:val'];
10210
+ }
9860
10211
 
9861
- // Row-level properties (w:trPr context)
10212
+ // Row-level properties (w:trPr context) — all CT_TrPr elements
10213
+ if (propsObj['w:cnfStyle']) {
10214
+ result.cnfStyle = propsObj['w:cnfStyle']['@_w:val'];
10215
+ }
10216
+ if (propsObj['w:divId']) {
10217
+ result.divId = propsObj['w:divId']['@_w:val'];
10218
+ }
10219
+ if (propsObj['w:gridBefore']) {
10220
+ result.gridBefore = parseInt(propsObj['w:gridBefore']['@_w:val'] || '0', 10);
10221
+ }
10222
+ if (propsObj['w:gridAfter']) {
10223
+ result.gridAfter = parseInt(propsObj['w:gridAfter']['@_w:val'] || '0', 10);
10224
+ }
10225
+ if (propsObj['w:wBefore']) {
10226
+ result.wBefore = parseInt(propsObj['w:wBefore']['@_w:w'] || '0', 10);
10227
+ const wbType = propsObj['w:wBefore']['@_w:type'];
10228
+ if (wbType) result.wBeforeType = wbType;
10229
+ }
10230
+ if (propsObj['w:wAfter']) {
10231
+ result.wAfter = parseInt(propsObj['w:wAfter']['@_w:w'] || '0', 10);
10232
+ const waType = propsObj['w:wAfter']['@_w:type'];
10233
+ if (waType) result.wAfterType = waType;
10234
+ }
9862
10235
  if (propsObj['w:trHeight']) {
9863
10236
  result.height = parseInt(propsObj['w:trHeight']['@_w:val'] || '0', 10);
9864
10237
  const rule = propsObj['w:trHeight']['@_w:hRule'];
@@ -9874,13 +10247,19 @@ export class DocumentParser {
9874
10247
  result.hidden = true;
9875
10248
  }
9876
10249
 
9877
- // Cell-level properties (w:tcPr context)
10250
+ // Cell-level properties (w:tcPr context) — all CT_TcPr elements
9878
10251
  if (propsObj['w:tcW']) {
9879
10252
  result.width = parseInt(propsObj['w:tcW']['@_w:w'] || '0', 10);
9880
10253
  result.widthType = propsObj['w:tcW']['@_w:type'] || 'dxa';
9881
10254
  }
9882
- if (propsObj['w:vAlign']) {
9883
- result.verticalAlignment = propsObj['w:vAlign']['@_w:val'];
10255
+ if (propsObj['w:gridSpan']) {
10256
+ result.columnSpan = parseInt(propsObj['w:gridSpan']['@_w:val'] || '1', 10);
10257
+ }
10258
+ if (propsObj['w:hMerge']) {
10259
+ result.hMerge = propsObj['w:hMerge']['@_w:val'] || 'continue';
10260
+ }
10261
+ if (propsObj['w:vMerge']) {
10262
+ result.vMerge = propsObj['w:vMerge']['@_w:val'] || 'continue';
9884
10263
  }
9885
10264
  if (propsObj['w:tcBorders']) {
9886
10265
  const borders: any = {};
@@ -9892,6 +10271,35 @@ export class DocumentParser {
9892
10271
  }
9893
10272
  if (Object.keys(borders).length > 0) result.borders = borders;
9894
10273
  }
10274
+ if (propsObj['w:noWrap']) {
10275
+ result.noWrap = true;
10276
+ }
10277
+ if (propsObj['w:tcMar']) {
10278
+ const tcMar = propsObj['w:tcMar'];
10279
+ const margins: any = {};
10280
+ if (tcMar['w:top']) margins.top = parseInt(tcMar['w:top']['@_w:w'] || '0', 10);
10281
+ if (tcMar['w:bottom']) margins.bottom = parseInt(tcMar['w:bottom']['@_w:w'] || '0', 10);
10282
+ const leftSrc = tcMar['w:start'] || tcMar['w:left'];
10283
+ if (leftSrc) margins.left = parseInt(leftSrc['@_w:w'] || '0', 10);
10284
+ const rightSrc = tcMar['w:end'] || tcMar['w:right'];
10285
+ if (rightSrc) margins.right = parseInt(rightSrc['@_w:w'] || '0', 10);
10286
+ if (Object.keys(margins).length > 0) result.margins = margins;
10287
+ }
10288
+ if (propsObj['w:textDirection']) {
10289
+ result.textDirection = propsObj['w:textDirection']['@_w:val'];
10290
+ }
10291
+ if (propsObj['w:tcFitText']) {
10292
+ result.fitText = true;
10293
+ }
10294
+ if (propsObj['w:vAlign']) {
10295
+ result.verticalAlignment = propsObj['w:vAlign']['@_w:val'];
10296
+ }
10297
+ if (propsObj['w:hideMark']) {
10298
+ result.hideMark = true;
10299
+ }
10300
+ if (propsObj['w:cnfStyle']) {
10301
+ result.cnfStyle = propsObj['w:cnfStyle']['@_w:val'];
10302
+ }
9895
10303
 
9896
10304
  // Shared properties (appear in multiple contexts)
9897
10305
  if (propsObj['w:jc']) {
@@ -9924,11 +10332,13 @@ export class DocumentParser {
9924
10332
  const width = XMLParser.extractAttribute(pgSz, 'w:w');
9925
10333
  const height = XMLParser.extractAttribute(pgSz, 'w:h');
9926
10334
  const orient = XMLParser.extractAttribute(pgSz, 'w:orient');
10335
+ const code = XMLParser.extractAttribute(pgSz, 'w:code');
9927
10336
  if (width || height) {
9928
10337
  result.pageSize = {
9929
10338
  width: width ? parseInt(width, 10) : undefined,
9930
10339
  height: height ? parseInt(height, 10) : undefined,
9931
10340
  orientation: orient === 'landscape' ? 'landscape' : 'portrait',
10341
+ code: code ? parseInt(code, 10) : undefined,
9932
10342
  };
9933
10343
  }
9934
10344
  }
@@ -9960,6 +10370,34 @@ export class DocumentParser {
9960
10370
  if (val) result.type = val;
9961
10371
  }
9962
10372
 
10373
+ // Line numbering
10374
+ const lnNumElements = XMLParser.extractElements(sectPrXml, 'w:lnNumType');
10375
+ if (lnNumElements.length > 0 && lnNumElements[0]) {
10376
+ const ln = lnNumElements[0];
10377
+ const lnObj: any = {};
10378
+ const countBy = XMLParser.extractAttribute(ln, 'w:countBy');
10379
+ if (countBy) lnObj.countBy = parseInt(countBy, 10);
10380
+ const start = XMLParser.extractAttribute(ln, 'w:start');
10381
+ if (start) lnObj.start = parseInt(start, 10);
10382
+ const restart = XMLParser.extractAttribute(ln, 'w:restart');
10383
+ if (restart) lnObj.restart = restart;
10384
+ const distance = XMLParser.extractAttribute(ln, 'w:distance');
10385
+ if (distance) lnObj.distance = parseInt(distance, 10);
10386
+ if (Object.keys(lnObj).length > 0) result.lineNumbering = lnObj;
10387
+ }
10388
+
10389
+ // Page numbering
10390
+ const pgNumElements = XMLParser.extractElements(sectPrXml, 'w:pgNumType');
10391
+ if (pgNumElements.length > 0 && pgNumElements[0]) {
10392
+ const pn = pgNumElements[0];
10393
+ const pnObj: any = {};
10394
+ const pnStart = XMLParser.extractAttribute(pn, 'w:start');
10395
+ if (pnStart) pnObj.start = parseInt(pnStart, 10);
10396
+ const fmt = XMLParser.extractAttribute(pn, 'w:fmt');
10397
+ if (fmt) pnObj.format = fmt;
10398
+ if (Object.keys(pnObj).length > 0) result.pageNumbering = pnObj;
10399
+ }
10400
+
9963
10401
  // Columns
9964
10402
  const colsElements = XMLParser.extractElements(sectPrXml, 'w:cols');
9965
10403
  if (colsElements.length > 0 && colsElements[0]) {
@@ -9974,6 +10412,49 @@ export class DocumentParser {
9974
10412
  }
9975
10413
  }
9976
10414
 
10415
+ // Form protection
10416
+ if (sectPrXml.includes('<w:formProt')) result.formProt = true;
10417
+
10418
+ // Vertical alignment
10419
+ const vAlignElements = XMLParser.extractElements(sectPrXml, 'w:vAlign');
10420
+ if (vAlignElements.length > 0 && vAlignElements[0]) {
10421
+ const val = XMLParser.extractAttribute(vAlignElements[0], 'w:val');
10422
+ if (val) result.verticalAlignment = val;
10423
+ }
10424
+
10425
+ // Suppress endnotes
10426
+ if (sectPrXml.includes('<w:noEndnote')) result.noEndnote = true;
10427
+
10428
+ // Title page
10429
+ if (sectPrXml.includes('<w:titlePg')) result.titlePage = true;
10430
+
10431
+ // Text direction
10432
+ const textDirElements = XMLParser.extractElements(sectPrXml, 'w:textDirection');
10433
+ if (textDirElements.length > 0 && textDirElements[0]) {
10434
+ const val = XMLParser.extractAttribute(textDirElements[0], 'w:val');
10435
+ if (val) result.textDirection = val;
10436
+ }
10437
+
10438
+ // Bidi section
10439
+ if (sectPrXml.includes('<w:bidi')) result.bidi = true;
10440
+
10441
+ // RTL gutter
10442
+ if (sectPrXml.includes('<w:rtlGutter')) result.rtlGutter = true;
10443
+
10444
+ // Document grid
10445
+ const docGridElements = XMLParser.extractElements(sectPrXml, 'w:docGrid');
10446
+ if (docGridElements.length > 0 && docGridElements[0]) {
10447
+ const dg = docGridElements[0];
10448
+ const dgObj: any = {};
10449
+ const dgType = XMLParser.extractAttribute(dg, 'w:type');
10450
+ if (dgType) dgObj.type = dgType;
10451
+ const linePitch = XMLParser.extractAttribute(dg, 'w:linePitch');
10452
+ if (linePitch) dgObj.linePitch = parseInt(linePitch, 10);
10453
+ const charSpace = XMLParser.extractAttribute(dg, 'w:charSpace');
10454
+ if (charSpace) dgObj.charSpace = parseInt(charSpace, 10);
10455
+ if (Object.keys(dgObj).length > 0) result.docGrid = dgObj;
10456
+ }
10457
+
9977
10458
  return result;
9978
10459
  }
9979
10460
  }