docxmlater 10.3.6 → 10.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184) hide show
  1. package/README.md +158 -7
  2. package/dist/core/Document.d.ts +98 -3
  3. package/dist/core/Document.d.ts.map +1 -1
  4. package/dist/core/Document.js +740 -50
  5. package/dist/core/Document.js.map +1 -1
  6. package/dist/core/DocumentContent.d.ts.map +1 -1
  7. package/dist/core/DocumentContent.js +0 -8
  8. package/dist/core/DocumentContent.js.map +1 -1
  9. package/dist/core/DocumentGenerator.d.ts.map +1 -1
  10. package/dist/core/DocumentGenerator.js +9 -5
  11. package/dist/core/DocumentGenerator.js.map +1 -1
  12. package/dist/core/DocumentParser.d.ts.map +1 -1
  13. package/dist/core/DocumentParser.js +617 -104
  14. package/dist/core/DocumentParser.js.map +1 -1
  15. package/dist/core/RelationshipManager.d.ts.map +1 -1
  16. package/dist/core/RelationshipManager.js +4 -3
  17. package/dist/core/RelationshipManager.js.map +1 -1
  18. package/dist/elements/Bookmark.d.ts +7 -0
  19. package/dist/elements/Bookmark.d.ts.map +1 -1
  20. package/dist/elements/Bookmark.js +24 -4
  21. package/dist/elements/Bookmark.js.map +1 -1
  22. package/dist/elements/BookmarkManager.d.ts.map +1 -1
  23. package/dist/elements/BookmarkManager.js +4 -3
  24. package/dist/elements/BookmarkManager.js.map +1 -1
  25. package/dist/elements/CommonTypes.d.ts +2 -2
  26. package/dist/elements/CommonTypes.d.ts.map +1 -1
  27. package/dist/elements/CommonTypes.js +14 -1
  28. package/dist/elements/CommonTypes.js.map +1 -1
  29. package/dist/elements/Field.d.ts +1 -1
  30. package/dist/elements/Field.d.ts.map +1 -1
  31. package/dist/elements/Field.js +1 -1
  32. package/dist/elements/Field.js.map +1 -1
  33. package/dist/elements/Footer.d.ts +2 -0
  34. package/dist/elements/Footer.d.ts.map +1 -1
  35. package/dist/elements/Footer.js +6 -0
  36. package/dist/elements/Footer.js.map +1 -1
  37. package/dist/elements/Header.d.ts +2 -0
  38. package/dist/elements/Header.d.ts.map +1 -1
  39. package/dist/elements/Header.js +6 -0
  40. package/dist/elements/Header.js.map +1 -1
  41. package/dist/elements/Image.d.ts.map +1 -1
  42. package/dist/elements/Image.js +3 -0
  43. package/dist/elements/Image.js.map +1 -1
  44. package/dist/elements/Paragraph.d.ts +81 -1
  45. package/dist/elements/Paragraph.d.ts.map +1 -1
  46. package/dist/elements/Paragraph.js +515 -21
  47. package/dist/elements/Paragraph.js.map +1 -1
  48. package/dist/elements/Revision.d.ts +0 -1
  49. package/dist/elements/Revision.d.ts.map +1 -1
  50. package/dist/elements/Revision.js +0 -12
  51. package/dist/elements/Revision.js.map +1 -1
  52. package/dist/elements/RevisionManager.d.ts +0 -1
  53. package/dist/elements/RevisionManager.d.ts.map +1 -1
  54. package/dist/elements/RevisionManager.js +0 -2
  55. package/dist/elements/RevisionManager.js.map +1 -1
  56. package/dist/elements/Run.d.ts +16 -4
  57. package/dist/elements/Run.d.ts.map +1 -1
  58. package/dist/elements/Run.js +114 -22
  59. package/dist/elements/Run.js.map +1 -1
  60. package/dist/elements/Section.d.ts +7 -1
  61. package/dist/elements/Section.d.ts.map +1 -1
  62. package/dist/elements/Section.js +185 -4
  63. package/dist/elements/Section.js.map +1 -1
  64. package/dist/elements/Shape.js.map +1 -1
  65. package/dist/elements/Table.d.ts +30 -1
  66. package/dist/elements/Table.d.ts.map +1 -1
  67. package/dist/elements/Table.js +357 -40
  68. package/dist/elements/Table.js.map +1 -1
  69. package/dist/elements/TableCell.d.ts +3 -0
  70. package/dist/elements/TableCell.d.ts.map +1 -1
  71. package/dist/elements/TableCell.js +30 -3
  72. package/dist/elements/TableCell.js.map +1 -1
  73. package/dist/elements/TableGridChange.d.ts +0 -1
  74. package/dist/elements/TableGridChange.d.ts.map +1 -1
  75. package/dist/elements/TableGridChange.js +0 -10
  76. package/dist/elements/TableGridChange.js.map +1 -1
  77. package/dist/elements/TableRow.d.ts +4 -0
  78. package/dist/elements/TableRow.d.ts.map +1 -1
  79. package/dist/elements/TableRow.js +31 -3
  80. package/dist/elements/TableRow.js.map +1 -1
  81. package/dist/formatting/AbstractNumbering.d.ts +5 -0
  82. package/dist/formatting/AbstractNumbering.d.ts.map +1 -1
  83. package/dist/formatting/AbstractNumbering.js +22 -0
  84. package/dist/formatting/AbstractNumbering.js.map +1 -1
  85. package/dist/formatting/NumberingLevel.d.ts.map +1 -1
  86. package/dist/formatting/NumberingLevel.js +3 -3
  87. package/dist/formatting/NumberingLevel.js.map +1 -1
  88. package/dist/formatting/Style.d.ts +1 -0
  89. package/dist/formatting/Style.d.ts.map +1 -1
  90. package/dist/formatting/Style.js +25 -59
  91. package/dist/formatting/Style.js.map +1 -1
  92. package/dist/formatting/StylesManager.d.ts +1 -0
  93. package/dist/formatting/StylesManager.d.ts.map +1 -1
  94. package/dist/formatting/StylesManager.js +12 -0
  95. package/dist/formatting/StylesManager.js.map +1 -1
  96. package/dist/helpers/CleanupHelper.js.map +1 -1
  97. package/dist/images/ImageOptimizer.d.ts.map +1 -1
  98. package/dist/images/ImageOptimizer.js +0 -1
  99. package/dist/images/ImageOptimizer.js.map +1 -1
  100. package/dist/index.d.ts +1 -1
  101. package/dist/index.d.ts.map +1 -1
  102. package/dist/index.js.map +1 -1
  103. package/dist/managers/DrawingManager.d.ts.map +1 -1
  104. package/dist/managers/DrawingManager.js +4 -2
  105. package/dist/managers/DrawingManager.js.map +1 -1
  106. package/dist/types/formatting.d.ts +2 -2
  107. package/dist/types/formatting.d.ts.map +1 -1
  108. package/dist/types/formatting.js.map +1 -1
  109. package/dist/utils/ChangelogGenerator.d.ts +2 -2
  110. package/dist/utils/ChangelogGenerator.d.ts.map +1 -1
  111. package/dist/utils/ChangelogGenerator.js +4 -5
  112. package/dist/utils/ChangelogGenerator.js.map +1 -1
  113. package/dist/utils/InMemoryRevisionAcceptor.d.ts.map +1 -1
  114. package/dist/utils/InMemoryRevisionAcceptor.js +0 -1
  115. package/dist/utils/InMemoryRevisionAcceptor.js.map +1 -1
  116. package/dist/utils/RevisionAwareProcessor.d.ts +2 -2
  117. package/dist/utils/RevisionAwareProcessor.d.ts.map +1 -1
  118. package/dist/utils/RevisionAwareProcessor.js +2 -2
  119. package/dist/utils/RevisionAwareProcessor.js.map +1 -1
  120. package/dist/utils/SelectiveRevisionAcceptor.d.ts +0 -2
  121. package/dist/utils/SelectiveRevisionAcceptor.d.ts.map +1 -1
  122. package/dist/utils/SelectiveRevisionAcceptor.js +0 -26
  123. package/dist/utils/SelectiveRevisionAcceptor.js.map +1 -1
  124. package/dist/utils/ShadingResolver.d.ts.map +1 -1
  125. package/dist/utils/ShadingResolver.js.map +1 -1
  126. package/dist/utils/acceptRevisions.js +1 -1
  127. package/dist/utils/acceptRevisions.js.map +1 -1
  128. package/dist/utils/stripTrackedChanges.js +1 -1
  129. package/dist/utils/stripTrackedChanges.js.map +1 -1
  130. package/dist/utils/units.d.ts.map +1 -1
  131. package/dist/utils/units.js +1 -1
  132. package/dist/utils/units.js.map +1 -1
  133. package/dist/validation/RevisionAutoFixer.d.ts +2 -1
  134. package/dist/validation/RevisionAutoFixer.d.ts.map +1 -1
  135. package/dist/validation/RevisionAutoFixer.js.map +1 -1
  136. package/package.json +10 -1
  137. package/src/constants/CLAUDE.md +28 -0
  138. package/src/core/CLAUDE.md +4 -0
  139. package/src/core/Document.ts +1770 -83
  140. package/src/core/DocumentContent.ts +0 -11
  141. package/src/core/DocumentGenerator.ts +11 -12
  142. package/src/core/DocumentParser.ts +654 -141
  143. package/src/core/RelationshipManager.ts +6 -3
  144. package/src/elements/Bookmark.ts +39 -4
  145. package/src/elements/BookmarkManager.ts +4 -3
  146. package/src/elements/CLAUDE.md +18 -2
  147. package/src/elements/CommonTypes.ts +35 -8
  148. package/src/elements/Field.ts +1 -1
  149. package/src/elements/Footer.ts +23 -0
  150. package/src/elements/Header.ts +25 -0
  151. package/src/elements/Image.ts +5 -0
  152. package/src/elements/Paragraph.ts +1069 -41
  153. package/src/elements/Revision.ts +0 -19
  154. package/src/elements/RevisionManager.ts +1 -3
  155. package/src/elements/Run.ts +265 -35
  156. package/src/elements/Section.ts +214 -8
  157. package/src/elements/Shape.ts +1 -1
  158. package/src/elements/Table.ts +850 -61
  159. package/src/elements/TableCell.ts +84 -10
  160. package/src/elements/TableGridChange.ts +2 -16
  161. package/src/elements/TableRow.ts +94 -9
  162. package/src/formatting/AbstractNumbering.ts +42 -1
  163. package/src/formatting/CLAUDE.md +4 -0
  164. package/src/formatting/NumberingLevel.ts +11 -7
  165. package/src/formatting/Style.ts +39 -71
  166. package/src/formatting/StylesManager.ts +36 -0
  167. package/src/helpers/CleanupHelper.ts +1 -1
  168. package/src/images/ImageOptimizer.ts +0 -3
  169. package/src/index.ts +1 -1
  170. package/src/managers/DrawingManager.ts +5 -3
  171. package/src/tracking/CLAUDE.md +30 -0
  172. package/src/types/CLAUDE.md +39 -0
  173. package/src/types/formatting.ts +2 -2
  174. package/src/utils/CLAUDE.md +15 -0
  175. package/src/utils/ChangelogGenerator.ts +4 -5
  176. package/src/utils/InMemoryRevisionAcceptor.ts +0 -9
  177. package/src/utils/RevisionAwareProcessor.ts +2 -3
  178. package/src/utils/SelectiveRevisionAcceptor.ts +0 -39
  179. package/src/utils/ShadingResolver.ts +0 -1
  180. package/src/utils/acceptRevisions.ts +1 -1
  181. package/src/utils/stripTrackedChanges.ts +1 -1
  182. package/src/utils/units.ts +2 -1
  183. package/src/validation/CLAUDE.md +40 -0
  184. package/src/validation/RevisionAutoFixer.ts +2 -1
@@ -31,12 +31,13 @@ import {
31
31
  RunContent,
32
32
  RunFormatting,
33
33
  } from '../elements/Run';
34
- import { PageNumberFormat, Section, SectionProperties, SectionType } from '../elements/Section';
34
+ import { Section, SectionProperties, SectionType } from '../elements/Section';
35
35
  import { StructuredDocumentTag } from '../elements/StructuredDocumentTag';
36
36
  import { Table, TableBorder } from '../elements/Table';
37
37
  import { TableCell } from '../elements/TableCell';
38
38
  import { TableOfContents } from '../elements/TableOfContents';
39
39
  import { TableOfContentsElement } from '../elements/TableOfContentsElement';
40
+ import { TableGridChange } from '../elements/TableGridChange';
40
41
  import { TableRow } from '../elements/TableRow';
41
42
  import { AbstractNumbering } from '../formatting/AbstractNumbering';
42
43
  import { NumberingInstance } from '../formatting/NumberingInstance';
@@ -904,11 +905,15 @@ export class DocumentParser {
904
905
  }
905
906
  }
906
907
 
907
- // Parse w14:paraId if present
908
+ // Parse w14:paraId and w14:textId if present
908
909
  const paraId = pElement['w14:paraId'];
909
910
  if (paraId) {
910
911
  paragraph.formatting.paraId = paraId as string;
911
912
  }
913
+ const textId = pElement['w14:textId'];
914
+ if (textId) {
915
+ paragraph.formatting.textId = textId as string;
916
+ }
912
917
 
913
918
  // CRITICAL FIX: Preserve document order of paragraph children (runs, hyperlinks, fields)
914
919
  // When XMLParser.parseToObject groups multiple runs/hyperlinks, it creates arrays
@@ -1339,9 +1344,61 @@ export class DocumentParser {
1339
1344
  hyperlinkObj['w:moveFrom'] ||
1340
1345
  hyperlinkObj['w:moveTo'];
1341
1346
  if (hasRevisionChildren) {
1342
- const rawXml = extractElementXmlAtPosition(child.pos, 'w:hyperlink');
1343
- if (rawXml) {
1344
- paragraph.addContent(new PreservedElement(rawXml, 'w:hyperlink', 'inline'));
1347
+ // Flatten revisions to make hyperlink editable (setUrl/setText).
1348
+ // Trades revision fidelity inside the hyperlink for editability.
1349
+ const flattenedObj = { ...hyperlinkObj };
1350
+ const allRuns: any[] = [];
1351
+
1352
+ // Keep existing direct runs
1353
+ if (flattenedObj['w:r']) {
1354
+ const directRuns = Array.isArray(flattenedObj['w:r'])
1355
+ ? flattenedObj['w:r']
1356
+ : [flattenedObj['w:r']];
1357
+ allRuns.push(...directRuns);
1358
+ }
1359
+
1360
+ // Unwrap w:ins runs (inserted content — keep)
1361
+ if (flattenedObj['w:ins']) {
1362
+ const insArr = Array.isArray(flattenedObj['w:ins'])
1363
+ ? flattenedObj['w:ins']
1364
+ : [flattenedObj['w:ins']];
1365
+ for (const ins of insArr) {
1366
+ if (ins['w:r']) {
1367
+ const insRuns = Array.isArray(ins['w:r']) ? ins['w:r'] : [ins['w:r']];
1368
+ allRuns.push(...insRuns);
1369
+ }
1370
+ }
1371
+ }
1372
+
1373
+ // Unwrap w:moveTo runs (move destination — keep)
1374
+ if (flattenedObj['w:moveTo']) {
1375
+ const moveToArr = Array.isArray(flattenedObj['w:moveTo'])
1376
+ ? flattenedObj['w:moveTo']
1377
+ : [flattenedObj['w:moveTo']];
1378
+ for (const mt of moveToArr) {
1379
+ if (mt['w:r']) {
1380
+ const mtRuns = Array.isArray(mt['w:r']) ? mt['w:r'] : [mt['w:r']];
1381
+ allRuns.push(...mtRuns);
1382
+ }
1383
+ }
1384
+ }
1385
+
1386
+ // Drop w:del and w:moveFrom (deleted/moved-away content)
1387
+ flattenedObj['w:r'] = allRuns.length > 0 ? allRuns : undefined;
1388
+ delete flattenedObj['w:del'];
1389
+ delete flattenedObj['w:ins'];
1390
+ delete flattenedObj['w:moveFrom'];
1391
+ delete flattenedObj['w:moveTo'];
1392
+
1393
+ const result = this.parseHyperlinkFromObject(flattenedObj, relationshipManager);
1394
+ if (result.hyperlink) {
1395
+ paragraph.addHyperlink(result.hyperlink);
1396
+ }
1397
+ for (const bookmark of result.bookmarkStarts) {
1398
+ paragraph.addBookmarkStart(bookmark);
1399
+ }
1400
+ for (const bookmark of result.bookmarkEnds) {
1401
+ paragraph.addBookmarkEnd(bookmark);
1345
1402
  }
1346
1403
  } else {
1347
1404
  const result = this.parseHyperlinkFromObject(hyperlinkObj, relationshipManager);
@@ -1861,10 +1918,15 @@ export class DocumentParser {
1861
1918
 
1862
1919
  // Create bookmark with skipNormalization to preserve original name exactly
1863
1920
  // (Word allows special characters like = and . in bookmark names)
1921
+ // Parse optional column range for table bookmarks (ECMA-376 §17.16.5)
1922
+ const colFirstAttr = XMLParser.extractAttribute(bookmarkXml, 'w:colFirst');
1923
+ const colLastAttr = XMLParser.extractAttribute(bookmarkXml, 'w:colLast');
1864
1924
  const bookmark = new Bookmark({
1865
1925
  name: nameAttr,
1866
1926
  id: id,
1867
1927
  skipNormalization: true,
1928
+ colFirst: colFirstAttr ? parseInt(colFirstAttr, 10) : undefined,
1929
+ colLast: colLastAttr ? parseInt(colLastAttr, 10) : undefined,
1868
1930
  });
1869
1931
 
1870
1932
  // Register with BookmarkManager to enable hasBookmark() checks
@@ -1934,11 +1996,15 @@ export class DocumentParser {
1934
1996
  try {
1935
1997
  const paragraph = new Paragraph();
1936
1998
 
1937
- // Parse w14:paraId attribute from paragraph element (Word 2010+ requirement)
1999
+ // Parse w14:paraId and w14:textId attributes from paragraph element (Word 2010+)
1938
2000
  const paraId = paraObj['w14:paraId'];
1939
2001
  if (paraId) {
1940
2002
  paragraph.formatting.paraId = paraId;
1941
2003
  }
2004
+ const textId = paraObj['w14:textId'];
2005
+ if (textId) {
2006
+ paragraph.formatting.textId = textId;
2007
+ }
1942
2008
 
1943
2009
  // Parse paragraph properties
1944
2010
  this.parseParagraphPropertiesFromObject(paraObj['w:pPr'], paragraph);
@@ -2155,9 +2221,11 @@ export class DocumentParser {
2155
2221
  if (pPrObj['w:ind']) {
2156
2222
  const ind = pPrObj['w:ind'];
2157
2223
  // Use isExplicitlySet and safeParseInt for robust zero-value handling
2158
- if (isExplicitlySet(ind['@_w:left'])) paragraph.setLeftIndent(safeParseInt(ind['@_w:left']));
2159
- if (isExplicitlySet(ind['@_w:right']))
2160
- paragraph.setRightIndent(safeParseInt(ind['@_w:right']));
2224
+ // Per ECMA-376 §17.3.1.15: w:start/w:end are bidi-aware alternatives to w:left/w:right
2225
+ const leftVal = ind['@_w:start'] ?? ind['@_w:left'];
2226
+ const rightVal = ind['@_w:end'] ?? ind['@_w:right'];
2227
+ if (isExplicitlySet(leftVal)) paragraph.setLeftIndent(safeParseInt(leftVal));
2228
+ if (isExplicitlySet(rightVal)) paragraph.setRightIndent(safeParseInt(rightVal));
2161
2229
  if (isExplicitlySet(ind['@_w:firstLine']))
2162
2230
  paragraph.setFirstLineIndent(safeParseInt(ind['@_w:firstLine']));
2163
2231
  // Parse hanging indent per ECMA-376 Part 1 §17.3.1.17
@@ -2165,7 +2233,7 @@ export class DocumentParser {
2165
2233
  paragraph.setHangingIndent(safeParseInt(ind['@_w:hanging']));
2166
2234
  }
2167
2235
 
2168
- // Spacing
2236
+ // Spacing (ECMA-376 §17.3.1.33 — 8 attributes)
2169
2237
  if (pPrObj['w:spacing']) {
2170
2238
  const spacing = pPrObj['w:spacing'];
2171
2239
  // Use isExplicitlySet to properly handle 0 values (0 spacing is valid)
@@ -2176,18 +2244,39 @@ export class DocumentParser {
2176
2244
  if (isExplicitlySet(spacing['@_w:line'])) {
2177
2245
  paragraph.setLineSpacing(safeParseInt(spacing['@_w:line']), spacing['@_w:lineRule']);
2178
2246
  }
2247
+ // Parse extended spacing attributes — write directly to paragraph.formatting
2248
+ // (getFormatting() returns a shallow copy, so we must access the internal object)
2249
+ if (!paragraph.formatting.spacing) paragraph.formatting.spacing = {};
2250
+ if (isExplicitlySet(spacing['@_w:beforeLines']))
2251
+ paragraph.formatting.spacing.beforeLines = safeParseInt(spacing['@_w:beforeLines']);
2252
+ if (isExplicitlySet(spacing['@_w:afterLines']))
2253
+ paragraph.formatting.spacing.afterLines = safeParseInt(spacing['@_w:afterLines']);
2254
+ const beforeAuto = spacing['@_w:beforeAutospacing'];
2255
+ if (beforeAuto !== undefined)
2256
+ paragraph.formatting.spacing.beforeAutospacing =
2257
+ String(beforeAuto) === '1' || String(beforeAuto) === 'true';
2258
+ const afterAuto = spacing['@_w:afterAutospacing'];
2259
+ if (afterAuto !== undefined)
2260
+ paragraph.formatting.spacing.afterAutospacing =
2261
+ String(afterAuto) === '1' || String(afterAuto) === 'true';
2179
2262
  }
2180
2263
 
2181
- // Keep properties - parse pageBreakBefore FIRST, then apply keep properties
2182
- // This triggers automatic conflict resolution per ECMA-376 v0.28.2
2183
- if (pPrObj['w:pageBreakBefore']) paragraph.formatting.pageBreakBefore = true;
2184
-
2185
- // Keep properties - these will automatically clear pageBreakBefore if both are set
2186
- if (pPrObj['w:keepNext']) paragraph.setKeepNext(true);
2187
- if (pPrObj['w:keepLines']) paragraph.setKeepLines(true);
2264
+ // Keep properties — preserve explicit val="0" to override style inheritance
2265
+ // Parse pageBreakBefore FIRST, then keep properties (triggers automatic conflict resolution)
2266
+ if (pPrObj['w:pageBreakBefore'] !== undefined) {
2267
+ paragraph.formatting.pageBreakBefore = parseOoxmlBoolean(pPrObj['w:pageBreakBefore']);
2268
+ }
2269
+ if (pPrObj['w:keepNext'] !== undefined) {
2270
+ paragraph.setKeepNext(parseOoxmlBoolean(pPrObj['w:keepNext']));
2271
+ }
2272
+ if (pPrObj['w:keepLines'] !== undefined) {
2273
+ paragraph.setKeepLines(parseOoxmlBoolean(pPrObj['w:keepLines']));
2274
+ }
2188
2275
 
2189
2276
  // Contextual spacing
2190
- if (pPrObj['w:contextualSpacing']) paragraph.setContextualSpacing(true);
2277
+ if (pPrObj['w:contextualSpacing'] !== undefined) {
2278
+ paragraph.setContextualSpacing(parseOoxmlBoolean(pPrObj['w:contextualSpacing']));
2279
+ }
2191
2280
 
2192
2281
  // Numbering
2193
2282
  // Note: When track changes are present (w:pPrChange), XMLParser merges the
@@ -2304,8 +2393,8 @@ export class DocumentParser {
2304
2393
  }
2305
2394
 
2306
2395
  // Suppress line numbers per ECMA-376 Part 1 §17.3.1.34
2307
- if (pPrObj['w:suppressLineNumbers']) {
2308
- paragraph.setSuppressLineNumbers(true);
2396
+ if (pPrObj['w:suppressLineNumbers'] !== undefined) {
2397
+ paragraph.setSuppressLineNumbers(parseOoxmlBoolean(pPrObj['w:suppressLineNumbers']));
2309
2398
  }
2310
2399
 
2311
2400
  // Bidirectional layout per ECMA-376 Part 1 §17.3.1.6
@@ -2330,8 +2419,8 @@ export class DocumentParser {
2330
2419
  }
2331
2420
 
2332
2421
  // Mirror indents per ECMA-376 Part 1 §17.3.1.18
2333
- if (pPrObj['w:mirrorIndents']) {
2334
- paragraph.setMirrorIndents(true);
2422
+ if (pPrObj['w:mirrorIndents'] !== undefined) {
2423
+ paragraph.setMirrorIndents(parseOoxmlBoolean(pPrObj['w:mirrorIndents']));
2335
2424
  }
2336
2425
 
2337
2426
  // Auto-adjust right indent per ECMA-376 Part 1 §17.3.1.1
@@ -2384,8 +2473,8 @@ export class DocumentParser {
2384
2473
  }
2385
2474
 
2386
2475
  // Suppress automatic hyphenation per ECMA-376 Part 1 §17.3.1.33
2387
- if (pPrObj['w:suppressAutoHyphens']) {
2388
- paragraph.setSuppressAutoHyphens(true);
2476
+ if (pPrObj['w:suppressAutoHyphens'] !== undefined) {
2477
+ paragraph.setSuppressAutoHyphens(parseOoxmlBoolean(pPrObj['w:suppressAutoHyphens']));
2389
2478
  }
2390
2479
 
2391
2480
  // CJK paragraph properties per ECMA-376 Part 1
@@ -2409,8 +2498,8 @@ export class DocumentParser {
2409
2498
  }
2410
2499
 
2411
2500
  // Suppress text frame overlap per ECMA-376 Part 1 §17.3.1.34
2412
- if (pPrObj['w:suppressOverlap']) {
2413
- paragraph.setSuppressOverlap(true);
2501
+ if (pPrObj['w:suppressOverlap'] !== undefined) {
2502
+ paragraph.setSuppressOverlap(parseOoxmlBoolean(pPrObj['w:suppressOverlap']));
2414
2503
  }
2415
2504
 
2416
2505
  // Textbox tight wrap per ECMA-376 Part 1 §17.3.1.37
@@ -2477,13 +2566,14 @@ export class DocumentParser {
2477
2566
  }
2478
2567
 
2479
2568
  // Parse previous indentation
2569
+ // Per ECMA-376 §17.3.1.15: w:start/w:end are bidi-aware alternatives to w:left/w:right
2480
2570
  if (prevPPr['w:ind']) {
2481
2571
  const ind = prevPPr['w:ind'];
2482
2572
  previousProperties.indentation = {};
2483
- if (ind['@_w:left'] !== undefined)
2484
- previousProperties.indentation.left = parseInt(ind['@_w:left'], 10);
2485
- if (ind['@_w:right'] !== undefined)
2486
- previousProperties.indentation.right = parseInt(ind['@_w:right'], 10);
2573
+ const leftVal = ind['@_w:start'] ?? ind['@_w:left'];
2574
+ const rightVal = ind['@_w:end'] ?? ind['@_w:right'];
2575
+ if (leftVal !== undefined) previousProperties.indentation.left = parseInt(leftVal, 10);
2576
+ if (rightVal !== undefined) previousProperties.indentation.right = parseInt(rightVal, 10);
2487
2577
  if (ind['@_w:firstLine'] !== undefined)
2488
2578
  previousProperties.indentation.firstLine = parseInt(ind['@_w:firstLine'], 10);
2489
2579
  if (ind['@_w:hanging'] !== undefined)
@@ -2495,7 +2585,7 @@ export class DocumentParser {
2495
2585
  previousProperties.alignment = String(prevPPr['w:jc']['@_w:val']);
2496
2586
  }
2497
2587
 
2498
- // Parse previous spacing
2588
+ // Parse previous spacing (all 8 CT_Spacing attributes per ECMA-376 §17.3.1.33)
2499
2589
  if (prevPPr['w:spacing']) {
2500
2590
  const spacing = prevPPr['w:spacing'];
2501
2591
  previousProperties.spacing = {};
@@ -2507,6 +2597,18 @@ export class DocumentParser {
2507
2597
  previousProperties.spacing.line = parseInt(spacing['@_w:line'], 10);
2508
2598
  if (spacing['@_w:lineRule'])
2509
2599
  previousProperties.spacing.lineRule = String(spacing['@_w:lineRule']);
2600
+ if (spacing['@_w:beforeLines'] !== undefined)
2601
+ previousProperties.spacing.beforeLines = parseInt(spacing['@_w:beforeLines'], 10);
2602
+ if (spacing['@_w:afterLines'] !== undefined)
2603
+ previousProperties.spacing.afterLines = parseInt(spacing['@_w:afterLines'], 10);
2604
+ const beforeAuto = spacing['@_w:beforeAutospacing'];
2605
+ if (beforeAuto !== undefined)
2606
+ previousProperties.spacing.beforeAutospacing =
2607
+ String(beforeAuto) === '1' || String(beforeAuto) === 'true';
2608
+ const afterAuto = spacing['@_w:afterAutospacing'];
2609
+ if (afterAuto !== undefined)
2610
+ previousProperties.spacing.afterAutospacing =
2611
+ String(afterAuto) === '1' || String(afterAuto) === 'true';
2510
2612
  }
2511
2613
 
2512
2614
  // Parse previous keepNext/keepLines/pageBreakBefore
@@ -4001,7 +4103,13 @@ export class DocumentParser {
4001
4103
  const brElements = toArray(runObj['w:br']);
4002
4104
  const brElement = brElements[elementIndex] || brElements[0];
4003
4105
  const breakType = brElement?.['@_w:type'] as BreakType | undefined;
4004
- content.push({ type: 'break', breakType });
4106
+ const breakClear = brElement?.['@_w:clear'] as
4107
+ | 'none'
4108
+ | 'left'
4109
+ | 'right'
4110
+ | 'all'
4111
+ | undefined;
4112
+ content.push({ type: 'break', breakType, breakClear });
4005
4113
  break;
4006
4114
  }
4007
4115
 
@@ -4210,7 +4318,13 @@ export class DocumentParser {
4210
4318
  if (runObj['w:br'] !== undefined) {
4211
4319
  const brElement = runObj['w:br'];
4212
4320
  const breakType = brElement?.['@_w:type'] as BreakType | undefined;
4213
- content.push({ type: 'break', breakType });
4321
+ const breakClear = brElement?.['@_w:clear'] as
4322
+ | 'none'
4323
+ | 'left'
4324
+ | 'right'
4325
+ | 'all'
4326
+ | undefined;
4327
+ content.push({ type: 'break', breakType, breakClear });
4214
4328
  }
4215
4329
 
4216
4330
  if (runObj['w:cr'] !== undefined) {
@@ -4453,22 +4567,11 @@ export class DocumentParser {
4453
4567
  }
4454
4568
  }
4455
4569
 
4456
- // Handle external hyperlinks with anchor fragments
4457
- // Microsoft Word can store URLs with the base in relationships and fragment in w:anchor
4458
- // Example: rels has "https://example.com/", anchor has "!/view?docid=abc-123"
4459
- // Combined: "https://example.com/#!/view?docid=abc-123"
4460
- // This is common for single-page applications with hash-based routing (theSource, etc.)
4461
- let finalAnchor = anchor;
4462
- let finalRelationshipId = relationshipId;
4463
- if (url && anchor) {
4464
- // Combine URL and anchor for external hyperlinks with fragments
4465
- url = url + '#' + anchor;
4466
- finalAnchor = undefined; // Clear anchor since it's now part of URL
4467
- // Clear relationshipId since the relationship points to the old base URL
4468
- // On save, a new relationship will be created with the combined URL
4469
- finalRelationshipId = undefined;
4470
- defaultLogger.debug(`[DocumentParser] Combined external URL with anchor fragment: ${url}`);
4471
- }
4570
+ // Per ECMA-376 §17.16.22, a hyperlink can have BOTH r:id (external URL) and w:anchor
4571
+ // (bookmark) simultaneously — e.g., linking to a bookmark in an external document.
4572
+ // Preserve both attributes as-is; the serializer supports writing both.
4573
+ const finalAnchor = anchor;
4574
+ const finalRelationshipId = relationshipId;
4472
4575
 
4473
4576
  // Skip hyperlinks that have no destination (neither URL nor anchor nor relationship ID)
4474
4577
  // This can happen with malformed HYPERLINK field codes or corrupted documents
@@ -4780,29 +4883,19 @@ export class DocumentParser {
4780
4883
  if (val) run.setEmphasis(val);
4781
4884
  }
4782
4885
 
4783
- // Parse outline text effect (w:outline) per ECMA-376 Part 1 §17.3.2.23
4784
- if (rPrObj['w:outline']) run.setOutline(true);
4785
-
4786
- // Parse shadow text effect (w:shadow) per ECMA-376 Part 1 §17.3.2.32
4787
- if (rPrObj['w:shadow']) run.setShadow(true);
4788
-
4789
- // Parse emboss text effect (w:emboss) per ECMA-376 Part 1 §17.3.2.13
4790
- if (rPrObj['w:emboss']) run.setEmboss(true);
4791
-
4792
- // Parse imprint text effect (w:imprint) per ECMA-376 Part 1 §17.3.2.18
4793
- if (rPrObj['w:imprint']) run.setImprint(true);
4794
-
4795
- // Parse no proofing (w:noProof) per ECMA-376 Part 1 §17.3.2.21
4796
- if (rPrObj['w:noProof']) run.setNoProof(true);
4797
-
4798
- // Parse snap to grid (w:snapToGrid) per ECMA-376 Part 1 §17.3.2.35
4799
- if (rPrObj['w:snapToGrid']) run.setSnapToGrid(true);
4800
-
4801
- // Parse vanish/hidden (w:vanish) per ECMA-376 Part 1 §17.3.2.42
4802
- if (rPrObj['w:vanish']) run.setVanish(true);
4803
-
4804
- // Parse special vanish (w:specVanish) per ECMA-376 Part 1 §17.3.2.36
4805
- if (rPrObj['w:specVanish']) run.setSpecVanish(true);
4886
+ // Parse boolean text effects — use parseOoxmlBoolean to correctly handle w:val="0"/"false"
4887
+ // Per ECMA-376, <w:xxx/> or <w:xxx w:val="1"/> = true; <w:xxx w:val="0"/> = false (explicit off)
4888
+ if (parseOoxmlBoolean(rPrObj['w:outline'])) run.setOutline(true);
4889
+ if (parseOoxmlBoolean(rPrObj['w:shadow'])) run.setShadow(true);
4890
+ if (parseOoxmlBoolean(rPrObj['w:emboss'])) run.setEmboss(true);
4891
+ if (parseOoxmlBoolean(rPrObj['w:imprint'])) run.setImprint(true);
4892
+ if (parseOoxmlBoolean(rPrObj['w:noProof'])) run.setNoProof(true);
4893
+ // snapToGrid: default when absent is true (§17.3.2.34), so explicit val="0" must be preserved
4894
+ if (rPrObj['w:snapToGrid'] !== undefined) {
4895
+ run.setSnapToGrid(parseOoxmlBoolean(rPrObj['w:snapToGrid']));
4896
+ }
4897
+ if (parseOoxmlBoolean(rPrObj['w:vanish'])) run.setVanish(true);
4898
+ if (parseOoxmlBoolean(rPrObj['w:specVanish'])) run.setSpecVanish(true);
4806
4899
 
4807
4900
  // Boolean properties - use parseOoxmlBoolean helper
4808
4901
  // Per ECMA-376: <w:b/> or <w:b w:val="1"/> or <w:b w:val="true"/> means true
@@ -4811,16 +4904,20 @@ export class DocumentParser {
4811
4904
  // Parse RTL text (w:rtl) per ECMA-376 Part 1 §17.3.2.30
4812
4905
  if (parseOoxmlBoolean(rPrObj['w:rtl'])) run.setRTL(true);
4813
4906
 
4814
- if (parseOoxmlBoolean(rPrObj['w:b'])) run.setBold(true);
4815
- if (parseOoxmlBoolean(rPrObj['w:bCs'])) run.setComplexScriptBold(true);
4816
- if (parseOoxmlBoolean(rPrObj['w:i'])) run.setItalic(true);
4817
- if (parseOoxmlBoolean(rPrObj['w:iCs'])) run.setComplexScriptItalic(true);
4818
- if (parseOoxmlBoolean(rPrObj['w:strike'])) run.setStrike(true);
4819
- if (parseOoxmlBoolean(rPrObj['w:dstrike'])) {
4820
- (run as any).formatting.dstrike = true;
4821
- }
4822
- if (parseOoxmlBoolean(rPrObj['w:smallCaps'])) run.setSmallCaps(true);
4823
- if (parseOoxmlBoolean(rPrObj['w:caps'])) run.setAllCaps(true);
4907
+ // b, bCs, i, iCs: preserve explicit val="0" to override style-inherited formatting
4908
+ if (rPrObj['w:b'] !== undefined) run.setBold(parseOoxmlBoolean(rPrObj['w:b']));
4909
+ if (rPrObj['w:bCs'] !== undefined) run.setComplexScriptBold(parseOoxmlBoolean(rPrObj['w:bCs']));
4910
+ if (rPrObj['w:i'] !== undefined) run.setItalic(parseOoxmlBoolean(rPrObj['w:i']));
4911
+ if (rPrObj['w:iCs'] !== undefined)
4912
+ run.setComplexScriptItalic(parseOoxmlBoolean(rPrObj['w:iCs']));
4913
+ // strike, dstrike, smallCaps, caps: preserve explicit val="0" to override style-inherited formatting
4914
+ if (rPrObj['w:strike'] !== undefined) run.setStrike(parseOoxmlBoolean(rPrObj['w:strike']));
4915
+ if (rPrObj['w:dstrike'] !== undefined) {
4916
+ (run as any).formatting.dstrike = parseOoxmlBoolean(rPrObj['w:dstrike']);
4917
+ }
4918
+ if (rPrObj['w:smallCaps'] !== undefined)
4919
+ run.setSmallCaps(parseOoxmlBoolean(rPrObj['w:smallCaps']));
4920
+ if (rPrObj['w:caps'] !== undefined) run.setAllCaps(parseOoxmlBoolean(rPrObj['w:caps']));
4824
4921
 
4825
4922
  // Parse complex script flag (w:cs) per ECMA-376 Part 1 §17.3.2.7
4826
4923
  if (parseOoxmlBoolean(rPrObj['w:cs'])) run.setComplexScript(true);
@@ -4870,10 +4967,21 @@ export class DocumentParser {
4870
4967
  if (val) run.setKerning(parseInt(val, 10));
4871
4968
  }
4872
4969
 
4873
- // Parse language (w:lang) per ECMA-376 Part 1 §17.3.2.20
4970
+ // Parse language (w:lang) per ECMA-376 Part 1 §17.3.2.20 (CT_Language)
4874
4971
  if (rPrObj['w:lang']) {
4875
- const val = rPrObj['w:lang']['@_w:val'];
4876
- if (val) run.setLanguage(val);
4972
+ const langObj = rPrObj['w:lang'];
4973
+ const val = langObj['@_w:val'];
4974
+ const eastAsia = langObj['@_w:eastAsia'];
4975
+ const bidi = langObj['@_w:bidi'];
4976
+ if (eastAsia || bidi) {
4977
+ run.setLanguage({
4978
+ val: val ? String(val) : undefined,
4979
+ eastAsia: eastAsia ? String(eastAsia) : undefined,
4980
+ bidi: bidi ? String(bidi) : undefined,
4981
+ });
4982
+ } else if (val) {
4983
+ run.setLanguage(String(val));
4984
+ }
4877
4985
  }
4878
4986
 
4879
4987
  // Parse East Asian layout (w:eastAsianLayout) per ECMA-376 Part 1 §17.3.2.10
@@ -4907,7 +5015,8 @@ export class DocumentParser {
4907
5015
  if (rPrObj['w:vertAlign']) {
4908
5016
  const val = rPrObj['w:vertAlign']['@_w:val'];
4909
5017
  if (val === 'subscript') run.setSubscript(true);
4910
- if (val === 'superscript') run.setSuperscript(true);
5018
+ else if (val === 'superscript') run.setSuperscript(true);
5019
+ else if (val === 'baseline') (run as any).formatting.vertAlignBaseline = true;
4911
5020
  }
4912
5021
 
4913
5022
  if (rPrObj['w:rFonts']) {
@@ -4946,10 +5055,15 @@ export class DocumentParser {
4946
5055
  if (rPrObj['w:color']) {
4947
5056
  const colorObj = rPrObj['w:color'];
4948
5057
  const colorVal = colorObj['@_w:val'];
4949
- // Skip special OOXML values like "auto" (automatic/inherit from style)
4950
- // "auto" is a valid OOXML color that means inherit - not a hex color
4951
- if (colorVal && colorVal !== 'auto') {
4952
- run.setColor(colorVal);
5058
+ // Per ECMA-376 §17.18.6, w:val can be a hex color OR the special value "auto"
5059
+ // "auto" means use the automatic/window text color; it must be preserved for round-trip
5060
+ if (colorVal) {
5061
+ if (colorVal === 'auto') {
5062
+ // Bypass normalizeColor() which rejects non-hex values
5063
+ (run as any).formatting.color = 'auto';
5064
+ } else {
5065
+ run.setColor(colorVal);
5066
+ }
4953
5067
  }
4954
5068
  // Parse theme color attributes per ECMA-376 Part 1 Section 17.3.2.6
4955
5069
  if (colorObj['@_w:themeColor']) {
@@ -5046,7 +5160,7 @@ export class DocumentParser {
5046
5160
  if (prevRPr['w:color']) {
5047
5161
  const colorObj = prevRPr['w:color'];
5048
5162
  const colorVal = colorObj['@_w:val'];
5049
- if (colorVal && colorVal !== 'auto') {
5163
+ if (colorVal) {
5050
5164
  prevProps.color = colorVal;
5051
5165
  }
5052
5166
  // Parse theme color attributes
@@ -5066,11 +5180,12 @@ export class DocumentParser {
5066
5180
  prevProps.highlight = prevRPr['w:highlight']['@_w:val'];
5067
5181
  }
5068
5182
 
5069
- // Parse previous subscript/superscript
5183
+ // Parse previous subscript/superscript/baseline per ECMA-376 §17.18.96
5070
5184
  if (prevRPr['w:vertAlign']) {
5071
5185
  const val = prevRPr['w:vertAlign']['@_w:val'];
5072
5186
  if (val === 'subscript') prevProps.subscript = true;
5073
- if (val === 'superscript') prevProps.superscript = true;
5187
+ else if (val === 'superscript') prevProps.superscript = true;
5188
+ else if (val === 'baseline') prevProps.vertAlignBaseline = true;
5074
5189
  }
5075
5190
 
5076
5191
  // Parse previous smallCaps/allCaps
@@ -5173,10 +5288,19 @@ export class DocumentParser {
5173
5288
  }
5174
5289
  }
5175
5290
 
5176
- // Parse language (w:lang @w:val)
5291
+ // Parse language (w:lang) per ECMA-376 CT_Language (w:val, w:eastAsia, w:bidi)
5177
5292
  if (prevRPr['w:lang']) {
5178
- const langVal = prevRPr['w:lang']['@_w:val'];
5179
- if (langVal) {
5293
+ const langObj = prevRPr['w:lang'];
5294
+ const langVal = langObj['@_w:val'];
5295
+ const langEastAsia = langObj['@_w:eastAsia'];
5296
+ const langBidi = langObj['@_w:bidi'];
5297
+ if (langEastAsia || langBidi) {
5298
+ prevProps.language = {
5299
+ val: langVal ? String(langVal) : undefined,
5300
+ eastAsia: langEastAsia ? String(langEastAsia) : undefined,
5301
+ bidi: langBidi ? String(langBidi) : undefined,
5302
+ };
5303
+ } else if (langVal) {
5180
5304
  prevProps.language = String(langVal);
5181
5305
  }
5182
5306
  }
@@ -6052,6 +6176,25 @@ export class DocumentParser {
6052
6176
  }
6053
6177
  }
6054
6178
 
6179
+ // Parse table grid change (w:tblGridChange) per ECMA-376 §17.13.5.35
6180
+ if (tableObj['w:tblGrid']?.['w:tblGridChange']) {
6181
+ const changeObj = tableObj['w:tblGrid']['w:tblGridChange'];
6182
+ const prevGridCols = changeObj['w:tblGrid']?.['w:gridCol'];
6183
+ if (prevGridCols) {
6184
+ const prevArray = Array.isArray(prevGridCols) ? prevGridCols : [prevGridCols];
6185
+ const prevWidths = prevArray.map((col: any) => ({
6186
+ width: isExplicitlySet(col['@_w:w']) ? safeParseInt(col['@_w:w'], 2880) : 2880,
6187
+ }));
6188
+ const gridChange = TableGridChange.create(
6189
+ safeParseInt(changeObj['@_w:id'], 0),
6190
+ prevWidths,
6191
+ changeObj['@_w:author'] || undefined,
6192
+ changeObj['@_w:date'] ? new Date(changeObj['@_w:date']) : undefined
6193
+ );
6194
+ table.setTblGridChange(gridChange);
6195
+ }
6196
+ }
6197
+
6055
6198
  // Parse table rows (w:tr)
6056
6199
  const rows = tableObj['w:tr'];
6057
6200
  const rowChildren = Array.isArray(rows) ? rows : rows ? [rows] : [];
@@ -6158,7 +6301,7 @@ export class DocumentParser {
6158
6301
  table.setTblLook(look['@_w:val']);
6159
6302
  } else {
6160
6303
  // Individual attribute format - construct hex value
6161
- // Per ECMA-376: bit 0=firstRow, 1=lastRow, 2=firstCol, 3=lastCol, 4=noHBand, 5=noVBand
6304
+ // Per ECMA-376 §17.4.57: bit5=firstRow, bit6=lastRow, bit7=firstCol, bit8=lastCol, bit9=noHBand, bit10=noVBand
6162
6305
  let value = 0;
6163
6306
  if (look['@_w:firstRow'] === '1') value |= 0x0020;
6164
6307
  if (look['@_w:lastRow'] === '1') value |= 0x0040;
@@ -6249,9 +6392,14 @@ export class DocumentParser {
6249
6392
  if (tblPrObj['w:tblInd']) {
6250
6393
  const indentVal = safeParseInt(tblPrObj['w:tblInd']['@_w:w'], 0);
6251
6394
  table.setIndent(indentVal);
6395
+ const indentType = tblPrObj['w:tblInd']['@_w:type'];
6396
+ if (indentType) {
6397
+ table.setIndentType(indentType as import('../elements/Table').TableWidthType);
6398
+ }
6252
6399
  }
6253
6400
 
6254
6401
  // Parse table cell margins (w:tblCellMar) per ECMA-376 Part 1 §17.4.42
6402
+ // Supports both legacy w:left/w:right and bidi-aware w:start/w:end (w:start takes precedence)
6255
6403
  if (tblPrObj['w:tblCellMar']) {
6256
6404
  const cellMar = tblPrObj['w:tblCellMar'];
6257
6405
  const margins: { top?: number; bottom?: number; left?: number; right?: number } = {};
@@ -6264,12 +6412,14 @@ export class DocumentParser {
6264
6412
  const w = cellMar['w:bottom']['@_w:w'];
6265
6413
  if (w !== undefined) margins.bottom = parseInt(w, 10);
6266
6414
  }
6267
- if (cellMar['w:left']) {
6268
- const w = cellMar['w:left']['@_w:w'];
6415
+ const leftSource = cellMar['w:start'] || cellMar['w:left'];
6416
+ if (leftSource) {
6417
+ const w = leftSource['@_w:w'];
6269
6418
  if (w !== undefined) margins.left = parseInt(w, 10);
6270
6419
  }
6271
- if (cellMar['w:right']) {
6272
- const w = cellMar['w:right']['@_w:w'];
6420
+ const rightSource = cellMar['w:end'] || cellMar['w:right'];
6421
+ if (rightSource) {
6422
+ const w = rightSource['@_w:w'];
6273
6423
  if (w !== undefined) margins.right = parseInt(w, 10);
6274
6424
  }
6275
6425
 
@@ -6413,11 +6563,21 @@ export class DocumentParser {
6413
6563
  if (!trPrObj) return;
6414
6564
 
6415
6565
  // Parse row height (w:trHeight) per ECMA-376 Part 1 §17.4.81
6566
+ // Per §17.18.33 (ST_HeightRule), when w:hRule is absent the default is "auto"
6416
6567
  if (trPrObj['w:trHeight']) {
6417
6568
  const heightVal = parseInt(trPrObj['w:trHeight']['@_w:val'] || '0', 10);
6418
- const heightRule = trPrObj['w:trHeight']['@_w:hRule'] || 'atLeast';
6569
+ const heightRule = trPrObj['w:trHeight']['@_w:hRule'];
6419
6570
  if (heightVal > 0) {
6420
- row.setHeight(heightVal, heightRule);
6571
+ // Set height without defaulting hRule — setHeight defaults to 'atLeast'
6572
+ // so we set height first, then override the rule only if explicitly present
6573
+ row.setHeight(heightVal);
6574
+ if (heightRule) {
6575
+ row.setHeightRule(heightRule);
6576
+ } else {
6577
+ // When w:hRule is absent, clear the defaulted rule so the generator omits it,
6578
+ // preserving round-trip fidelity (absent = "auto" per ECMA-376 §17.18.33)
6579
+ row.setHeightRule(undefined);
6580
+ }
6421
6581
  }
6422
6582
  }
6423
6583
 
@@ -6657,6 +6817,7 @@ export class DocumentParser {
6657
6817
  }
6658
6818
 
6659
6819
  // Parse cell margins (w:tcMar) per ECMA-376 Part 1 §17.4.43
6820
+ // Supports both legacy w:left/w:right and bidi-aware w:start/w:end (w:start takes precedence)
6660
6821
  if (tcPr['w:tcMar']) {
6661
6822
  const tcMar = tcPr['w:tcMar'];
6662
6823
  const margins: any = {};
@@ -6667,11 +6828,13 @@ export class DocumentParser {
6667
6828
  if (tcMar['w:bottom']) {
6668
6829
  margins.bottom = parseInt(tcMar['w:bottom']['@_w:w'] || '0', 10);
6669
6830
  }
6670
- if (tcMar['w:left']) {
6671
- margins.left = parseInt(tcMar['w:left']['@_w:w'] || '0', 10);
6831
+ const leftSrc = tcMar['w:start'] || tcMar['w:left'];
6832
+ if (leftSrc) {
6833
+ margins.left = parseInt(leftSrc['@_w:w'] || '0', 10);
6672
6834
  }
6673
- if (tcMar['w:right']) {
6674
- margins.right = parseInt(tcMar['w:right']['@_w:w'] || '0', 10);
6835
+ const rightSrc = tcMar['w:end'] || tcMar['w:right'];
6836
+ if (rightSrc) {
6837
+ margins.right = parseInt(rightSrc['@_w:w'] || '0', 10);
6675
6838
  }
6676
6839
 
6677
6840
  if (Object.keys(margins).length > 0) {
@@ -7599,7 +7762,7 @@ export class DocumentParser {
7599
7762
  */
7600
7763
  private parseTOCFromSDTContent(
7601
7764
  content: any[],
7602
- properties: any,
7765
+ _properties: any,
7603
7766
  sdtContent: any
7604
7767
  ): TableOfContents | null {
7605
7768
  try {
@@ -8154,12 +8317,14 @@ export class DocumentParser {
8154
8317
  const width = XMLParser.extractAttribute(pgSz, 'w:w');
8155
8318
  const height = XMLParser.extractAttribute(pgSz, 'w:h');
8156
8319
  const orient = XMLParser.extractAttribute(pgSz, 'w:orient');
8320
+ const code = XMLParser.extractAttribute(pgSz, 'w:code');
8157
8321
 
8158
8322
  if (width && height) {
8159
8323
  sectionProps.pageSize = {
8160
8324
  width: parseInt(width, 10),
8161
8325
  height: parseInt(height, 10),
8162
8326
  orientation: orient === 'landscape' ? 'landscape' : 'portrait',
8327
+ code: code ? parseInt(code, 10) : undefined,
8163
8328
  };
8164
8329
  }
8165
8330
  }
@@ -8252,14 +8417,23 @@ export class DocumentParser {
8252
8417
  const equalWidth = XMLParser.extractAttribute(cols, 'w:equalWidth');
8253
8418
  const sep = XMLParser.extractAttribute(cols, 'w:sep');
8254
8419
 
8255
- // Extract individual column widths
8420
+ // Extract individual column widths and per-column spacing (CT_Column: w:w, w:space)
8256
8421
  const colElements = XMLParser.extractElements(cols, 'w:col');
8257
8422
  const columnWidths: number[] = [];
8423
+ const columnSpaces: number[] = [];
8424
+ let hasColumnSpaces = false;
8258
8425
  for (const col of colElements) {
8259
8426
  const width = XMLParser.extractAttribute(col, 'w:w');
8260
8427
  if (width) {
8261
8428
  columnWidths.push(parseInt(width.toString(), 10));
8262
8429
  }
8430
+ const colSpace = XMLParser.extractAttribute(col, 'w:space');
8431
+ if (colSpace) {
8432
+ columnSpaces.push(parseInt(colSpace.toString(), 10));
8433
+ hasColumnSpaces = true;
8434
+ } else {
8435
+ columnSpaces.push(0);
8436
+ }
8263
8437
  }
8264
8438
 
8265
8439
  // Helper to handle boolean conversion (XMLParser may return string or number)
@@ -8272,6 +8446,7 @@ export class DocumentParser {
8272
8446
  equalWidth: equalWidth ? toBool(equalWidth) : undefined,
8273
8447
  separator: sep ? toBool(sep) : undefined,
8274
8448
  columnWidths: columnWidths.length > 0 ? columnWidths : undefined,
8449
+ columnSpaces: hasColumnSpaces ? columnSpaces : undefined,
8275
8450
  };
8276
8451
  }
8277
8452
  }
@@ -8732,13 +8907,18 @@ export class DocumentParser {
8732
8907
  }
8733
8908
  }
8734
8909
 
8735
- // Parse spacing (w:spacing)
8910
+ // Parse spacing (w:spacing) — all 8 CT_Spacing attributes per ECMA-376 §17.3.1.33
8736
8911
  const spacingElement = XMLParser.extractSelfClosingTag(pPrXml, 'w:spacing');
8737
8912
  if (spacingElement) {
8738
- const before = XMLParser.extractAttribute(`<w:spacing${spacingElement}`, 'w:before');
8739
- const after = XMLParser.extractAttribute(`<w:spacing${spacingElement}`, 'w:after');
8740
- const line = XMLParser.extractAttribute(`<w:spacing${spacingElement}`, 'w:line');
8741
- const lineRule = XMLParser.extractAttribute(`<w:spacing${spacingElement}`, 'w:lineRule');
8913
+ const spacingTag = `<w:spacing${spacingElement}`;
8914
+ const before = XMLParser.extractAttribute(spacingTag, 'w:before');
8915
+ const after = XMLParser.extractAttribute(spacingTag, 'w:after');
8916
+ const line = XMLParser.extractAttribute(spacingTag, 'w:line');
8917
+ const lineRule = XMLParser.extractAttribute(spacingTag, 'w:lineRule');
8918
+ const beforeLines = XMLParser.extractAttribute(spacingTag, 'w:beforeLines');
8919
+ const afterLines = XMLParser.extractAttribute(spacingTag, 'w:afterLines');
8920
+ const beforeAutosp = XMLParser.extractAttribute(spacingTag, 'w:beforeAutospacing');
8921
+ const afterAutosp = XMLParser.extractAttribute(spacingTag, 'w:afterAutospacing');
8742
8922
 
8743
8923
  // Validate lineRule
8744
8924
  let validatedLineRule: 'auto' | 'exact' | 'atLeast' | undefined;
@@ -8755,20 +8935,33 @@ export class DocumentParser {
8755
8935
  // If lineRule exists without line, use default 240 twips
8756
8936
  line: line ? parseInt(line, 10) : validatedLineRule ? 240 : undefined,
8757
8937
  lineRule: validatedLineRule,
8938
+ beforeLines: beforeLines ? parseInt(beforeLines, 10) : undefined,
8939
+ afterLines: afterLines ? parseInt(afterLines, 10) : undefined,
8940
+ beforeAutospacing: beforeAutosp
8941
+ ? beforeAutosp === '1' || beforeAutosp === 'true'
8942
+ : undefined,
8943
+ afterAutospacing: afterAutosp ? afterAutosp === '1' || afterAutosp === 'true' : undefined,
8758
8944
  };
8759
8945
  }
8760
8946
 
8761
8947
  // Parse indentation (w:ind)
8948
+ // Per ECMA-376 §17.3.1.12: w:start/w:end are bidi-aware alternatives to w:left/w:right
8762
8949
  const indElement = XMLParser.extractSelfClosingTag(pPrXml, 'w:ind');
8763
8950
  if (indElement) {
8764
- const left = XMLParser.extractAttribute(`<w:ind${indElement}`, 'w:left');
8765
- const right = XMLParser.extractAttribute(`<w:ind${indElement}`, 'w:right');
8766
- const firstLine = XMLParser.extractAttribute(`<w:ind${indElement}`, 'w:firstLine');
8767
- const hanging = XMLParser.extractAttribute(`<w:ind${indElement}`, 'w:hanging');
8951
+ const indTag = `<w:ind${indElement}`;
8952
+ const start = XMLParser.extractAttribute(indTag, 'w:start');
8953
+ const left = XMLParser.extractAttribute(indTag, 'w:left');
8954
+ const end = XMLParser.extractAttribute(indTag, 'w:end');
8955
+ const right = XMLParser.extractAttribute(indTag, 'w:right');
8956
+ const firstLine = XMLParser.extractAttribute(indTag, 'w:firstLine');
8957
+ const hanging = XMLParser.extractAttribute(indTag, 'w:hanging');
8958
+
8959
+ const leftVal = start || left;
8960
+ const rightVal = end || right;
8768
8961
 
8769
8962
  formatting.indentation = {
8770
- left: left ? parseInt(left, 10) : undefined,
8771
- right: right ? parseInt(right, 10) : undefined,
8963
+ left: leftVal ? parseInt(leftVal, 10) : undefined,
8964
+ right: rightVal ? parseInt(rightVal, 10) : undefined,
8772
8965
  firstLine: firstLine ? parseInt(firstLine, 10) : undefined,
8773
8966
  hanging: hanging ? parseInt(hanging, 10) : undefined,
8774
8967
  };
@@ -8804,6 +8997,54 @@ export class DocumentParser {
8804
8997
  }
8805
8998
  }
8806
8999
 
9000
+ // Parse paragraph borders (w:pBdr) per ECMA-376 Part 1 §17.3.1.24
9001
+ const pBdrXml = XMLParser.extractBetweenTags(pPrXml, '<w:pBdr>', '</w:pBdr>');
9002
+ if (pBdrXml) {
9003
+ const borders: any = {};
9004
+ const borderTypes = ['top', 'left', 'bottom', 'right', 'between', 'bar'];
9005
+ for (const type of borderTypes) {
9006
+ if (pBdrXml.includes(`<w:${type}`)) {
9007
+ const tag = XMLParser.extractSelfClosingTag(pBdrXml, `w:${type}`);
9008
+ if (tag) {
9009
+ const bTag = `<w:${type}${tag}`;
9010
+ const style = XMLParser.extractAttribute(bTag, 'w:val');
9011
+ const size = XMLParser.extractAttribute(bTag, 'w:sz');
9012
+ const space = XMLParser.extractAttribute(bTag, 'w:space');
9013
+ const color = XMLParser.extractAttribute(bTag, 'w:color');
9014
+ const border: any = {};
9015
+ if (style) border.style = style;
9016
+ if (size) border.size = parseInt(size, 10);
9017
+ if (space) border.space = parseInt(space, 10);
9018
+ if (color) border.color = color;
9019
+ if (Object.keys(border).length > 0) borders[type] = border;
9020
+ }
9021
+ }
9022
+ }
9023
+ if (Object.keys(borders).length > 0) formatting.borders = borders;
9024
+ }
9025
+
9026
+ // Parse tab stops (w:tabs) per ECMA-376 Part 1 §17.3.1.38
9027
+ const tabsXml = XMLParser.extractBetweenTags(pPrXml, '<w:tabs>', '</w:tabs>');
9028
+ if (tabsXml) {
9029
+ const tabs: any[] = [];
9030
+ // Extract all w:tab elements
9031
+ const tabRegex = /<w:tab\s[^>]*\/>/g;
9032
+ let tabMatch;
9033
+ while ((tabMatch = tabRegex.exec(tabsXml)) !== null) {
9034
+ const tabTag = tabMatch[0];
9035
+ const pos = XMLParser.extractAttribute(tabTag, 'w:pos');
9036
+ const val = XMLParser.extractAttribute(tabTag, 'w:val');
9037
+ const leader = XMLParser.extractAttribute(tabTag, 'w:leader');
9038
+ if (pos) {
9039
+ const tab: any = { position: parseInt(pos, 10) };
9040
+ if (val) tab.val = val;
9041
+ if (leader) tab.leader = leader;
9042
+ tabs.push(tab);
9043
+ }
9044
+ }
9045
+ if (tabs.length > 0) formatting.tabs = tabs;
9046
+ }
9047
+
8807
9048
  // Parse shading (w:shd) per ECMA-376 Part 1 §17.3.1.32
8808
9049
  const shading = this.parseShadingFromXml(pPrXml);
8809
9050
  if (shading) {
@@ -8838,10 +9079,11 @@ export class DocumentParser {
8838
9079
  formatting.allCaps = true;
8839
9080
  }
8840
9081
 
8841
- // Parse underline - use extractSelfClosingTag for accuracy
9082
+ // Parse underline (w:u) — all attributes per ECMA-376 §17.3.2.40
8842
9083
  const uElement = XMLParser.extractSelfClosingTag(rPrXml, 'w:u');
8843
9084
  if (uElement) {
8844
- const uVal = XMLParser.extractAttribute(`<w:u${uElement}`, 'w:val');
9085
+ const uTag = `<w:u${uElement}`;
9086
+ const uVal = XMLParser.extractAttribute(uTag, 'w:val');
8845
9087
  if (
8846
9088
  uVal === 'single' ||
8847
9089
  uVal === 'double' ||
@@ -8854,9 +9096,19 @@ export class DocumentParser {
8854
9096
  } else {
8855
9097
  formatting.underline = true;
8856
9098
  }
9099
+ const uColor = XMLParser.extractAttribute(uTag, 'w:color');
9100
+ if (uColor) formatting.underlineColor = uColor;
9101
+ const uThemeColor = XMLParser.extractAttribute(uTag, 'w:themeColor');
9102
+ if (uThemeColor) {
9103
+ formatting.underlineThemeColor = uThemeColor as import('../elements/Run').ThemeColorValue;
9104
+ }
9105
+ const uThemeTint = XMLParser.extractAttribute(uTag, 'w:themeTint');
9106
+ if (uThemeTint) formatting.underlineThemeTint = parseInt(uThemeTint, 16);
9107
+ const uThemeShade = XMLParser.extractAttribute(uTag, 'w:themeShade');
9108
+ if (uThemeShade) formatting.underlineThemeShade = parseInt(uThemeShade, 16);
8857
9109
  }
8858
9110
 
8859
- // Parse subscript/superscript - use extractSelfClosingTag
9111
+ // Parse subscript/superscript/baseline per ECMA-376 §17.18.96 (ST_VerticalAlignRun)
8860
9112
  const vertAlignElement = XMLParser.extractSelfClosingTag(rPrXml, 'w:vertAlign');
8861
9113
  if (vertAlignElement) {
8862
9114
  const val = XMLParser.extractAttribute(`<w:vertAlign${vertAlignElement}`, 'w:val');
@@ -8864,16 +9116,33 @@ export class DocumentParser {
8864
9116
  formatting.subscript = true;
8865
9117
  } else if (val === 'superscript') {
8866
9118
  formatting.superscript = true;
9119
+ } else if (val === 'baseline') {
9120
+ formatting.vertAlignBaseline = true;
8867
9121
  }
8868
9122
  }
8869
9123
 
8870
- // Parse font (w:rFonts) - use extractSelfClosingTag
9124
+ // Parse font (w:rFonts) — all attributes per ECMA-376 §17.3.2.26
8871
9125
  const rFontsElement = XMLParser.extractSelfClosingTag(rPrXml, 'w:rFonts');
8872
9126
  if (rFontsElement) {
8873
- const ascii = XMLParser.extractAttribute(`<w:rFonts${rFontsElement}`, 'w:ascii');
8874
- if (ascii) {
8875
- formatting.font = ascii;
8876
- }
9127
+ const rFontsTag = `<w:rFonts${rFontsElement}`;
9128
+ const ascii = XMLParser.extractAttribute(rFontsTag, 'w:ascii');
9129
+ if (ascii) formatting.font = ascii;
9130
+ const hAnsi = XMLParser.extractAttribute(rFontsTag, 'w:hAnsi');
9131
+ if (hAnsi) formatting.fontHAnsi = hAnsi;
9132
+ const eastAsia = XMLParser.extractAttribute(rFontsTag, 'w:eastAsia');
9133
+ if (eastAsia) formatting.fontEastAsia = eastAsia;
9134
+ const cs = XMLParser.extractAttribute(rFontsTag, 'w:cs');
9135
+ if (cs) formatting.fontCs = cs;
9136
+ const hint = XMLParser.extractAttribute(rFontsTag, 'w:hint');
9137
+ if (hint) formatting.fontHint = hint;
9138
+ const asciiTheme = XMLParser.extractAttribute(rFontsTag, 'w:asciiTheme');
9139
+ if (asciiTheme) formatting.fontAsciiTheme = asciiTheme;
9140
+ const hAnsiTheme = XMLParser.extractAttribute(rFontsTag, 'w:hAnsiTheme');
9141
+ if (hAnsiTheme) formatting.fontHAnsiTheme = hAnsiTheme;
9142
+ const eastAsiaTheme = XMLParser.extractAttribute(rFontsTag, 'w:eastAsiaTheme');
9143
+ if (eastAsiaTheme) formatting.fontEastAsiaTheme = eastAsiaTheme;
9144
+ const cstheme = XMLParser.extractAttribute(rFontsTag, 'w:cstheme');
9145
+ if (cstheme) formatting.fontCsTheme = cstheme;
8877
9146
  }
8878
9147
 
8879
9148
  // Parse size (w:sz) - size is in half-points
@@ -8886,14 +9155,38 @@ export class DocumentParser {
8886
9155
  }
8887
9156
  }
8888
9157
 
8889
- // Parse color (w:color)
8890
- // Use extractSelfClosingTag to avoid matching other tags
9158
+ // Parse complex script size (w:szCs) per ECMA-376 §17.3.2.39
9159
+ const szCsElement = XMLParser.extractSelfClosingTag(rPrXml, 'w:szCs');
9160
+ if (szCsElement) {
9161
+ const val = XMLParser.extractAttribute(`<w:szCs${szCsElement}`, 'w:val');
9162
+ if (val) {
9163
+ const szCsVal = halfPointsToPoints(parseInt(val, 10));
9164
+ if (formatting.size === undefined || szCsVal !== formatting.size) {
9165
+ formatting.sizeCs = szCsVal;
9166
+ }
9167
+ }
9168
+ }
9169
+
9170
+ // Parse color (w:color) — all attributes per ECMA-376 §17.3.2.6
8891
9171
  const colorElement = XMLParser.extractSelfClosingTag(rPrXml, 'w:color');
8892
9172
  if (colorElement) {
8893
- const val = XMLParser.extractAttribute(`<w:color${colorElement}`, 'w:val');
9173
+ const colorTag = `<w:color${colorElement}`;
9174
+ const val = XMLParser.extractAttribute(colorTag, 'w:val');
8894
9175
  if (val && val !== 'auto') {
8895
9176
  formatting.color = val;
8896
9177
  }
9178
+ const themeColor = XMLParser.extractAttribute(colorTag, 'w:themeColor');
9179
+ if (themeColor) {
9180
+ formatting.themeColor = themeColor as import('../elements/Run').ThemeColorValue;
9181
+ }
9182
+ const themeTint = XMLParser.extractAttribute(colorTag, 'w:themeTint');
9183
+ if (themeTint) {
9184
+ formatting.themeTint = parseInt(themeTint, 16);
9185
+ }
9186
+ const themeShade = XMLParser.extractAttribute(colorTag, 'w:themeShade');
9187
+ if (themeShade) {
9188
+ formatting.themeShade = parseInt(themeShade, 16);
9189
+ }
8897
9190
  }
8898
9191
 
8899
9192
  // Parse highlight (w:highlight) - use extractSelfClosingTag
@@ -8918,6 +9211,7 @@ export class DocumentParser {
8918
9211
  'lightGray',
8919
9212
  'black',
8920
9213
  'white',
9214
+ 'none',
8921
9215
  ];
8922
9216
  if (validHighlights.includes(val)) {
8923
9217
  formatting.highlight = val as
@@ -8936,7 +9230,8 @@ export class DocumentParser {
8936
9230
  | 'darkGray'
8937
9231
  | 'lightGray'
8938
9232
  | 'black'
8939
- | 'white';
9233
+ | 'white'
9234
+ | 'none';
8940
9235
  }
8941
9236
  }
8942
9237
  }
@@ -9014,14 +9309,19 @@ export class DocumentParser {
9014
9309
  ): import('../formatting/Style').TableStyleFormatting {
9015
9310
  const formatting: import('../formatting/Style').TableStyleFormatting = {};
9016
9311
 
9017
- // Parse indent
9312
+ // Parse indent (w:tblInd) — preserve w:type per ECMA-376 ST_TblWidth
9018
9313
  if (tblPrXml.includes('<w:tblInd')) {
9019
9314
  const tag = XMLParser.extractSelfClosingTag(tblPrXml, 'w:tblInd');
9020
9315
  if (tag) {
9021
- const w = XMLParser.extractAttribute(`<w:tblInd${tag}`, 'w:w');
9316
+ const tblIndTag = `<w:tblInd${tag}`;
9317
+ const w = XMLParser.extractAttribute(tblIndTag, 'w:w');
9022
9318
  if (w) {
9023
9319
  formatting.indent = parseInt(w, 10);
9024
9320
  }
9321
+ const type = XMLParser.extractAttribute(tblIndTag, 'w:type');
9322
+ if (type) {
9323
+ formatting.indentType = type as import('../elements/Table').TableWidthType;
9324
+ }
9025
9325
  }
9026
9326
  }
9027
9327
 
@@ -9331,19 +9631,43 @@ export class DocumentParser {
9331
9631
  ): import('../formatting/Style').CellMargins | undefined {
9332
9632
  const margins: import('../formatting/Style').CellMargins = {};
9333
9633
 
9334
- const marginTypes = ['top', 'bottom', 'left', 'right'];
9335
- for (const type of marginTypes) {
9634
+ // Parse top and bottom directly
9635
+ for (const type of ['top', 'bottom'] as const) {
9336
9636
  if (marginXml.includes(`<w:${type}`)) {
9337
9637
  const tag = XMLParser.extractSelfClosingTag(marginXml, `w:${type}`);
9338
9638
  if (tag) {
9339
9639
  const w = XMLParser.extractAttribute(`<w:${type}${tag}`, 'w:w');
9340
9640
  if (w) {
9341
- margins[type as keyof import('../formatting/Style').CellMargins] = parseInt(w, 10);
9641
+ margins[type] = parseInt(w, 10);
9342
9642
  }
9343
9643
  }
9344
9644
  }
9345
9645
  }
9346
9646
 
9647
+ // Parse left/right with bidi-aware w:start/w:end fallback (ECMA-376 §17.4.42/§17.4.43)
9648
+ // w:start takes precedence over w:left; w:end takes precedence over w:right
9649
+ const leftTag = marginXml.includes('<w:start')
9650
+ ? XMLParser.extractSelfClosingTag(marginXml, 'w:start')
9651
+ : XMLParser.extractSelfClosingTag(marginXml, 'w:left');
9652
+ if (leftTag) {
9653
+ const tagName = marginXml.includes('<w:start') ? 'w:start' : 'w:left';
9654
+ const w = XMLParser.extractAttribute(`<${tagName}${leftTag}`, 'w:w');
9655
+ if (w) {
9656
+ margins.left = parseInt(w, 10);
9657
+ }
9658
+ }
9659
+
9660
+ const rightTag = marginXml.includes('<w:end')
9661
+ ? XMLParser.extractSelfClosingTag(marginXml, 'w:end')
9662
+ : XMLParser.extractSelfClosingTag(marginXml, 'w:right');
9663
+ if (rightTag) {
9664
+ const tagName = marginXml.includes('<w:end') ? 'w:end' : 'w:right';
9665
+ const w = XMLParser.extractAttribute(`<${tagName}${rightTag}`, 'w:w');
9666
+ if (w) {
9667
+ margins.right = parseInt(w, 10);
9668
+ }
9669
+ }
9670
+
9347
9671
  return Object.keys(margins).length > 0 ? margins : undefined;
9348
9672
  }
9349
9673
 
@@ -9854,6 +10178,40 @@ export class DocumentParser {
9854
10178
  if (propsObj['w:tblStyle']) {
9855
10179
  result.style = propsObj['w:tblStyle']['@_w:val'] || '';
9856
10180
  }
10181
+ // tblpPr (floating table position)
10182
+ if (propsObj['w:tblpPr']) {
10183
+ const tblpPr = propsObj['w:tblpPr'];
10184
+ const pos: any = {};
10185
+ if (tblpPr['@_w:tblpX']) pos.x = parseInt(tblpPr['@_w:tblpX'], 10);
10186
+ if (tblpPr['@_w:tblpY']) pos.y = parseInt(tblpPr['@_w:tblpY'], 10);
10187
+ if (tblpPr['@_w:horzAnchor']) pos.horizontalAnchor = tblpPr['@_w:horzAnchor'];
10188
+ if (tblpPr['@_w:vertAnchor']) pos.verticalAnchor = tblpPr['@_w:vertAnchor'];
10189
+ if (tblpPr['@_w:leftFromText']) pos.leftFromText = parseInt(tblpPr['@_w:leftFromText'], 10);
10190
+ if (tblpPr['@_w:rightFromText'])
10191
+ pos.rightFromText = parseInt(tblpPr['@_w:rightFromText'], 10);
10192
+ if (tblpPr['@_w:topFromText']) pos.topFromText = parseInt(tblpPr['@_w:topFromText'], 10);
10193
+ if (tblpPr['@_w:bottomFromText'])
10194
+ pos.bottomFromText = parseInt(tblpPr['@_w:bottomFromText'], 10);
10195
+ if (Object.keys(pos).length > 0) result.position = pos;
10196
+ }
10197
+ if (propsObj['w:tblOverlap']) {
10198
+ result.overlap = propsObj['w:tblOverlap']['@_w:val'];
10199
+ }
10200
+ if (propsObj['w:bidiVisual']) {
10201
+ result.bidiVisual = true;
10202
+ }
10203
+ if (propsObj['w:tblStyleRowBandSize']) {
10204
+ result.tblStyleRowBandSize = parseInt(
10205
+ propsObj['w:tblStyleRowBandSize']['@_w:val'] || '1',
10206
+ 10
10207
+ );
10208
+ }
10209
+ if (propsObj['w:tblStyleColBandSize']) {
10210
+ result.tblStyleColBandSize = parseInt(
10211
+ propsObj['w:tblStyleColBandSize']['@_w:val'] || '1',
10212
+ 10
10213
+ );
10214
+ }
9857
10215
  if (propsObj['w:tblW']) {
9858
10216
  result.width = parseInt(propsObj['w:tblW']['@_w:w'] || '0', 10);
9859
10217
  result.widthType = propsObj['w:tblW']['@_w:type'] || 'dxa';
@@ -9863,9 +10221,24 @@ export class DocumentParser {
9863
10221
  }
9864
10222
  if (propsObj['w:tblInd']) {
9865
10223
  result.indent = parseInt(propsObj['w:tblInd']['@_w:w'] || '0', 10);
10224
+ const indType = propsObj['w:tblInd']['@_w:type'];
10225
+ if (indType) result.indentType = indType;
9866
10226
  }
9867
10227
  if (propsObj['w:tblCellSpacing']) {
9868
10228
  result.cellSpacing = parseInt(propsObj['w:tblCellSpacing']['@_w:w'] || '0', 10);
10229
+ const csType = propsObj['w:tblCellSpacing']['@_w:type'];
10230
+ if (csType) result.cellSpacingType = csType;
10231
+ }
10232
+ if (propsObj['w:tblCellMar']) {
10233
+ const cellMar = propsObj['w:tblCellMar'];
10234
+ const margins: any = {};
10235
+ if (cellMar['w:top']) margins.top = parseInt(cellMar['w:top']['@_w:w'] || '0', 10);
10236
+ if (cellMar['w:bottom']) margins.bottom = parseInt(cellMar['w:bottom']['@_w:w'] || '0', 10);
10237
+ const leftSrc = cellMar['w:start'] || cellMar['w:left'];
10238
+ if (leftSrc) margins.left = parseInt(leftSrc['@_w:w'] || '0', 10);
10239
+ const rightSrc = cellMar['w:end'] || cellMar['w:right'];
10240
+ if (rightSrc) margins.right = parseInt(rightSrc['@_w:w'] || '0', 10);
10241
+ if (Object.keys(margins).length > 0) result.cellMargins = margins;
9869
10242
  }
9870
10243
  if (propsObj['w:tblBorders']) {
9871
10244
  const borders: any = {};
@@ -9877,8 +10250,40 @@ export class DocumentParser {
9877
10250
  }
9878
10251
  if (Object.keys(borders).length > 0) result.borders = borders;
9879
10252
  }
10253
+ if (propsObj['w:tblLook']) {
10254
+ const look = propsObj['w:tblLook'];
10255
+ result.tblLook = look['@_w:val'] || '0000';
10256
+ }
10257
+ if (propsObj['w:tblCaption']) {
10258
+ result.caption = propsObj['w:tblCaption']['@_w:val'];
10259
+ }
10260
+ if (propsObj['w:tblDescription']) {
10261
+ result.description = propsObj['w:tblDescription']['@_w:val'];
10262
+ }
9880
10263
 
9881
- // Row-level properties (w:trPr context)
10264
+ // Row-level properties (w:trPr context) — all CT_TrPr elements
10265
+ if (propsObj['w:cnfStyle']) {
10266
+ result.cnfStyle = propsObj['w:cnfStyle']['@_w:val'];
10267
+ }
10268
+ if (propsObj['w:divId']) {
10269
+ result.divId = propsObj['w:divId']['@_w:val'];
10270
+ }
10271
+ if (propsObj['w:gridBefore']) {
10272
+ result.gridBefore = parseInt(propsObj['w:gridBefore']['@_w:val'] || '0', 10);
10273
+ }
10274
+ if (propsObj['w:gridAfter']) {
10275
+ result.gridAfter = parseInt(propsObj['w:gridAfter']['@_w:val'] || '0', 10);
10276
+ }
10277
+ if (propsObj['w:wBefore']) {
10278
+ result.wBefore = parseInt(propsObj['w:wBefore']['@_w:w'] || '0', 10);
10279
+ const wbType = propsObj['w:wBefore']['@_w:type'];
10280
+ if (wbType) result.wBeforeType = wbType;
10281
+ }
10282
+ if (propsObj['w:wAfter']) {
10283
+ result.wAfter = parseInt(propsObj['w:wAfter']['@_w:w'] || '0', 10);
10284
+ const waType = propsObj['w:wAfter']['@_w:type'];
10285
+ if (waType) result.wAfterType = waType;
10286
+ }
9882
10287
  if (propsObj['w:trHeight']) {
9883
10288
  result.height = parseInt(propsObj['w:trHeight']['@_w:val'] || '0', 10);
9884
10289
  const rule = propsObj['w:trHeight']['@_w:hRule'];
@@ -9894,13 +10299,19 @@ export class DocumentParser {
9894
10299
  result.hidden = true;
9895
10300
  }
9896
10301
 
9897
- // Cell-level properties (w:tcPr context)
10302
+ // Cell-level properties (w:tcPr context) — all CT_TcPr elements
9898
10303
  if (propsObj['w:tcW']) {
9899
10304
  result.width = parseInt(propsObj['w:tcW']['@_w:w'] || '0', 10);
9900
10305
  result.widthType = propsObj['w:tcW']['@_w:type'] || 'dxa';
9901
10306
  }
9902
- if (propsObj['w:vAlign']) {
9903
- result.verticalAlignment = propsObj['w:vAlign']['@_w:val'];
10307
+ if (propsObj['w:gridSpan']) {
10308
+ result.columnSpan = parseInt(propsObj['w:gridSpan']['@_w:val'] || '1', 10);
10309
+ }
10310
+ if (propsObj['w:hMerge']) {
10311
+ result.hMerge = propsObj['w:hMerge']['@_w:val'] || 'continue';
10312
+ }
10313
+ if (propsObj['w:vMerge']) {
10314
+ result.vMerge = propsObj['w:vMerge']['@_w:val'] || 'continue';
9904
10315
  }
9905
10316
  if (propsObj['w:tcBorders']) {
9906
10317
  const borders: any = {};
@@ -9912,6 +10323,35 @@ export class DocumentParser {
9912
10323
  }
9913
10324
  if (Object.keys(borders).length > 0) result.borders = borders;
9914
10325
  }
10326
+ if (propsObj['w:noWrap']) {
10327
+ result.noWrap = true;
10328
+ }
10329
+ if (propsObj['w:tcMar']) {
10330
+ const tcMar = propsObj['w:tcMar'];
10331
+ const margins: any = {};
10332
+ if (tcMar['w:top']) margins.top = parseInt(tcMar['w:top']['@_w:w'] || '0', 10);
10333
+ if (tcMar['w:bottom']) margins.bottom = parseInt(tcMar['w:bottom']['@_w:w'] || '0', 10);
10334
+ const leftSrc = tcMar['w:start'] || tcMar['w:left'];
10335
+ if (leftSrc) margins.left = parseInt(leftSrc['@_w:w'] || '0', 10);
10336
+ const rightSrc = tcMar['w:end'] || tcMar['w:right'];
10337
+ if (rightSrc) margins.right = parseInt(rightSrc['@_w:w'] || '0', 10);
10338
+ if (Object.keys(margins).length > 0) result.margins = margins;
10339
+ }
10340
+ if (propsObj['w:textDirection']) {
10341
+ result.textDirection = propsObj['w:textDirection']['@_w:val'];
10342
+ }
10343
+ if (propsObj['w:tcFitText']) {
10344
+ result.fitText = true;
10345
+ }
10346
+ if (propsObj['w:vAlign']) {
10347
+ result.verticalAlignment = propsObj['w:vAlign']['@_w:val'];
10348
+ }
10349
+ if (propsObj['w:hideMark']) {
10350
+ result.hideMark = true;
10351
+ }
10352
+ if (propsObj['w:cnfStyle']) {
10353
+ result.cnfStyle = propsObj['w:cnfStyle']['@_w:val'];
10354
+ }
9915
10355
 
9916
10356
  // Shared properties (appear in multiple contexts)
9917
10357
  if (propsObj['w:jc']) {
@@ -9944,11 +10384,13 @@ export class DocumentParser {
9944
10384
  const width = XMLParser.extractAttribute(pgSz, 'w:w');
9945
10385
  const height = XMLParser.extractAttribute(pgSz, 'w:h');
9946
10386
  const orient = XMLParser.extractAttribute(pgSz, 'w:orient');
10387
+ const code = XMLParser.extractAttribute(pgSz, 'w:code');
9947
10388
  if (width || height) {
9948
10389
  result.pageSize = {
9949
10390
  width: width ? parseInt(width, 10) : undefined,
9950
10391
  height: height ? parseInt(height, 10) : undefined,
9951
10392
  orientation: orient === 'landscape' ? 'landscape' : 'portrait',
10393
+ code: code ? parseInt(code, 10) : undefined,
9952
10394
  };
9953
10395
  }
9954
10396
  }
@@ -9980,6 +10422,34 @@ export class DocumentParser {
9980
10422
  if (val) result.type = val;
9981
10423
  }
9982
10424
 
10425
+ // Line numbering
10426
+ const lnNumElements = XMLParser.extractElements(sectPrXml, 'w:lnNumType');
10427
+ if (lnNumElements.length > 0 && lnNumElements[0]) {
10428
+ const ln = lnNumElements[0];
10429
+ const lnObj: any = {};
10430
+ const countBy = XMLParser.extractAttribute(ln, 'w:countBy');
10431
+ if (countBy) lnObj.countBy = parseInt(countBy, 10);
10432
+ const start = XMLParser.extractAttribute(ln, 'w:start');
10433
+ if (start) lnObj.start = parseInt(start, 10);
10434
+ const restart = XMLParser.extractAttribute(ln, 'w:restart');
10435
+ if (restart) lnObj.restart = restart;
10436
+ const distance = XMLParser.extractAttribute(ln, 'w:distance');
10437
+ if (distance) lnObj.distance = parseInt(distance, 10);
10438
+ if (Object.keys(lnObj).length > 0) result.lineNumbering = lnObj;
10439
+ }
10440
+
10441
+ // Page numbering
10442
+ const pgNumElements = XMLParser.extractElements(sectPrXml, 'w:pgNumType');
10443
+ if (pgNumElements.length > 0 && pgNumElements[0]) {
10444
+ const pn = pgNumElements[0];
10445
+ const pnObj: any = {};
10446
+ const pnStart = XMLParser.extractAttribute(pn, 'w:start');
10447
+ if (pnStart) pnObj.start = parseInt(pnStart, 10);
10448
+ const fmt = XMLParser.extractAttribute(pn, 'w:fmt');
10449
+ if (fmt) pnObj.format = fmt;
10450
+ if (Object.keys(pnObj).length > 0) result.pageNumbering = pnObj;
10451
+ }
10452
+
9983
10453
  // Columns
9984
10454
  const colsElements = XMLParser.extractElements(sectPrXml, 'w:cols');
9985
10455
  if (colsElements.length > 0 && colsElements[0]) {
@@ -9994,6 +10464,49 @@ export class DocumentParser {
9994
10464
  }
9995
10465
  }
9996
10466
 
10467
+ // Form protection
10468
+ if (sectPrXml.includes('<w:formProt')) result.formProt = true;
10469
+
10470
+ // Vertical alignment
10471
+ const vAlignElements = XMLParser.extractElements(sectPrXml, 'w:vAlign');
10472
+ if (vAlignElements.length > 0 && vAlignElements[0]) {
10473
+ const val = XMLParser.extractAttribute(vAlignElements[0], 'w:val');
10474
+ if (val) result.verticalAlignment = val;
10475
+ }
10476
+
10477
+ // Suppress endnotes
10478
+ if (sectPrXml.includes('<w:noEndnote')) result.noEndnote = true;
10479
+
10480
+ // Title page
10481
+ if (sectPrXml.includes('<w:titlePg')) result.titlePage = true;
10482
+
10483
+ // Text direction
10484
+ const textDirElements = XMLParser.extractElements(sectPrXml, 'w:textDirection');
10485
+ if (textDirElements.length > 0 && textDirElements[0]) {
10486
+ const val = XMLParser.extractAttribute(textDirElements[0], 'w:val');
10487
+ if (val) result.textDirection = val;
10488
+ }
10489
+
10490
+ // Bidi section
10491
+ if (sectPrXml.includes('<w:bidi')) result.bidi = true;
10492
+
10493
+ // RTL gutter
10494
+ if (sectPrXml.includes('<w:rtlGutter')) result.rtlGutter = true;
10495
+
10496
+ // Document grid
10497
+ const docGridElements = XMLParser.extractElements(sectPrXml, 'w:docGrid');
10498
+ if (docGridElements.length > 0 && docGridElements[0]) {
10499
+ const dg = docGridElements[0];
10500
+ const dgObj: any = {};
10501
+ const dgType = XMLParser.extractAttribute(dg, 'w:type');
10502
+ if (dgType) dgObj.type = dgType;
10503
+ const linePitch = XMLParser.extractAttribute(dg, 'w:linePitch');
10504
+ if (linePitch) dgObj.linePitch = parseInt(linePitch, 10);
10505
+ const charSpace = XMLParser.extractAttribute(dg, 'w:charSpace');
10506
+ if (charSpace) dgObj.charSpace = parseInt(charSpace, 10);
10507
+ if (Object.keys(dgObj).length > 0) result.docGrid = dgObj;
10508
+ }
10509
+
9997
10510
  return result;
9998
10511
  }
9999
10512
  }