docxmlater 10.1.8 → 10.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -1192,14 +1192,6 @@ export class DocumentParser {
|
|
|
1192
1192
|
}
|
|
1193
1193
|
}
|
|
1194
1194
|
|
|
1195
|
-
// Extract revision XMLs from paragraph content for raw XML parsing
|
|
1196
|
-
const insXmls = XMLParser.extractElements(paraContent, 'w:ins');
|
|
1197
|
-
const delXmls = XMLParser.extractElements(paraContent, 'w:del');
|
|
1198
|
-
const moveFromXmls = XMLParser.extractElements(paraContent, 'w:moveFrom');
|
|
1199
|
-
const moveToXmls = XMLParser.extractElements(paraContent, 'w:moveTo');
|
|
1200
|
-
const bookmarkStartXmls = XMLParser.extractElements(paraContent, 'w:bookmarkStart');
|
|
1201
|
-
const bookmarkEndXmls = XMLParser.extractElements(paraContent, 'w:bookmarkEnd');
|
|
1202
|
-
|
|
1203
1195
|
// Helper to extract raw run XML from paraContent using position
|
|
1204
1196
|
const extractRunXmlAtPosition = (pos: number): string | null => {
|
|
1205
1197
|
// Find the end of this run element
|
|
@@ -1228,6 +1220,40 @@ export class DocumentParser {
|
|
|
1228
1220
|
return null;
|
|
1229
1221
|
};
|
|
1230
1222
|
|
|
1223
|
+
/**
 * Extracts the raw XML of the `tagName` element that starts at `pos` in
 * `paraContent`, tracking nesting depth so that same-named elements nested
 * inside it do not end the match early.
 *
 * @param pos - Index in `paraContent` of the element's opening `<`.
 * @param tagName - Qualified element name to match, e.g. `w:ins`.
 * @returns The element's XML from `pos` through its matching close tag (or
 *   through `/>` for a self-closing element), or `''` when no matching end
 *   can be found.
 */
const extractElementXmlAtPosition = (pos: number, tagName: string): string => {
  const openTag = `<${tagName}`;
  const closeTag = `</${tagName}>`;
  // Characters that may legally follow a complete tag name; anything else
  // means `openTag` only matched a prefix of a longer name.
  const nameTerminators = ['>', ' ', '/', '\t', '\n', '\r'];

  // Index of the '>' that ends the tag starting at `from`, or -1 if none.
  // Quoted attribute values are skipped because XML allows a literal '>'
  // inside them, which a plain indexOf('>') would mistake for the tag end.
  const findTagEnd = (from: number): number => {
    let quote = '';
    for (let i = from; i < paraContent.length; i++) {
      const ch = paraContent.charAt(i);
      if (quote !== '') {
        if (ch === quote) quote = '';
      } else if (ch === '"' || ch === "'") {
        quote = ch;
      } else if (ch === '>') {
        return i;
      }
    }
    return -1;
  };

  const openEnd = findTagEnd(pos);
  if (openEnd === -1) return '';

  // Self-closing element: it has no separate close tag to look for.
  if (paraContent.charAt(openEnd - 1) === '/') {
    return paraContent.substring(pos, openEnd + 1);
  }

  let depth = 1;
  let searchFrom = openEnd + 1;
  while (searchFrom < paraContent.length) {
    const nextClose = paraContent.indexOf(closeTag, searchFrom);
    if (nextClose === -1) break;
    const nextOpen = paraContent.indexOf(openTag, searchFrom);

    if (nextOpen !== -1 && nextOpen < nextClose) {
      const charAfter = paraContent.charAt(nextOpen + openTag.length);
      if (!nameTerminators.includes(charAfter)) {
        // Prefix of a longer element name; not a nested occurrence.
        searchFrom = nextOpen + openTag.length;
        continue;
      }
      const nestedEnd = findTagEnd(nextOpen);
      if (nestedEnd === -1) break;
      // A nested self-closing element has no matching close tag, so it must
      // not raise the depth; otherwise the outer close tag is never reached.
      if (paraContent.charAt(nestedEnd - 1) !== '/') {
        depth++;
      }
      searchFrom = nestedEnd + 1;
    } else {
      depth--;
      if (depth === 0) {
        return paraContent.substring(pos, nextClose + closeTag.length);
      }
      searchFrom = nextClose + closeTag.length;
    }
  }
  return '';
};
|
|
1256
|
+
|
|
1231
1257
|
// Now process children in the order they were found
|
|
1232
1258
|
for (const child of children) {
|
|
1233
1259
|
if (child.type === 'w:r') {
|
|
@@ -1321,82 +1347,20 @@ export class DocumentParser {
|
|
|
1321
1347
|
paragraph.addField(field);
|
|
1322
1348
|
}
|
|
1323
1349
|
}
|
|
1324
|
-
} else if (child.type === 'w:ins'
|
|
1325
|
-
|
|
1326
|
-
|
|
1327
|
-
|
|
1328
|
-
|
|
1329
|
-
|
|
1330
|
-
|
|
1331
|
-
|
|
1332
|
-
|
|
1333
|
-
|
|
1334
|
-
|
|
1335
|
-
if (revResult.revision) {
|
|
1336
|
-
paragraph.addRevision(revResult.revision);
|
|
1337
|
-
}
|
|
1338
|
-
// Add any bookmarks found inside the revision to the paragraph
|
|
1339
|
-
for (const bookmark of revResult.bookmarkStarts) {
|
|
1340
|
-
paragraph.addBookmarkStart(bookmark);
|
|
1341
|
-
}
|
|
1342
|
-
for (const bookmark of revResult.bookmarkEnds) {
|
|
1343
|
-
paragraph.addBookmarkEnd(bookmark);
|
|
1344
|
-
}
|
|
1345
|
-
}
|
|
1346
|
-
}
|
|
1347
|
-
} else if (child.type === 'w:del') {
|
|
1348
|
-
if (child.index < delXmls.length) {
|
|
1349
|
-
const revisionXml = delXmls[child.index];
|
|
1350
|
-
if (revisionXml) {
|
|
1351
|
-
const revResult = await this.parseRevisionFromXml(
|
|
1352
|
-
revisionXml,
|
|
1353
|
-
'w:del',
|
|
1354
|
-
relationshipManager,
|
|
1355
|
-
zipHandler,
|
|
1356
|
-
imageManager
|
|
1357
|
-
);
|
|
1358
|
-
if (revResult.revision) {
|
|
1359
|
-
paragraph.addRevision(revResult.revision);
|
|
1360
|
-
}
|
|
1361
|
-
// Add any bookmarks found inside the revision to the paragraph
|
|
1362
|
-
for (const bookmark of revResult.bookmarkStarts) {
|
|
1363
|
-
paragraph.addBookmarkStart(bookmark);
|
|
1364
|
-
}
|
|
1365
|
-
for (const bookmark of revResult.bookmarkEnds) {
|
|
1366
|
-
paragraph.addBookmarkEnd(bookmark);
|
|
1367
|
-
}
|
|
1368
|
-
}
|
|
1369
|
-
}
|
|
1370
|
-
} else if (child.type === 'w:moveFrom') {
|
|
1371
|
-
if (child.index < moveFromXmls.length) {
|
|
1372
|
-
const revisionXml = moveFromXmls[child.index];
|
|
1373
|
-
if (revisionXml) {
|
|
1374
|
-
const revResult = await this.parseRevisionFromXml(
|
|
1375
|
-
revisionXml,
|
|
1376
|
-
'w:moveFrom',
|
|
1377
|
-
relationshipManager,
|
|
1378
|
-
zipHandler,
|
|
1379
|
-
imageManager
|
|
1380
|
-
);
|
|
1381
|
-
if (revResult.revision) {
|
|
1382
|
-
paragraph.addRevision(revResult.revision);
|
|
1383
|
-
}
|
|
1384
|
-
// Add any bookmarks found inside the revision to the paragraph
|
|
1385
|
-
for (const bookmark of revResult.bookmarkStarts) {
|
|
1386
|
-
paragraph.addBookmarkStart(bookmark);
|
|
1387
|
-
}
|
|
1388
|
-
for (const bookmark of revResult.bookmarkEnds) {
|
|
1389
|
-
paragraph.addBookmarkEnd(bookmark);
|
|
1390
|
-
}
|
|
1391
|
-
}
|
|
1392
|
-
}
|
|
1393
|
-
} else if (child.type === 'w:moveTo') {
|
|
1394
|
-
if (child.index < moveToXmls.length) {
|
|
1395
|
-
const revisionXml = moveToXmls[child.index];
|
|
1396
|
-
if (revisionXml) {
|
|
1350
|
+
} else if (child.type === 'w:ins' || child.type === 'w:del' ||
|
|
1351
|
+
child.type === 'w:moveFrom' || child.type === 'w:moveTo') {
|
|
1352
|
+
const revisionXml = extractElementXmlAtPosition(child.pos, child.type);
|
|
1353
|
+
if (revisionXml) {
|
|
1354
|
+
// Detect nested revision elements (e.g., w:del inside w:ins)
|
|
1355
|
+
const innerContent = revisionXml.substring(revisionXml.indexOf('>') + 1);
|
|
1356
|
+
const hasNestedRevision = /<w:(del|moveFrom|moveTo|ins)\s/.test(innerContent);
|
|
1357
|
+
if (hasNestedRevision) {
|
|
1358
|
+
// Preserve entire nested structure as raw XML for round-trip fidelity
|
|
1359
|
+
paragraph.addContent(new PreservedElement(revisionXml, child.type, 'inline'));
|
|
1360
|
+
} else {
|
|
1397
1361
|
const revResult = await this.parseRevisionFromXml(
|
|
1398
1362
|
revisionXml,
|
|
1399
|
-
|
|
1363
|
+
child.type,
|
|
1400
1364
|
relationshipManager,
|
|
1401
1365
|
zipHandler,
|
|
1402
1366
|
imageManager
|
|
@@ -1404,7 +1368,6 @@ export class DocumentParser {
|
|
|
1404
1368
|
if (revResult.revision) {
|
|
1405
1369
|
paragraph.addRevision(revResult.revision);
|
|
1406
1370
|
}
|
|
1407
|
-
// Add any bookmarks found inside the revision to the paragraph
|
|
1408
1371
|
for (const bookmark of revResult.bookmarkStarts) {
|
|
1409
1372
|
paragraph.addBookmarkStart(bookmark);
|
|
1410
1373
|
}
|
|
@@ -1414,25 +1377,21 @@ export class DocumentParser {
|
|
|
1414
1377
|
}
|
|
1415
1378
|
}
|
|
1416
1379
|
} else if (child.type === 'w:bookmarkStart') {
|
|
1417
|
-
|
|
1418
|
-
if (
|
|
1419
|
-
const bookmarkXml =
|
|
1420
|
-
|
|
1421
|
-
|
|
1422
|
-
|
|
1423
|
-
paragraph.addBookmarkStart(bookmark);
|
|
1424
|
-
}
|
|
1380
|
+
const endPos = paraContent.indexOf('>', child.pos);
|
|
1381
|
+
if (endPos !== -1) {
|
|
1382
|
+
const bookmarkXml = paraContent.substring(child.pos, endPos + 1);
|
|
1383
|
+
const bookmark = this.parseBookmarkStart(bookmarkXml);
|
|
1384
|
+
if (bookmark) {
|
|
1385
|
+
paragraph.addBookmarkStart(bookmark);
|
|
1425
1386
|
}
|
|
1426
1387
|
}
|
|
1427
1388
|
} else if (child.type === 'w:bookmarkEnd') {
|
|
1428
|
-
|
|
1429
|
-
if (
|
|
1430
|
-
const bookmarkXml =
|
|
1431
|
-
|
|
1432
|
-
|
|
1433
|
-
|
|
1434
|
-
paragraph.addBookmarkEnd(bookmark);
|
|
1435
|
-
}
|
|
1389
|
+
const endPos = paraContent.indexOf('>', child.pos);
|
|
1390
|
+
if (endPos !== -1) {
|
|
1391
|
+
const bookmarkXml = paraContent.substring(child.pos, endPos + 1);
|
|
1392
|
+
const bookmark = this.parseBookmarkEnd(bookmarkXml);
|
|
1393
|
+
if (bookmark) {
|
|
1394
|
+
paragraph.addBookmarkEnd(bookmark);
|
|
1436
1395
|
}
|
|
1437
1396
|
}
|
|
1438
1397
|
} else if (child.type === 'w:commentRangeStart' || child.type === 'w:commentRangeEnd') {
|
|
@@ -4554,6 +4513,9 @@ export class DocumentParser {
|
|
|
4554
4513
|
paragraph.addRun(item);
|
|
4555
4514
|
} else if (item instanceof Field) {
|
|
4556
4515
|
paragraph.addField(item);
|
|
4516
|
+
} else {
|
|
4517
|
+
// Preserve all other content types: Revision, RangeMarker, Shape, TextBox, PreservedElement
|
|
4518
|
+
paragraph.addContent(item);
|
|
4557
4519
|
}
|
|
4558
4520
|
}
|
|
4559
4521
|
}
|