docxmlater 7.7.7 → 7.7.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/Document.d.ts.map +1 -1
- package/dist/core/Document.js +1 -1
- package/dist/core/Document.js.map +1 -1
- package/dist/core/DocumentParser.d.ts +7 -1
- package/dist/core/DocumentParser.d.ts.map +1 -1
- package/dist/core/DocumentParser.js +343 -22
- package/dist/core/DocumentParser.js.map +1 -1
- package/package.json +1 -1
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { BookmarkManager } from "../elements/BookmarkManager";
|
|
1
2
|
import { ImageManager } from "../elements/ImageManager";
|
|
2
3
|
import { Paragraph } from "../elements/Paragraph";
|
|
3
4
|
import { Section } from "../elements/Section";
|
|
@@ -18,10 +19,11 @@ type BodyElement = Paragraph | Table | TableOfContentsElement | StructuredDocume
|
|
|
18
19
|
export declare class DocumentParser {
|
|
19
20
|
private parseErrors;
|
|
20
21
|
private strictParsing;
|
|
22
|
+
private bookmarkManager;
|
|
21
23
|
constructor(strictParsing?: boolean);
|
|
22
24
|
getParseErrors(): ParseError[];
|
|
23
25
|
clearParseErrors(): void;
|
|
24
|
-
parseDocument(zipHandler: ZipHandler, relationshipManager: RelationshipManager, imageManager: ImageManager): Promise<{
|
|
26
|
+
parseDocument(zipHandler: ZipHandler, relationshipManager: RelationshipManager, imageManager: ImageManager, bookmarkManager?: BookmarkManager): Promise<{
|
|
25
27
|
bodyElements: BodyElement[];
|
|
26
28
|
properties: DocumentProperties;
|
|
27
29
|
relationshipManager: RelationshipManager;
|
|
@@ -32,6 +34,8 @@ export declare class DocumentParser {
|
|
|
32
34
|
namespaces: Record<string, string>;
|
|
33
35
|
}>;
|
|
34
36
|
private parseBodyElements;
|
|
37
|
+
private extractBodyLevelBookmarkEnds;
|
|
38
|
+
private extractBookmarkEndsFromContent;
|
|
35
39
|
private findNextTag;
|
|
36
40
|
private findNextTopLevelTag;
|
|
37
41
|
private isPositionInsideTable;
|
|
@@ -40,6 +44,8 @@ export declare class DocumentParser {
|
|
|
40
44
|
private parseParagraphWithOrder;
|
|
41
45
|
private parseOrderedParagraphChildren;
|
|
42
46
|
private parseRevisionFromXml;
|
|
47
|
+
private parseBookmarkStart;
|
|
48
|
+
private parseBookmarkEnd;
|
|
43
49
|
private parseParagraphFromObject;
|
|
44
50
|
private parseParagraphPropertiesFromObject;
|
|
45
51
|
private assembleComplexFields;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"DocumentParser.d.ts","sourceRoot":"","sources":["../../src/core/DocumentParser.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"DocumentParser.d.ts","sourceRoot":"","sources":["../../src/core/DocumentParser.ts"],"names":[],"mappings":"AAMA,OAAO,EAAE,eAAe,EAAE,MAAM,6BAA6B,CAAC;AAM9D,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AAExD,OAAO,EAAE,SAAS,EAAyC,MAAM,uBAAuB,CAAC;AAGzF,OAAO,EAEL,OAAO,EAGR,MAAM,qBAAqB,CAAC;AAC7B,OAAO,EAAE,qBAAqB,EAAE,MAAM,mCAAmC,CAAC;AAC1E,OAAO,EAAE,KAAK,EAAE,MAAM,mBAAmB,CAAC;AAG1C,OAAO,EAAE,sBAAsB,EAAE,MAAM,oCAAoC,CAAC;AAE5E,OAAO,EAAE,iBAAiB,EAAE,MAAM,iCAAiC,CAAC;AACpE,OAAO,EAAE,iBAAiB,EAAE,MAAM,iCAAiC,CAAC;AACpE,OAAO,EAAE,KAAK,EAA8B,MAAM,qBAAqB,CAAC;AAcxE,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAE/C,OAAO,EAAE,kBAAkB,EAAE,MAAM,YAAY,CAAC;AAChD,OAAO,EAAE,mBAAmB,EAAE,MAAM,uBAAuB,CAAC;AAK5D,MAAM,WAAW,UAAU;IACzB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,KAAK,CAAC;CACd;AAKD,KAAK,WAAW,GACZ,SAAS,GACT,KAAK,GACL,sBAAsB,GACtB,qBAAqB,CAAC;AAK1B,qBAAa,cAAc;IACzB,OAAO,CAAC,WAAW,CAAoB;IACvC,OAAO,CAAC,aAAa,CAAU;IAC/B,OAAO,CAAC,eAAe,CAAgC;gBAE3C,aAAa,GAAE,OAAe;IAO1C,cAAc,IAAI,UAAU,EAAE;IAO9B,gBAAgB,IAAI,IAAI;IAWlB,aAAa,CACjB,UAAU,EAAE,UAAU,EACtB,mBAAmB,EAAE,mBAAmB,EACxC,YAAY,EAAE,YAAY,EAC1B,eAAe,CAAC,EAAE,eAAe,GAChC,OAAO,CAAC;QACT,YAAY,EAAE,WAAW,EAAE,CAAC;QAC5B,UAAU,EAAE,kBAAkB,CAAC;QAC/B,mBAAmB,EAAE,mBAAmB,CAAC;QACzC,MAAM,EAAE,KAAK,EAAE,CAAC;QAChB,kBAAkB,EAAE,iBAAiB,EAAE,CAAC;QACxC,kBAAkB,EAAE,iBAAiB,EAAE,CAAC;QACxC,OAAO,EAAE,OAAO,GAAG,IAAI,CAAC;QACxB,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KACpC,CAAC;YAyFY,iBAAiB;IA6L/B,OAAO,CAAC,4BAA4B;IAsCpC,OAAO,CAAC,8BAA8B;IA2BtC,OAAO,CAAC,WAAW;IAmCnB,OAAO,CAAC,mBAAmB;IAkC3B,OAAO,CAAC,qBAAqB;IAoB7B,OAAO,CAAC,oBAAoB;IAuB5B,OAAO,CAAC,qBAAqB;YAwEf,uBAAuB;YA4FvB,6BAA6B;YAgW7B,oBAAoB;IAyHlC,OAAO,CAAC,kBAAkB;IAmD1B,OAAO,CAAC,gBAAgB;YA8BV,wBAAwB;IAoLtC,OAAO,CAAC,kCAAkC;IA6Y1C,OAAO,CAAC,qBAAqB;IAsP7B,OAAO,CAAC,4BAA4B;IAiHpC,OAAO,CAAC,oBAAoB;IAiC5B,OAAO,CAAC,0BAA0B;IA8IlC,OAAO,CAAC,wCAAwC;IA0EhD,OAAO,CAAC,0BAA0B;IAmHlC,OAAO,CAAC,kBAAkB;IAsN1B,OAAO,CAAC,wBAAwB;IAwLhC,OAAO,CAAC,0BAA0B;IAiIlC,OAAO,CAAC,8BAA8B;IActC,OAAO,CAAC,0BAA0B;IAwClC,OAAO,CAAC,4BAA4B;YAwLtB,sBAAsB;IAqNpC,OAAO,CAAC,iBAAiB;IA4CzB,OAAO,CAAC,kBAAkB;IA6D1B,OAAO,CAAC,cAAc;IAmBtB,OAAO,CAAC,iBAAiB;YAuBX,oBAAoB;IA4HlC,OAAO,CAAC,8BAA8B;YAuFxB,uBAAuB;IAyErC,OAAO,CAAC,iCAAiC;IAyDzC,OAAO,CAAC,sCAAsC;IA4D9C,OAAO,CAAC,2BAA2B;YAgCrB,wBAAwB;IAkOtC,OAAO,CAAC,yBAAyB;IA0GjC,OAAO,CAAC,UAAU;IAmBlB,OAAO,CAAC,cAAc;YAsCR,kBAAkB;IAqPhC,OAAO,CAAC,6BAA6B;IAwCrC,OAAO,CAAC,4BAA4B;IA6JpC,OAAO,CAAC,4BAA4B;IA6FpC,OAAO,CAAC,sBAAsB;IAmI9B,OAAO,CAAC,cAAc;IA2BtB,OAAO,CAAC,WAAW;IAgHnB,OAAO,CAAC,kBAAkB;IAoB1B,OAAO,CAAC,eAAe;IAuEvB,OAAO,CAAC,qBAAqB;IA8D7B,OAAO,CAAC,WAAW;IA8CnB,OAAO,CAAC,cAAc;IAqEtB,OAAO,CAAC,sBAAsB;IAoP9B,OAAO,CAAC,UAAU;IAwMlB,OAAO,CAAC,+BAA+B;IAqHvC,OAAO,CAAC,yBAAyB;IAyJjC,OAAO,CAAC,yBAAyB;IA+EjC,OAAO,CAAC,2BAA2B;IAqEnC,OAAO,CAAC,+BAA+B;IAmDvC,OAAO,CAAC,8BAA8B;IA8CtC,OAAO,CAAC,iCAAiC;IA6FzC,OAAO,CAAC,mBAAmB;IAqF3B,OAAO,CAAC,mBAAmB;IAqB3B,OAAO,CAAC,uBAAuB;IA6B/B,MAAM,CAAC,SAAS,CAAC,UAAU,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI;IA+BzE,MAAM,CAAC,SAAS,CACd,UAAU,EAAE,UAAU,EACtB,QAAQ,EAAE,MAAM,EAChB,UAAU,EAAE,MAAM,GACjB,OAAO;IAwBV,MAAM,CAAC,gBAAgB,CACrB,UAAU,EAAE,UAAU,EACtB,QAAQ,EAAE,MAAM,GACf,KAAK,CAAC;QACP,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,IAAI,CAAC,EAAE,MAAM,CAAC;QACd,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,UAAU,CAAC,EAAE,MAAM,CAAC;KACrB,CAAC;IAmFF,OAAO,CAAC,eAAe;IA4BjB,sBAAsB,CAC1B,UAAU,EAAE,UAAU,EACtB,OAAO,EAAE,OAAO,GAAG,IAAI,EACvB,mBAAmB,EAAE,mBAAmB,EACxC,YAAY,EAAE,YAAY,GACzB,OAAO,CAAC;QACT,OAAO,EAAE,KAAK,CAAC;YACb,MAAM,EAAE,OAAO,oBAAoB,EAAE,MAAM,CAAC;YAC5C,cAAc,EAAE,MAAM,CAAC;YACvB,QAAQ,EAAE,MAAM,CAAC;SAClB,CAAC,CAAC;QACH,OAAO,EAAE,KAAK,CAAC;YACb,MAAM,EAAE,OAAO,oBAAoB,EAAE,MAAM,CAAC;YAC5C,cAAc,EAAE,MAAM,CAAC;YACvB,QAAQ,EAAE,MAAM,CAAC;SAClB,CAAC,CAAC;KACJ,CAAC;YA4FY,WAAW;YA6DX,WAAW;CAmD1B"}
|
|
@@ -34,6 +34,7 @@ var __importStar = (this && this.__importStar) || (function () {
|
|
|
34
34
|
})();
|
|
35
35
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
36
|
exports.DocumentParser = void 0;
|
|
37
|
+
const Bookmark_1 = require("../elements/Bookmark");
|
|
37
38
|
const Field_1 = require("../elements/Field");
|
|
38
39
|
const FieldHelpers_1 = require("../elements/FieldHelpers");
|
|
39
40
|
const Footer_1 = require("../elements/Footer");
|
|
@@ -65,6 +66,7 @@ const RelationshipManager_1 = require("./RelationshipManager");
|
|
|
65
66
|
class DocumentParser {
|
|
66
67
|
parseErrors = [];
|
|
67
68
|
strictParsing;
|
|
69
|
+
bookmarkManager = null;
|
|
68
70
|
constructor(strictParsing = false) {
|
|
69
71
|
this.strictParsing = strictParsing;
|
|
70
72
|
}
|
|
@@ -74,9 +76,10 @@ class DocumentParser {
|
|
|
74
76
|
clearParseErrors() {
|
|
75
77
|
this.parseErrors = [];
|
|
76
78
|
}
|
|
77
|
-
async parseDocument(zipHandler, relationshipManager, imageManager) {
|
|
79
|
+
async parseDocument(zipHandler, relationshipManager, imageManager, bookmarkManager) {
|
|
78
80
|
const logger = getLogger();
|
|
79
81
|
logger.info('Parsing document');
|
|
82
|
+
this.bookmarkManager = bookmarkManager || null;
|
|
80
83
|
const docXml = zipHandler.getFileAsString(types_1.DOCX_PATHS.DOCUMENT);
|
|
81
84
|
if (!docXml) {
|
|
82
85
|
logger.error('Invalid document: word/document.xml not found');
|
|
@@ -145,6 +148,34 @@ class DocumentParser {
|
|
|
145
148
|
candidates.sort((a, b) => a.pos - b.pos);
|
|
146
149
|
const next = candidates[0];
|
|
147
150
|
if (next) {
|
|
151
|
+
if (bodyElements.length > 0 && next.pos > pos) {
|
|
152
|
+
const bookmarkEnds = this.extractBodyLevelBookmarkEnds(bodyContent, pos, next.pos);
|
|
153
|
+
if (bookmarkEnds.length > 0) {
|
|
154
|
+
const prevElement = bodyElements[bodyElements.length - 1];
|
|
155
|
+
if (prevElement instanceof Paragraph_1.Paragraph) {
|
|
156
|
+
for (const bookmark of bookmarkEnds) {
|
|
157
|
+
prevElement.addBookmarkEnd(bookmark);
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
else if (prevElement instanceof Table_1.Table) {
|
|
161
|
+
const rows = prevElement.getRows();
|
|
162
|
+
const lastRow = rows[rows.length - 1];
|
|
163
|
+
if (lastRow) {
|
|
164
|
+
const cells = lastRow.getCells();
|
|
165
|
+
const lastCell = cells[cells.length - 1];
|
|
166
|
+
if (lastCell) {
|
|
167
|
+
const cellParas = lastCell.getParagraphs();
|
|
168
|
+
const lastPara = cellParas[cellParas.length - 1];
|
|
169
|
+
if (lastPara) {
|
|
170
|
+
for (const bookmark of bookmarkEnds) {
|
|
171
|
+
lastPara.addBookmarkEnd(bookmark);
|
|
172
|
+
}
|
|
173
|
+
}
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
}
|
|
148
179
|
if (next.type === "p") {
|
|
149
180
|
const elementXml = this.extractSingleElement(bodyContent, "w:p", next.pos);
|
|
150
181
|
if (elementXml) {
|
|
@@ -189,10 +220,81 @@ class DocumentParser {
|
|
|
189
220
|
}
|
|
190
221
|
}
|
|
191
222
|
}
|
|
223
|
+
if (bodyElements.length > 0 && pos < bodyContent.length) {
|
|
224
|
+
const trailingBookmarkEnds = this.extractBodyLevelBookmarkEnds(bodyContent, pos, -1);
|
|
225
|
+
if (trailingBookmarkEnds.length > 0) {
|
|
226
|
+
const lastElement = bodyElements[bodyElements.length - 1];
|
|
227
|
+
if (lastElement instanceof Paragraph_1.Paragraph) {
|
|
228
|
+
for (const bookmark of trailingBookmarkEnds) {
|
|
229
|
+
lastElement.addBookmarkEnd(bookmark);
|
|
230
|
+
}
|
|
231
|
+
}
|
|
232
|
+
else if (lastElement instanceof Table_1.Table) {
|
|
233
|
+
const rows = lastElement.getRows();
|
|
234
|
+
const lastRow = rows[rows.length - 1];
|
|
235
|
+
if (lastRow) {
|
|
236
|
+
const cells = lastRow.getCells();
|
|
237
|
+
const lastCell = cells[cells.length - 1];
|
|
238
|
+
if (lastCell) {
|
|
239
|
+
const cellParas = lastCell.getParagraphs();
|
|
240
|
+
const lastPara = cellParas[cellParas.length - 1];
|
|
241
|
+
if (lastPara) {
|
|
242
|
+
for (const bookmark of trailingBookmarkEnds) {
|
|
243
|
+
lastPara.addBookmarkEnd(bookmark);
|
|
244
|
+
}
|
|
245
|
+
}
|
|
246
|
+
}
|
|
247
|
+
}
|
|
248
|
+
}
|
|
249
|
+
}
|
|
250
|
+
}
|
|
192
251
|
this.assembleMultiParagraphFields(bodyElements);
|
|
193
252
|
this.validateLoadedContent(bodyElements);
|
|
194
253
|
return bodyElements;
|
|
195
254
|
}
|
|
255
|
+
extractBodyLevelBookmarkEnds(content, startPos, endPos) {
|
|
256
|
+
const bookmarks = [];
|
|
257
|
+
const searchContent = endPos === -1
|
|
258
|
+
? content.slice(startPos)
|
|
259
|
+
: content.slice(startPos, endPos);
|
|
260
|
+
const bookmarkEndRegex = /<w:bookmarkEnd[^>]*w:id="(\d+)"[^>]*\/?>/g;
|
|
261
|
+
let match;
|
|
262
|
+
while ((match = bookmarkEndRegex.exec(searchContent)) !== null) {
|
|
263
|
+
const idStr = match[1];
|
|
264
|
+
if (idStr) {
|
|
265
|
+
const id = parseInt(idStr, 10);
|
|
266
|
+
if (!isNaN(id)) {
|
|
267
|
+
const bookmark = new Bookmark_1.Bookmark({
|
|
268
|
+
name: `_end_${id}`,
|
|
269
|
+
id: id,
|
|
270
|
+
skipNormalization: true,
|
|
271
|
+
});
|
|
272
|
+
bookmarks.push(bookmark);
|
|
273
|
+
}
|
|
274
|
+
}
|
|
275
|
+
}
|
|
276
|
+
return bookmarks;
|
|
277
|
+
}
|
|
278
|
+
extractBookmarkEndsFromContent(content) {
|
|
279
|
+
const bookmarks = [];
|
|
280
|
+
const bookmarkEndRegex = /<w:bookmarkEnd[^>]*w:id="(\d+)"[^>]*\/?>/g;
|
|
281
|
+
let match;
|
|
282
|
+
while ((match = bookmarkEndRegex.exec(content)) !== null) {
|
|
283
|
+
const idStr = match[1];
|
|
284
|
+
if (idStr) {
|
|
285
|
+
const id = parseInt(idStr, 10);
|
|
286
|
+
if (!isNaN(id)) {
|
|
287
|
+
const bookmark = new Bookmark_1.Bookmark({
|
|
288
|
+
name: `_end_${id}`,
|
|
289
|
+
id: id,
|
|
290
|
+
skipNormalization: true,
|
|
291
|
+
});
|
|
292
|
+
bookmarks.push(bookmark);
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
}
|
|
296
|
+
return bookmarks;
|
|
297
|
+
}
|
|
196
298
|
findNextTag(content, tagName, startPos) {
|
|
197
299
|
const tag = `<${tagName}`;
|
|
198
300
|
let pos = content.indexOf(tag, startPos);
|
|
@@ -344,6 +446,8 @@ class DocumentParser {
|
|
|
344
446
|
let delIndex = 0;
|
|
345
447
|
let moveFromIndex = 0;
|
|
346
448
|
let moveToIndex = 0;
|
|
449
|
+
let bookmarkStartIndex = 0;
|
|
450
|
+
let bookmarkEndIndex = 0;
|
|
347
451
|
const findClosingTagEnd = (content, tagName, startPos) => {
|
|
348
452
|
const closingTag = `</${tagName}>`;
|
|
349
453
|
const closingPos = content.indexOf(closingTag, startPos);
|
|
@@ -417,6 +521,22 @@ class DocumentParser {
|
|
|
417
521
|
});
|
|
418
522
|
searchPos = selfClosing ? tagEnd + 1 : findClosingTagEnd(paraContent, "w:moveTo", tagEnd);
|
|
419
523
|
}
|
|
524
|
+
else if (tagName === "w:bookmarkStart") {
|
|
525
|
+
children.push({
|
|
526
|
+
type: "w:bookmarkStart",
|
|
527
|
+
pos: tagStart,
|
|
528
|
+
index: bookmarkStartIndex++,
|
|
529
|
+
});
|
|
530
|
+
searchPos = tagEnd + 1;
|
|
531
|
+
}
|
|
532
|
+
else if (tagName === "w:bookmarkEnd") {
|
|
533
|
+
children.push({
|
|
534
|
+
type: "w:bookmarkEnd",
|
|
535
|
+
pos: tagStart,
|
|
536
|
+
index: bookmarkEndIndex++,
|
|
537
|
+
});
|
|
538
|
+
searchPos = tagEnd + 1;
|
|
539
|
+
}
|
|
420
540
|
else {
|
|
421
541
|
searchPos = tagEnd + 1;
|
|
422
542
|
}
|
|
@@ -425,6 +545,8 @@ class DocumentParser {
|
|
|
425
545
|
const delXmls = XMLParser_1.XMLParser.extractElements(paraContent, "w:del");
|
|
426
546
|
const moveFromXmls = XMLParser_1.XMLParser.extractElements(paraContent, "w:moveFrom");
|
|
427
547
|
const moveToXmls = XMLParser_1.XMLParser.extractElements(paraContent, "w:moveTo");
|
|
548
|
+
const bookmarkStartXmls = XMLParser_1.XMLParser.extractElements(paraContent, "w:bookmarkStart");
|
|
549
|
+
const bookmarkEndXmls = XMLParser_1.XMLParser.extractElements(paraContent, "w:bookmarkEnd");
|
|
428
550
|
const extractRunXmlAtPosition = (pos) => {
|
|
429
551
|
const closeTag = "</w:r>";
|
|
430
552
|
let depth = 1;
|
|
@@ -492,9 +614,15 @@ class DocumentParser {
|
|
|
492
614
|
? [hyperlinks]
|
|
493
615
|
: [];
|
|
494
616
|
if (child.index < hyperlinkArray.length) {
|
|
495
|
-
const
|
|
496
|
-
if (hyperlink) {
|
|
497
|
-
paragraph.addHyperlink(hyperlink);
|
|
617
|
+
const result = this.parseHyperlinkFromObject(hyperlinkArray[child.index], relationshipManager);
|
|
618
|
+
if (result.hyperlink) {
|
|
619
|
+
paragraph.addHyperlink(result.hyperlink);
|
|
620
|
+
}
|
|
621
|
+
for (const bookmark of result.bookmarkStarts) {
|
|
622
|
+
paragraph.addBookmarkStart(bookmark);
|
|
623
|
+
}
|
|
624
|
+
for (const bookmark of result.bookmarkEnds) {
|
|
625
|
+
paragraph.addBookmarkEnd(bookmark);
|
|
498
626
|
}
|
|
499
627
|
}
|
|
500
628
|
}
|
|
@@ -556,6 +684,28 @@ class DocumentParser {
|
|
|
556
684
|
}
|
|
557
685
|
}
|
|
558
686
|
}
|
|
687
|
+
else if (child.type === "w:bookmarkStart") {
|
|
688
|
+
if (child.index < bookmarkStartXmls.length) {
|
|
689
|
+
const bookmarkXml = bookmarkStartXmls[child.index];
|
|
690
|
+
if (bookmarkXml) {
|
|
691
|
+
const bookmark = this.parseBookmarkStart(bookmarkXml);
|
|
692
|
+
if (bookmark) {
|
|
693
|
+
paragraph.addBookmarkStart(bookmark);
|
|
694
|
+
}
|
|
695
|
+
}
|
|
696
|
+
}
|
|
697
|
+
}
|
|
698
|
+
else if (child.type === "w:bookmarkEnd") {
|
|
699
|
+
if (child.index < bookmarkEndXmls.length) {
|
|
700
|
+
const bookmarkXml = bookmarkEndXmls[child.index];
|
|
701
|
+
if (bookmarkXml) {
|
|
702
|
+
const bookmark = this.parseBookmarkEnd(bookmarkXml);
|
|
703
|
+
if (bookmark) {
|
|
704
|
+
paragraph.addBookmarkEnd(bookmark);
|
|
705
|
+
}
|
|
706
|
+
}
|
|
707
|
+
}
|
|
708
|
+
}
|
|
559
709
|
}
|
|
560
710
|
}
|
|
561
711
|
async parseRevisionFromXml(revisionXml, tagName, relationshipManager, zipHandler, imageManager) {
|
|
@@ -609,9 +759,9 @@ class DocumentParser {
|
|
|
609
759
|
}
|
|
610
760
|
for (const hyperlinkXml of hyperlinkXmls) {
|
|
611
761
|
const hyperlinkObj = XMLParser_1.XMLParser.parseToObject(hyperlinkXml, { trimValues: false });
|
|
612
|
-
const
|
|
613
|
-
if (hyperlink) {
|
|
614
|
-
content.push(hyperlink);
|
|
762
|
+
const result = this.parseHyperlinkFromObject(hyperlinkObj["w:hyperlink"], relationshipManager);
|
|
763
|
+
if (result.hyperlink) {
|
|
764
|
+
content.push(result.hyperlink);
|
|
615
765
|
}
|
|
616
766
|
}
|
|
617
767
|
if (content.length === 0) {
|
|
@@ -634,6 +784,57 @@ class DocumentParser {
|
|
|
634
784
|
return null;
|
|
635
785
|
}
|
|
636
786
|
}
|
|
787
|
+
parseBookmarkStart(bookmarkXml) {
|
|
788
|
+
try {
|
|
789
|
+
const idAttr = XMLParser_1.XMLParser.extractAttribute(bookmarkXml, "w:id");
|
|
790
|
+
const nameAttr = XMLParser_1.XMLParser.extractAttribute(bookmarkXml, "w:name");
|
|
791
|
+
if (!idAttr || !nameAttr) {
|
|
792
|
+
return null;
|
|
793
|
+
}
|
|
794
|
+
const id = parseInt(idAttr, 10);
|
|
795
|
+
const bookmark = new Bookmark_1.Bookmark({
|
|
796
|
+
name: nameAttr,
|
|
797
|
+
id: id,
|
|
798
|
+
skipNormalization: true,
|
|
799
|
+
});
|
|
800
|
+
if (this.bookmarkManager) {
|
|
801
|
+
try {
|
|
802
|
+
this.bookmarkManager.registerExisting(bookmark);
|
|
803
|
+
}
|
|
804
|
+
catch (e) {
|
|
805
|
+
logger_1.defaultLogger.debug("[DocumentParser] Bookmark already registered:", { name: nameAttr, id: id });
|
|
806
|
+
}
|
|
807
|
+
}
|
|
808
|
+
return bookmark;
|
|
809
|
+
}
|
|
810
|
+
catch (error) {
|
|
811
|
+
logger_1.defaultLogger.warn("[DocumentParser] Failed to parse bookmark start:", error instanceof Error
|
|
812
|
+
? { message: error.message }
|
|
813
|
+
: { error: String(error) });
|
|
814
|
+
return null;
|
|
815
|
+
}
|
|
816
|
+
}
|
|
817
|
+
parseBookmarkEnd(bookmarkXml) {
|
|
818
|
+
try {
|
|
819
|
+
const idAttr = XMLParser_1.XMLParser.extractAttribute(bookmarkXml, "w:id");
|
|
820
|
+
if (!idAttr) {
|
|
821
|
+
return null;
|
|
822
|
+
}
|
|
823
|
+
const id = parseInt(idAttr, 10);
|
|
824
|
+
const bookmark = new Bookmark_1.Bookmark({
|
|
825
|
+
name: `_end_${id}`,
|
|
826
|
+
id: id,
|
|
827
|
+
skipNormalization: true,
|
|
828
|
+
});
|
|
829
|
+
return bookmark;
|
|
830
|
+
}
|
|
831
|
+
catch (error) {
|
|
832
|
+
logger_1.defaultLogger.warn("[DocumentParser] Failed to parse bookmark end:", error instanceof Error
|
|
833
|
+
? { message: error.message }
|
|
834
|
+
: { error: String(error) });
|
|
835
|
+
return null;
|
|
836
|
+
}
|
|
837
|
+
}
|
|
637
838
|
async parseParagraphFromObject(paraObj, relationshipManager, zipHandler, imageManager) {
|
|
638
839
|
try {
|
|
639
840
|
const paragraph = new Paragraph_1.Paragraph();
|
|
@@ -676,9 +877,15 @@ class DocumentParser {
|
|
|
676
877
|
? [hyperlinks]
|
|
677
878
|
: [];
|
|
678
879
|
if (elementIndex < hyperlinkArray.length) {
|
|
679
|
-
const
|
|
680
|
-
if (hyperlink) {
|
|
681
|
-
paragraph.addHyperlink(hyperlink);
|
|
880
|
+
const result = this.parseHyperlinkFromObject(hyperlinkArray[elementIndex], relationshipManager);
|
|
881
|
+
if (result.hyperlink) {
|
|
882
|
+
paragraph.addHyperlink(result.hyperlink);
|
|
883
|
+
}
|
|
884
|
+
for (const bookmark of result.bookmarkStarts) {
|
|
885
|
+
paragraph.addBookmarkStart(bookmark);
|
|
886
|
+
}
|
|
887
|
+
for (const bookmark of result.bookmarkEnds) {
|
|
888
|
+
paragraph.addBookmarkEnd(bookmark);
|
|
682
889
|
}
|
|
683
890
|
}
|
|
684
891
|
}
|
|
@@ -724,9 +931,15 @@ class DocumentParser {
|
|
|
724
931
|
? [hyperlinks]
|
|
725
932
|
: [];
|
|
726
933
|
for (const hyperlinkObj of hyperlinkChildren) {
|
|
727
|
-
const
|
|
728
|
-
if (hyperlink) {
|
|
729
|
-
paragraph.addHyperlink(hyperlink);
|
|
934
|
+
const result = this.parseHyperlinkFromObject(hyperlinkObj, relationshipManager);
|
|
935
|
+
if (result.hyperlink) {
|
|
936
|
+
paragraph.addHyperlink(result.hyperlink);
|
|
937
|
+
}
|
|
938
|
+
for (const bookmark of result.bookmarkStarts) {
|
|
939
|
+
paragraph.addBookmarkStart(bookmark);
|
|
940
|
+
}
|
|
941
|
+
for (const bookmark of result.bookmarkEnds) {
|
|
942
|
+
paragraph.addBookmarkEnd(bookmark);
|
|
730
943
|
}
|
|
731
944
|
}
|
|
732
945
|
const fields = paraObj["w:fldSimple"];
|
|
@@ -1731,7 +1944,48 @@ class DocumentParser {
|
|
|
1731
1944
|
}
|
|
1732
1945
|
}
|
|
1733
1946
|
parseHyperlinkFromObject(hyperlinkObj, relationshipManager) {
|
|
1947
|
+
const result = { hyperlink: null, bookmarkStarts: [], bookmarkEnds: [] };
|
|
1734
1948
|
try {
|
|
1949
|
+
if (hyperlinkObj["w:bookmarkStart"]) {
|
|
1950
|
+
const bookmarkStarts = Array.isArray(hyperlinkObj["w:bookmarkStart"])
|
|
1951
|
+
? hyperlinkObj["w:bookmarkStart"]
|
|
1952
|
+
: [hyperlinkObj["w:bookmarkStart"]];
|
|
1953
|
+
for (const bs of bookmarkStarts) {
|
|
1954
|
+
const id = bs["@_w:id"];
|
|
1955
|
+
const name = bs["@_w:name"];
|
|
1956
|
+
if (id !== undefined && name) {
|
|
1957
|
+
const bookmark = new Bookmark_1.Bookmark({
|
|
1958
|
+
name: name,
|
|
1959
|
+
id: typeof id === "number" ? id : parseInt(id, 10),
|
|
1960
|
+
skipNormalization: true,
|
|
1961
|
+
});
|
|
1962
|
+
result.bookmarkStarts.push(bookmark);
|
|
1963
|
+
if (this.bookmarkManager) {
|
|
1964
|
+
try {
|
|
1965
|
+
this.bookmarkManager.registerExisting(bookmark);
|
|
1966
|
+
}
|
|
1967
|
+
catch {
|
|
1968
|
+
}
|
|
1969
|
+
}
|
|
1970
|
+
}
|
|
1971
|
+
}
|
|
1972
|
+
}
|
|
1973
|
+
if (hyperlinkObj["w:bookmarkEnd"]) {
|
|
1974
|
+
const bookmarkEnds = Array.isArray(hyperlinkObj["w:bookmarkEnd"])
|
|
1975
|
+
? hyperlinkObj["w:bookmarkEnd"]
|
|
1976
|
+
: [hyperlinkObj["w:bookmarkEnd"]];
|
|
1977
|
+
for (const be of bookmarkEnds) {
|
|
1978
|
+
const id = be["@_w:id"];
|
|
1979
|
+
if (id !== undefined) {
|
|
1980
|
+
const bookmark = new Bookmark_1.Bookmark({
|
|
1981
|
+
name: `_end_${id}`,
|
|
1982
|
+
id: typeof id === "number" ? id : parseInt(id, 10),
|
|
1983
|
+
skipNormalization: true,
|
|
1984
|
+
});
|
|
1985
|
+
result.bookmarkEnds.push(bookmark);
|
|
1986
|
+
}
|
|
1987
|
+
}
|
|
1988
|
+
}
|
|
1735
1989
|
const relationshipId = hyperlinkObj["@_r:id"];
|
|
1736
1990
|
const anchor = hyperlinkObj["@_w:anchor"];
|
|
1737
1991
|
const tooltip = hyperlinkObj["@_w:tooltip"];
|
|
@@ -1793,13 +2047,14 @@ class DocumentParser {
|
|
|
1793
2047
|
if (parsedRun && parsedRun.getContent().length > 1) {
|
|
1794
2048
|
hyperlink.setRun(parsedRun);
|
|
1795
2049
|
}
|
|
1796
|
-
|
|
2050
|
+
result.hyperlink = hyperlink;
|
|
2051
|
+
return result;
|
|
1797
2052
|
}
|
|
1798
2053
|
catch (error) {
|
|
1799
2054
|
logger_1.defaultLogger.warn("[DocumentParser] Failed to parse hyperlink:", error instanceof Error
|
|
1800
2055
|
? { message: error.message, stack: error.stack }
|
|
1801
2056
|
: { error: String(error) });
|
|
1802
|
-
return
|
|
2057
|
+
return result;
|
|
1803
2058
|
}
|
|
1804
2059
|
}
|
|
1805
2060
|
mergeConsecutiveHyperlinks(paragraph, resetFormatting = false) {
|
|
@@ -2373,12 +2628,49 @@ class DocumentParser {
|
|
|
2373
2628
|
}
|
|
2374
2629
|
rowXmls = XMLParser_1.XMLParser.extractElements(rawTableXml, "w:tr");
|
|
2375
2630
|
}
|
|
2631
|
+
const rowPositions = [];
|
|
2632
|
+
if (rawTableXml) {
|
|
2633
|
+
let searchPos = 0;
|
|
2634
|
+
for (const rowXml of rowXmls) {
|
|
2635
|
+
const rowStart = rawTableXml.indexOf(rowXml, searchPos);
|
|
2636
|
+
if (rowStart !== -1) {
|
|
2637
|
+
rowPositions.push({
|
|
2638
|
+
start: rowStart,
|
|
2639
|
+
end: rowStart + rowXml.length,
|
|
2640
|
+
});
|
|
2641
|
+
searchPos = rowStart + rowXml.length;
|
|
2642
|
+
}
|
|
2643
|
+
}
|
|
2644
|
+
}
|
|
2376
2645
|
for (let i = 0; i < rowChildren.length; i++) {
|
|
2377
2646
|
const rowObj = rowChildren[i];
|
|
2378
2647
|
const rawRowXml = i < rowXmls.length ? rowXmls[i] : undefined;
|
|
2379
2648
|
const row = await this.parseTableRowFromObject(rowObj, relationshipManager, zipHandler, imageManager, rawRowXml);
|
|
2380
2649
|
if (row) {
|
|
2381
2650
|
table.addRow(row);
|
|
2651
|
+
if (rawTableXml && i < rowPositions.length) {
|
|
2652
|
+
const currentRowEnd = rowPositions[i]?.end || 0;
|
|
2653
|
+
const nextRowStart = i + 1 < rowPositions.length
|
|
2654
|
+
? rowPositions[i + 1]?.start
|
|
2655
|
+
: rawTableXml.length;
|
|
2656
|
+
if (nextRowStart && currentRowEnd < nextRowStart) {
|
|
2657
|
+
const betweenContent = rawTableXml.slice(currentRowEnd, nextRowStart);
|
|
2658
|
+
const bookmarkEnds = this.extractBookmarkEndsFromContent(betweenContent);
|
|
2659
|
+
if (bookmarkEnds.length > 0) {
|
|
2660
|
+
const cells = row.getCells();
|
|
2661
|
+
const lastCell = cells[cells.length - 1];
|
|
2662
|
+
if (lastCell) {
|
|
2663
|
+
const cellParas = lastCell.getParagraphs();
|
|
2664
|
+
const lastPara = cellParas[cellParas.length - 1];
|
|
2665
|
+
if (lastPara) {
|
|
2666
|
+
for (const bookmark of bookmarkEnds) {
|
|
2667
|
+
lastPara.addBookmarkEnd(bookmark);
|
|
2668
|
+
}
|
|
2669
|
+
}
|
|
2670
|
+
}
|
|
2671
|
+
}
|
|
2672
|
+
}
|
|
2673
|
+
}
|
|
2382
2674
|
}
|
|
2383
2675
|
}
|
|
2384
2676
|
return table;
|
|
@@ -2728,17 +3020,27 @@ class DocumentParser {
|
|
|
2728
3020
|
if (rawCellXml) {
|
|
2729
3021
|
const cellContent = this.extractCellContentInOrder(rawCellXml);
|
|
2730
3022
|
let paragraphIndex = 0;
|
|
3023
|
+
let lastParagraph = null;
|
|
2731
3024
|
for (const item of cellContent) {
|
|
2732
3025
|
if (item.type === "paragraph") {
|
|
2733
3026
|
const paragraph = await this.parseParagraphWithOrder(item.xml, relationshipManager, zipHandler, imageManager);
|
|
2734
3027
|
if (paragraph) {
|
|
2735
3028
|
cell.addParagraph(paragraph);
|
|
3029
|
+
lastParagraph = paragraph;
|
|
2736
3030
|
paragraphIndex++;
|
|
2737
3031
|
}
|
|
2738
3032
|
}
|
|
2739
3033
|
else if (item.type === "table" || item.type === "sdt") {
|
|
2740
3034
|
cell.addRawNestedContent(paragraphIndex, item.xml, item.type);
|
|
2741
3035
|
}
|
|
3036
|
+
else if (item.type === "bookmarkEnd") {
|
|
3037
|
+
if (lastParagraph) {
|
|
3038
|
+
const bookmarkEnds = this.extractBookmarkEndsFromContent(item.xml);
|
|
3039
|
+
for (const bookmark of bookmarkEnds) {
|
|
3040
|
+
lastParagraph.addBookmarkEnd(bookmark);
|
|
3041
|
+
}
|
|
3042
|
+
}
|
|
3043
|
+
}
|
|
2742
3044
|
}
|
|
2743
3045
|
}
|
|
2744
3046
|
else {
|
|
@@ -2783,6 +3085,7 @@ class DocumentParser {
|
|
|
2783
3085
|
const pStart = content.indexOf("<w:p", pos);
|
|
2784
3086
|
const tblStart = content.indexOf("<w:tbl", pos);
|
|
2785
3087
|
const sdtStart = content.indexOf("<w:sdt", pos);
|
|
3088
|
+
const bookmarkEndStart = content.indexOf("<w:bookmarkEnd", pos);
|
|
2786
3089
|
let nextStart = -1;
|
|
2787
3090
|
let nextType = null;
|
|
2788
3091
|
let nextTag = "";
|
|
@@ -2807,16 +3110,34 @@ class DocumentParser {
|
|
|
2807
3110
|
nextType = "sdt";
|
|
2808
3111
|
nextTag = "w:sdt";
|
|
2809
3112
|
}
|
|
3113
|
+
if (bookmarkEndStart !== -1 &&
|
|
3114
|
+
(nextStart === -1 || bookmarkEndStart < nextStart)) {
|
|
3115
|
+
nextStart = bookmarkEndStart;
|
|
3116
|
+
nextType = "bookmarkEnd";
|
|
3117
|
+
nextTag = "w:bookmarkEnd";
|
|
3118
|
+
}
|
|
2810
3119
|
if (nextStart === -1 || nextType === null)
|
|
2811
3120
|
break;
|
|
2812
|
-
|
|
2813
|
-
|
|
2814
|
-
|
|
2815
|
-
|
|
3121
|
+
if (nextType === "bookmarkEnd") {
|
|
3122
|
+
const elementEnd = content.indexOf(">", nextStart) + 1;
|
|
3123
|
+
if (elementEnd === 0) {
|
|
3124
|
+
pos = nextStart + 1;
|
|
3125
|
+
continue;
|
|
3126
|
+
}
|
|
3127
|
+
const elementXml = content.substring(nextStart, elementEnd);
|
|
3128
|
+
result.push({ type: nextType, xml: elementXml });
|
|
3129
|
+
pos = elementEnd;
|
|
3130
|
+
}
|
|
3131
|
+
else {
|
|
3132
|
+
const elementEnd = this.findClosingTag(content, nextTag, nextStart);
|
|
3133
|
+
if (elementEnd === -1) {
|
|
3134
|
+
pos = nextStart + 1;
|
|
3135
|
+
continue;
|
|
3136
|
+
}
|
|
3137
|
+
const elementXml = content.substring(nextStart, elementEnd);
|
|
3138
|
+
result.push({ type: nextType, xml: elementXml });
|
|
3139
|
+
pos = elementEnd;
|
|
2816
3140
|
}
|
|
2817
|
-
const elementXml = content.substring(nextStart, elementEnd);
|
|
2818
|
-
result.push({ type: nextType, xml: elementXml });
|
|
2819
|
-
pos = elementEnd;
|
|
2820
3141
|
}
|
|
2821
3142
|
return result;
|
|
2822
3143
|
}
|