docxmlater 7.7.8 → 7.7.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/Document.d.ts.map +1 -1
- package/dist/core/Document.js +1 -1
- package/dist/core/Document.js.map +1 -1
- package/dist/core/DocumentParser.d.ts +5 -1
- package/dist/core/DocumentParser.d.ts.map +1 -1
- package/dist/core/DocumentParser.js +257 -22
- package/dist/core/DocumentParser.js.map +1 -1
- package/package.json +1 -1
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { BookmarkManager } from "../elements/BookmarkManager";
|
|
1
2
|
import { ImageManager } from "../elements/ImageManager";
|
|
2
3
|
import { Paragraph } from "../elements/Paragraph";
|
|
3
4
|
import { Section } from "../elements/Section";
|
|
@@ -18,10 +19,11 @@ type BodyElement = Paragraph | Table | TableOfContentsElement | StructuredDocume
|
|
|
18
19
|
export declare class DocumentParser {
|
|
19
20
|
private parseErrors;
|
|
20
21
|
private strictParsing;
|
|
22
|
+
private bookmarkManager;
|
|
21
23
|
constructor(strictParsing?: boolean);
|
|
22
24
|
getParseErrors(): ParseError[];
|
|
23
25
|
clearParseErrors(): void;
|
|
24
|
-
parseDocument(zipHandler: ZipHandler, relationshipManager: RelationshipManager, imageManager: ImageManager): Promise<{
|
|
26
|
+
parseDocument(zipHandler: ZipHandler, relationshipManager: RelationshipManager, imageManager: ImageManager, bookmarkManager?: BookmarkManager): Promise<{
|
|
25
27
|
bodyElements: BodyElement[];
|
|
26
28
|
properties: DocumentProperties;
|
|
27
29
|
relationshipManager: RelationshipManager;
|
|
@@ -32,6 +34,8 @@ export declare class DocumentParser {
|
|
|
32
34
|
namespaces: Record<string, string>;
|
|
33
35
|
}>;
|
|
34
36
|
private parseBodyElements;
|
|
37
|
+
private extractBodyLevelBookmarkEnds;
|
|
38
|
+
private extractBookmarkEndsFromContent;
|
|
35
39
|
private findNextTag;
|
|
36
40
|
private findNextTopLevelTag;
|
|
37
41
|
private isPositionInsideTable;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"DocumentParser.d.ts","sourceRoot":"","sources":["../../src/core/DocumentParser.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"DocumentParser.d.ts","sourceRoot":"","sources":["../../src/core/DocumentParser.ts"],"names":[],"mappings":"AAMA,OAAO,EAAE,eAAe,EAAE,MAAM,6BAA6B,CAAC;AAM9D,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AAExD,OAAO,EAAE,SAAS,EAAyC,MAAM,uBAAuB,CAAC;AAGzF,OAAO,EAEL,OAAO,EAGR,MAAM,qBAAqB,CAAC;AAC7B,OAAO,EAAE,qBAAqB,EAAE,MAAM,mCAAmC,CAAC;AAC1E,OAAO,EAAE,KAAK,EAAE,MAAM,mBAAmB,CAAC;AAG1C,OAAO,EAAE,sBAAsB,EAAE,MAAM,oCAAoC,CAAC;AAE5E,OAAO,EAAE,iBAAiB,EAAE,MAAM,iCAAiC,CAAC;AACpE,OAAO,EAAE,iBAAiB,EAAE,MAAM,iCAAiC,CAAC;AACpE,OAAO,EAAE,KAAK,EAA8B,MAAM,qBAAqB,CAAC;AAcxE,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAE/C,OAAO,EAAE,kBAAkB,EAAE,MAAM,YAAY,CAAC;AAChD,OAAO,EAAE,mBAAmB,EAAE,MAAM,uBAAuB,CAAC;AAK5D,MAAM,WAAW,UAAU;IACzB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,KAAK,CAAC;CACd;AAKD,KAAK,WAAW,GACZ,SAAS,GACT,KAAK,GACL,sBAAsB,GACtB,qBAAqB,CAAC;AAK1B,qBAAa,cAAc;IACzB,OAAO,CAAC,WAAW,CAAoB;IACvC,OAAO,CAAC,aAAa,CAAU;IAC/B,OAAO,CAAC,eAAe,CAAgC;gBAE3C,aAAa,GAAE,OAAe;IAO1C,cAAc,IAAI,UAAU,EAAE;IAO9B,gBAAgB,IAAI,IAAI;IAWlB,aAAa,CACjB,UAAU,EAAE,UAAU,EACtB,mBAAmB,EAAE,mBAAmB,EACxC,YAAY,EAAE,YAAY,EAC1B,eAAe,CAAC,EAAE,eAAe,GAChC,OAAO,CAAC;QACT,YAAY,EAAE,WAAW,EAAE,CAAC;QAC5B,UAAU,EAAE,kBAAkB,CAAC;QAC/B,mBAAmB,EAAE,mBAAmB,CAAC;QACzC,MAAM,EAAE,KAAK,EAAE,CAAC;QAChB,kBAAkB,EAAE,iBAAiB,EAAE,CAAC;QACxC,kBAAkB,EAAE,iBAAiB,EAAE,CAAC;QACxC,OAAO,EAAE,OAAO,GAAG,IAAI,CAAC;QACxB,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KACpC,CAAC;YAyFY,iBAAiB;IA6L/B,OAAO,CAAC,4BAA4B;IAsCpC,OAAO,CAAC,8BAA8B;IA2BtC,OAAO,CAAC,WAAW;IAmCnB,OAAO,CAAC,mBAAmB;IAkC3B,OAAO,CAAC,qBAAqB;IAoB7B,OAAO,CAAC,oBAAoB;IAuB5B,OAAO,CAAC,qBAAqB;YAwEf,uBAAuB;YA4FvB,6BAA6B;YAgW7B,oBAAoB;IAyHlC,OAAO,CAAC,kBAAkB;IAmD1B,OAAO,CAAC,gBAAgB;YA8BV,wBAAwB;IAoLtC,OAAO,CAAC,kCAAkC;IA6Y1C,OAAO,CAAC,qBAAqB;IAsP7B,OAAO,CAAC,4BAA4B;IAiHpC,OAAO,CAAC,oBAAoB;IAiC5B,OAAO,CAAC,0BAA0B;IA8IlC,OAAO,CAAC,wCAAwC;IA0EhD,OAAO,CAAC,0BAA0B;IAmHlC,OAAO,CAAC,kBAAkB;IAsN1B,OAAO,CAAC,wBAAwB;IAwLhC,OAAO,CAAC,0BAA0B;IAiIlC,OAAO,CAAC,8BAA8B;IActC,OAAO,CAAC,0BAA0B;IAwClC,OAAO,CAAC
,4BAA4B;YAwLtB,sBAAsB;IAqNpC,OAAO,CAAC,iBAAiB;IA4CzB,OAAO,CAAC,kBAAkB;IA6D1B,OAAO,CAAC,cAAc;IAmBtB,OAAO,CAAC,iBAAiB;YAuBX,oBAAoB;IA4HlC,OAAO,CAAC,8BAA8B;YAuFxB,uBAAuB;IAyErC,OAAO,CAAC,iCAAiC;IAyDzC,OAAO,CAAC,sCAAsC;IA4D9C,OAAO,CAAC,2BAA2B;YAgCrB,wBAAwB;IAkOtC,OAAO,CAAC,yBAAyB;IA0GjC,OAAO,CAAC,UAAU;IAmBlB,OAAO,CAAC,cAAc;YAsCR,kBAAkB;IAqPhC,OAAO,CAAC,6BAA6B;IAwCrC,OAAO,CAAC,4BAA4B;IA6JpC,OAAO,CAAC,4BAA4B;IA6FpC,OAAO,CAAC,sBAAsB;IAmI9B,OAAO,CAAC,cAAc;IA2BtB,OAAO,CAAC,WAAW;IAgHnB,OAAO,CAAC,kBAAkB;IAoB1B,OAAO,CAAC,eAAe;IAuEvB,OAAO,CAAC,qBAAqB;IA8D7B,OAAO,CAAC,WAAW;IA8CnB,OAAO,CAAC,cAAc;IAqEtB,OAAO,CAAC,sBAAsB;IAoP9B,OAAO,CAAC,UAAU;IAwMlB,OAAO,CAAC,+BAA+B;IAqHvC,OAAO,CAAC,yBAAyB;IAyJjC,OAAO,CAAC,yBAAyB;IA+EjC,OAAO,CAAC,2BAA2B;IAqEnC,OAAO,CAAC,+BAA+B;IAmDvC,OAAO,CAAC,8BAA8B;IA8CtC,OAAO,CAAC,iCAAiC;IA6FzC,OAAO,CAAC,mBAAmB;IAqF3B,OAAO,CAAC,mBAAmB;IAqB3B,OAAO,CAAC,uBAAuB;IA6B/B,MAAM,CAAC,SAAS,CAAC,UAAU,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI;IA+BzE,MAAM,CAAC,SAAS,CACd,UAAU,EAAE,UAAU,EACtB,QAAQ,EAAE,MAAM,EAChB,UAAU,EAAE,MAAM,GACjB,OAAO;IAwBV,MAAM,CAAC,gBAAgB,CACrB,UAAU,EAAE,UAAU,EACtB,QAAQ,EAAE,MAAM,GACf,KAAK,CAAC;QACP,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,IAAI,CAAC,EAAE,MAAM,CAAC;QACd,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,UAAU,CAAC,EAAE,MAAM,CAAC;KACrB,CAAC;IAmFF,OAAO,CAAC,eAAe;IA4BjB,sBAAsB,CAC1B,UAAU,EAAE,UAAU,EACtB,OAAO,EAAE,OAAO,GAAG,IAAI,EACvB,mBAAmB,EAAE,mBAAmB,EACxC,YAAY,EAAE,YAAY,GACzB,OAAO,CAAC;QACT,OAAO,EAAE,KAAK,CAAC;YACb,MAAM,EAAE,OAAO,oBAAoB,EAAE,MAAM,CAAC;YAC5C,cAAc,EAAE,MAAM,CAAC;YACvB,QAAQ,EAAE,MAAM,CAAC;SAClB,CAAC,CAAC;QACH,OAAO,EAAE,KAAK,CAAC;YACb,MAAM,EAAE,OAAO,oBAAoB,EAAE,MAAM,CAAC;YAC5C,cAAc,EAAE,MAAM,CAAC;YACvB,QAAQ,EAAE,MAAM,CAAC;SAClB,CAAC,CAAC;KACJ,CAAC;YA4FY,WAAW;YA6DX,WAAW;CAmD1B"}
|
|
@@ -66,6 +66,7 @@ const RelationshipManager_1 = require("./RelationshipManager");
|
|
|
66
66
|
class DocumentParser {
|
|
67
67
|
parseErrors = [];
|
|
68
68
|
strictParsing;
|
|
69
|
+
bookmarkManager = null;
|
|
69
70
|
constructor(strictParsing = false) {
|
|
70
71
|
this.strictParsing = strictParsing;
|
|
71
72
|
}
|
|
@@ -75,9 +76,10 @@ class DocumentParser {
|
|
|
75
76
|
clearParseErrors() {
|
|
76
77
|
this.parseErrors = [];
|
|
77
78
|
}
|
|
78
|
-
async parseDocument(zipHandler, relationshipManager, imageManager) {
|
|
79
|
+
async parseDocument(zipHandler, relationshipManager, imageManager, bookmarkManager) {
|
|
79
80
|
const logger = getLogger();
|
|
80
81
|
logger.info('Parsing document');
|
|
82
|
+
this.bookmarkManager = bookmarkManager || null;
|
|
81
83
|
const docXml = zipHandler.getFileAsString(types_1.DOCX_PATHS.DOCUMENT);
|
|
82
84
|
if (!docXml) {
|
|
83
85
|
logger.error('Invalid document: word/document.xml not found');
|
|
@@ -146,6 +148,34 @@ class DocumentParser {
|
|
|
146
148
|
candidates.sort((a, b) => a.pos - b.pos);
|
|
147
149
|
const next = candidates[0];
|
|
148
150
|
if (next) {
|
|
151
|
+
if (bodyElements.length > 0 && next.pos > pos) {
|
|
152
|
+
const bookmarkEnds = this.extractBodyLevelBookmarkEnds(bodyContent, pos, next.pos);
|
|
153
|
+
if (bookmarkEnds.length > 0) {
|
|
154
|
+
const prevElement = bodyElements[bodyElements.length - 1];
|
|
155
|
+
if (prevElement instanceof Paragraph_1.Paragraph) {
|
|
156
|
+
for (const bookmark of bookmarkEnds) {
|
|
157
|
+
prevElement.addBookmarkEnd(bookmark);
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
else if (prevElement instanceof Table_1.Table) {
|
|
161
|
+
const rows = prevElement.getRows();
|
|
162
|
+
const lastRow = rows[rows.length - 1];
|
|
163
|
+
if (lastRow) {
|
|
164
|
+
const cells = lastRow.getCells();
|
|
165
|
+
const lastCell = cells[cells.length - 1];
|
|
166
|
+
if (lastCell) {
|
|
167
|
+
const cellParas = lastCell.getParagraphs();
|
|
168
|
+
const lastPara = cellParas[cellParas.length - 1];
|
|
169
|
+
if (lastPara) {
|
|
170
|
+
for (const bookmark of bookmarkEnds) {
|
|
171
|
+
lastPara.addBookmarkEnd(bookmark);
|
|
172
|
+
}
|
|
173
|
+
}
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
}
|
|
149
179
|
if (next.type === "p") {
|
|
150
180
|
const elementXml = this.extractSingleElement(bodyContent, "w:p", next.pos);
|
|
151
181
|
if (elementXml) {
|
|
@@ -190,10 +220,81 @@ class DocumentParser {
|
|
|
190
220
|
}
|
|
191
221
|
}
|
|
192
222
|
}
|
|
223
|
+
if (bodyElements.length > 0 && pos < bodyContent.length) {
|
|
224
|
+
const trailingBookmarkEnds = this.extractBodyLevelBookmarkEnds(bodyContent, pos, -1);
|
|
225
|
+
if (trailingBookmarkEnds.length > 0) {
|
|
226
|
+
const lastElement = bodyElements[bodyElements.length - 1];
|
|
227
|
+
if (lastElement instanceof Paragraph_1.Paragraph) {
|
|
228
|
+
for (const bookmark of trailingBookmarkEnds) {
|
|
229
|
+
lastElement.addBookmarkEnd(bookmark);
|
|
230
|
+
}
|
|
231
|
+
}
|
|
232
|
+
else if (lastElement instanceof Table_1.Table) {
|
|
233
|
+
const rows = lastElement.getRows();
|
|
234
|
+
const lastRow = rows[rows.length - 1];
|
|
235
|
+
if (lastRow) {
|
|
236
|
+
const cells = lastRow.getCells();
|
|
237
|
+
const lastCell = cells[cells.length - 1];
|
|
238
|
+
if (lastCell) {
|
|
239
|
+
const cellParas = lastCell.getParagraphs();
|
|
240
|
+
const lastPara = cellParas[cellParas.length - 1];
|
|
241
|
+
if (lastPara) {
|
|
242
|
+
for (const bookmark of trailingBookmarkEnds) {
|
|
243
|
+
lastPara.addBookmarkEnd(bookmark);
|
|
244
|
+
}
|
|
245
|
+
}
|
|
246
|
+
}
|
|
247
|
+
}
|
|
248
|
+
}
|
|
249
|
+
}
|
|
250
|
+
}
|
|
193
251
|
this.assembleMultiParagraphFields(bodyElements);
|
|
194
252
|
this.validateLoadedContent(bodyElements);
|
|
195
253
|
return bodyElements;
|
|
196
254
|
}
|
|
255
|
+
extractBodyLevelBookmarkEnds(content, startPos, endPos) {
|
|
256
|
+
const bookmarks = [];
|
|
257
|
+
const searchContent = endPos === -1
|
|
258
|
+
? content.slice(startPos)
|
|
259
|
+
: content.slice(startPos, endPos);
|
|
260
|
+
const bookmarkEndRegex = /<w:bookmarkEnd[^>]*w:id="(\d+)"[^>]*\/?>/g;
|
|
261
|
+
let match;
|
|
262
|
+
while ((match = bookmarkEndRegex.exec(searchContent)) !== null) {
|
|
263
|
+
const idStr = match[1];
|
|
264
|
+
if (idStr) {
|
|
265
|
+
const id = parseInt(idStr, 10);
|
|
266
|
+
if (!isNaN(id)) {
|
|
267
|
+
const bookmark = new Bookmark_1.Bookmark({
|
|
268
|
+
name: `_end_${id}`,
|
|
269
|
+
id: id,
|
|
270
|
+
skipNormalization: true,
|
|
271
|
+
});
|
|
272
|
+
bookmarks.push(bookmark);
|
|
273
|
+
}
|
|
274
|
+
}
|
|
275
|
+
}
|
|
276
|
+
return bookmarks;
|
|
277
|
+
}
|
|
278
|
+
extractBookmarkEndsFromContent(content) {
|
|
279
|
+
const bookmarks = [];
|
|
280
|
+
const bookmarkEndRegex = /<w:bookmarkEnd[^>]*w:id="(\d+)"[^>]*\/?>/g;
|
|
281
|
+
let match;
|
|
282
|
+
while ((match = bookmarkEndRegex.exec(content)) !== null) {
|
|
283
|
+
const idStr = match[1];
|
|
284
|
+
if (idStr) {
|
|
285
|
+
const id = parseInt(idStr, 10);
|
|
286
|
+
if (!isNaN(id)) {
|
|
287
|
+
const bookmark = new Bookmark_1.Bookmark({
|
|
288
|
+
name: `_end_${id}`,
|
|
289
|
+
id: id,
|
|
290
|
+
skipNormalization: true,
|
|
291
|
+
});
|
|
292
|
+
bookmarks.push(bookmark);
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
}
|
|
296
|
+
return bookmarks;
|
|
297
|
+
}
|
|
197
298
|
findNextTag(content, tagName, startPos) {
|
|
198
299
|
const tag = `<${tagName}`;
|
|
199
300
|
let pos = content.indexOf(tag, startPos);
|
|
@@ -513,9 +614,15 @@ class DocumentParser {
|
|
|
513
614
|
? [hyperlinks]
|
|
514
615
|
: [];
|
|
515
616
|
if (child.index < hyperlinkArray.length) {
|
|
516
|
-
const hyperlink = this.parseHyperlinkFromObject(hyperlinkArray[child.index], relationshipManager);
|
|
517
|
-
if (hyperlink) {
|
|
518
|
-
paragraph.addHyperlink(hyperlink);
|
|
617
|
+
const result = this.parseHyperlinkFromObject(hyperlinkArray[child.index], relationshipManager);
|
|
618
|
+
if (result.hyperlink) {
|
|
619
|
+
paragraph.addHyperlink(result.hyperlink);
|
|
620
|
+
}
|
|
621
|
+
for (const bookmark of result.bookmarkStarts) {
|
|
622
|
+
paragraph.addBookmarkStart(bookmark);
|
|
623
|
+
}
|
|
624
|
+
for (const bookmark of result.bookmarkEnds) {
|
|
625
|
+
paragraph.addBookmarkEnd(bookmark);
|
|
519
626
|
}
|
|
520
627
|
}
|
|
521
628
|
}
|
|
@@ -652,9 +759,9 @@ class DocumentParser {
|
|
|
652
759
|
}
|
|
653
760
|
for (const hyperlinkXml of hyperlinkXmls) {
|
|
654
761
|
const hyperlinkObj = XMLParser_1.XMLParser.parseToObject(hyperlinkXml, { trimValues: false });
|
|
655
|
-
const hyperlink = this.parseHyperlinkFromObject(hyperlinkObj["w:hyperlink"], relationshipManager);
|
|
656
|
-
if (hyperlink) {
|
|
657
|
-
content.push(hyperlink);
|
|
762
|
+
const result = this.parseHyperlinkFromObject(hyperlinkObj["w:hyperlink"], relationshipManager);
|
|
763
|
+
if (result.hyperlink) {
|
|
764
|
+
content.push(result.hyperlink);
|
|
658
765
|
}
|
|
659
766
|
}
|
|
660
767
|
if (content.length === 0) {
|
|
@@ -690,6 +797,14 @@ class DocumentParser {
|
|
|
690
797
|
id: id,
|
|
691
798
|
skipNormalization: true,
|
|
692
799
|
});
|
|
800
|
+
if (this.bookmarkManager) {
|
|
801
|
+
try {
|
|
802
|
+
this.bookmarkManager.registerExisting(bookmark);
|
|
803
|
+
}
|
|
804
|
+
catch (e) {
|
|
805
|
+
logger_1.defaultLogger.debug("[DocumentParser] Bookmark already registered:", { name: nameAttr, id: id });
|
|
806
|
+
}
|
|
807
|
+
}
|
|
693
808
|
return bookmark;
|
|
694
809
|
}
|
|
695
810
|
catch (error) {
|
|
@@ -762,9 +877,15 @@ class DocumentParser {
|
|
|
762
877
|
? [hyperlinks]
|
|
763
878
|
: [];
|
|
764
879
|
if (elementIndex < hyperlinkArray.length) {
|
|
765
|
-
const hyperlink = this.parseHyperlinkFromObject(hyperlinkArray[elementIndex], relationshipManager);
|
|
766
|
-
if (hyperlink) {
|
|
767
|
-
paragraph.addHyperlink(hyperlink);
|
|
880
|
+
const result = this.parseHyperlinkFromObject(hyperlinkArray[elementIndex], relationshipManager);
|
|
881
|
+
if (result.hyperlink) {
|
|
882
|
+
paragraph.addHyperlink(result.hyperlink);
|
|
883
|
+
}
|
|
884
|
+
for (const bookmark of result.bookmarkStarts) {
|
|
885
|
+
paragraph.addBookmarkStart(bookmark);
|
|
886
|
+
}
|
|
887
|
+
for (const bookmark of result.bookmarkEnds) {
|
|
888
|
+
paragraph.addBookmarkEnd(bookmark);
|
|
768
889
|
}
|
|
769
890
|
}
|
|
770
891
|
}
|
|
@@ -810,9 +931,15 @@ class DocumentParser {
|
|
|
810
931
|
? [hyperlinks]
|
|
811
932
|
: [];
|
|
812
933
|
for (const hyperlinkObj of hyperlinkChildren) {
|
|
813
|
-
const hyperlink = this.parseHyperlinkFromObject(hyperlinkObj, relationshipManager);
|
|
814
|
-
if (hyperlink) {
|
|
815
|
-
paragraph.addHyperlink(hyperlink);
|
|
934
|
+
const result = this.parseHyperlinkFromObject(hyperlinkObj, relationshipManager);
|
|
935
|
+
if (result.hyperlink) {
|
|
936
|
+
paragraph.addHyperlink(result.hyperlink);
|
|
937
|
+
}
|
|
938
|
+
for (const bookmark of result.bookmarkStarts) {
|
|
939
|
+
paragraph.addBookmarkStart(bookmark);
|
|
940
|
+
}
|
|
941
|
+
for (const bookmark of result.bookmarkEnds) {
|
|
942
|
+
paragraph.addBookmarkEnd(bookmark);
|
|
816
943
|
}
|
|
817
944
|
}
|
|
818
945
|
const fields = paraObj["w:fldSimple"];
|
|
@@ -1817,7 +1944,48 @@ class DocumentParser {
|
|
|
1817
1944
|
}
|
|
1818
1945
|
}
|
|
1819
1946
|
parseHyperlinkFromObject(hyperlinkObj, relationshipManager) {
|
|
1947
|
+
const result = { hyperlink: null, bookmarkStarts: [], bookmarkEnds: [] };
|
|
1820
1948
|
try {
|
|
1949
|
+
if (hyperlinkObj["w:bookmarkStart"]) {
|
|
1950
|
+
const bookmarkStarts = Array.isArray(hyperlinkObj["w:bookmarkStart"])
|
|
1951
|
+
? hyperlinkObj["w:bookmarkStart"]
|
|
1952
|
+
: [hyperlinkObj["w:bookmarkStart"]];
|
|
1953
|
+
for (const bs of bookmarkStarts) {
|
|
1954
|
+
const id = bs["@_w:id"];
|
|
1955
|
+
const name = bs["@_w:name"];
|
|
1956
|
+
if (id !== undefined && name) {
|
|
1957
|
+
const bookmark = new Bookmark_1.Bookmark({
|
|
1958
|
+
name: name,
|
|
1959
|
+
id: typeof id === "number" ? id : parseInt(id, 10),
|
|
1960
|
+
skipNormalization: true,
|
|
1961
|
+
});
|
|
1962
|
+
result.bookmarkStarts.push(bookmark);
|
|
1963
|
+
if (this.bookmarkManager) {
|
|
1964
|
+
try {
|
|
1965
|
+
this.bookmarkManager.registerExisting(bookmark);
|
|
1966
|
+
}
|
|
1967
|
+
catch {
|
|
1968
|
+
}
|
|
1969
|
+
}
|
|
1970
|
+
}
|
|
1971
|
+
}
|
|
1972
|
+
}
|
|
1973
|
+
if (hyperlinkObj["w:bookmarkEnd"]) {
|
|
1974
|
+
const bookmarkEnds = Array.isArray(hyperlinkObj["w:bookmarkEnd"])
|
|
1975
|
+
? hyperlinkObj["w:bookmarkEnd"]
|
|
1976
|
+
: [hyperlinkObj["w:bookmarkEnd"]];
|
|
1977
|
+
for (const be of bookmarkEnds) {
|
|
1978
|
+
const id = be["@_w:id"];
|
|
1979
|
+
if (id !== undefined) {
|
|
1980
|
+
const bookmark = new Bookmark_1.Bookmark({
|
|
1981
|
+
name: `_end_${id}`,
|
|
1982
|
+
id: typeof id === "number" ? id : parseInt(id, 10),
|
|
1983
|
+
skipNormalization: true,
|
|
1984
|
+
});
|
|
1985
|
+
result.bookmarkEnds.push(bookmark);
|
|
1986
|
+
}
|
|
1987
|
+
}
|
|
1988
|
+
}
|
|
1821
1989
|
const relationshipId = hyperlinkObj["@_r:id"];
|
|
1822
1990
|
const anchor = hyperlinkObj["@_w:anchor"];
|
|
1823
1991
|
const tooltip = hyperlinkObj["@_w:tooltip"];
|
|
@@ -1879,13 +2047,14 @@ class DocumentParser {
|
|
|
1879
2047
|
if (parsedRun && parsedRun.getContent().length > 1) {
|
|
1880
2048
|
hyperlink.setRun(parsedRun);
|
|
1881
2049
|
}
|
|
1882
|
-
return hyperlink;
|
|
2050
|
+
result.hyperlink = hyperlink;
|
|
2051
|
+
return result;
|
|
1883
2052
|
}
|
|
1884
2053
|
catch (error) {
|
|
1885
2054
|
logger_1.defaultLogger.warn("[DocumentParser] Failed to parse hyperlink:", error instanceof Error
|
|
1886
2055
|
? { message: error.message, stack: error.stack }
|
|
1887
2056
|
: { error: String(error) });
|
|
1888
|
-
return null;
|
|
2057
|
+
return result;
|
|
1889
2058
|
}
|
|
1890
2059
|
}
|
|
1891
2060
|
mergeConsecutiveHyperlinks(paragraph, resetFormatting = false) {
|
|
@@ -2459,12 +2628,49 @@ class DocumentParser {
|
|
|
2459
2628
|
}
|
|
2460
2629
|
rowXmls = XMLParser_1.XMLParser.extractElements(rawTableXml, "w:tr");
|
|
2461
2630
|
}
|
|
2631
|
+
const rowPositions = [];
|
|
2632
|
+
if (rawTableXml) {
|
|
2633
|
+
let searchPos = 0;
|
|
2634
|
+
for (const rowXml of rowXmls) {
|
|
2635
|
+
const rowStart = rawTableXml.indexOf(rowXml, searchPos);
|
|
2636
|
+
if (rowStart !== -1) {
|
|
2637
|
+
rowPositions.push({
|
|
2638
|
+
start: rowStart,
|
|
2639
|
+
end: rowStart + rowXml.length,
|
|
2640
|
+
});
|
|
2641
|
+
searchPos = rowStart + rowXml.length;
|
|
2642
|
+
}
|
|
2643
|
+
}
|
|
2644
|
+
}
|
|
2462
2645
|
for (let i = 0; i < rowChildren.length; i++) {
|
|
2463
2646
|
const rowObj = rowChildren[i];
|
|
2464
2647
|
const rawRowXml = i < rowXmls.length ? rowXmls[i] : undefined;
|
|
2465
2648
|
const row = await this.parseTableRowFromObject(rowObj, relationshipManager, zipHandler, imageManager, rawRowXml);
|
|
2466
2649
|
if (row) {
|
|
2467
2650
|
table.addRow(row);
|
|
2651
|
+
if (rawTableXml && i < rowPositions.length) {
|
|
2652
|
+
const currentRowEnd = rowPositions[i]?.end || 0;
|
|
2653
|
+
const nextRowStart = i + 1 < rowPositions.length
|
|
2654
|
+
? rowPositions[i + 1]?.start
|
|
2655
|
+
: rawTableXml.length;
|
|
2656
|
+
if (nextRowStart && currentRowEnd < nextRowStart) {
|
|
2657
|
+
const betweenContent = rawTableXml.slice(currentRowEnd, nextRowStart);
|
|
2658
|
+
const bookmarkEnds = this.extractBookmarkEndsFromContent(betweenContent);
|
|
2659
|
+
if (bookmarkEnds.length > 0) {
|
|
2660
|
+
const cells = row.getCells();
|
|
2661
|
+
const lastCell = cells[cells.length - 1];
|
|
2662
|
+
if (lastCell) {
|
|
2663
|
+
const cellParas = lastCell.getParagraphs();
|
|
2664
|
+
const lastPara = cellParas[cellParas.length - 1];
|
|
2665
|
+
if (lastPara) {
|
|
2666
|
+
for (const bookmark of bookmarkEnds) {
|
|
2667
|
+
lastPara.addBookmarkEnd(bookmark);
|
|
2668
|
+
}
|
|
2669
|
+
}
|
|
2670
|
+
}
|
|
2671
|
+
}
|
|
2672
|
+
}
|
|
2673
|
+
}
|
|
2468
2674
|
}
|
|
2469
2675
|
}
|
|
2470
2676
|
return table;
|
|
@@ -2814,17 +3020,27 @@ class DocumentParser {
|
|
|
2814
3020
|
if (rawCellXml) {
|
|
2815
3021
|
const cellContent = this.extractCellContentInOrder(rawCellXml);
|
|
2816
3022
|
let paragraphIndex = 0;
|
|
3023
|
+
let lastParagraph = null;
|
|
2817
3024
|
for (const item of cellContent) {
|
|
2818
3025
|
if (item.type === "paragraph") {
|
|
2819
3026
|
const paragraph = await this.parseParagraphWithOrder(item.xml, relationshipManager, zipHandler, imageManager);
|
|
2820
3027
|
if (paragraph) {
|
|
2821
3028
|
cell.addParagraph(paragraph);
|
|
3029
|
+
lastParagraph = paragraph;
|
|
2822
3030
|
paragraphIndex++;
|
|
2823
3031
|
}
|
|
2824
3032
|
}
|
|
2825
3033
|
else if (item.type === "table" || item.type === "sdt") {
|
|
2826
3034
|
cell.addRawNestedContent(paragraphIndex, item.xml, item.type);
|
|
2827
3035
|
}
|
|
3036
|
+
else if (item.type === "bookmarkEnd") {
|
|
3037
|
+
if (lastParagraph) {
|
|
3038
|
+
const bookmarkEnds = this.extractBookmarkEndsFromContent(item.xml);
|
|
3039
|
+
for (const bookmark of bookmarkEnds) {
|
|
3040
|
+
lastParagraph.addBookmarkEnd(bookmark);
|
|
3041
|
+
}
|
|
3042
|
+
}
|
|
3043
|
+
}
|
|
2828
3044
|
}
|
|
2829
3045
|
}
|
|
2830
3046
|
else {
|
|
@@ -2869,6 +3085,7 @@ class DocumentParser {
|
|
|
2869
3085
|
const pStart = content.indexOf("<w:p", pos);
|
|
2870
3086
|
const tblStart = content.indexOf("<w:tbl", pos);
|
|
2871
3087
|
const sdtStart = content.indexOf("<w:sdt", pos);
|
|
3088
|
+
const bookmarkEndStart = content.indexOf("<w:bookmarkEnd", pos);
|
|
2872
3089
|
let nextStart = -1;
|
|
2873
3090
|
let nextType = null;
|
|
2874
3091
|
let nextTag = "";
|
|
@@ -2893,16 +3110,34 @@ class DocumentParser {
|
|
|
2893
3110
|
nextType = "sdt";
|
|
2894
3111
|
nextTag = "w:sdt";
|
|
2895
3112
|
}
|
|
3113
|
+
if (bookmarkEndStart !== -1 &&
|
|
3114
|
+
(nextStart === -1 || bookmarkEndStart < nextStart)) {
|
|
3115
|
+
nextStart = bookmarkEndStart;
|
|
3116
|
+
nextType = "bookmarkEnd";
|
|
3117
|
+
nextTag = "w:bookmarkEnd";
|
|
3118
|
+
}
|
|
2896
3119
|
if (nextStart === -1 || nextType === null)
|
|
2897
3120
|
break;
|
|
2898
|
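-
const elementEnd = this.findClosingTag(content, nextTag, nextStart);
|
|
2899
|
-
if (elementEnd === -1) {
|
|
2900
|
-
pos = nextStart + 1;
|
|
2901
|
-
continue;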
|
|
3121
|
+
if (nextType === "bookmarkEnd") {
|
|
3122
|
+
const elementEnd = content.indexOf(">", nextStart) + 1;
|
|
3123
|
+
if (elementEnd === 0) {
|
|
3124
|
+
pos = nextStart + 1;
|
|
3125
|
+
continue;
|
|
3126
|
+
}
|
|
3127
|
+
const elementXml = content.substring(nextStart, elementEnd);
|
|
3128
|
+
result.push({ type: nextType, xml: elementXml });
|
|
3129
|
+
pos = elementEnd;
|
|
3130
|
+
}
|
|
3131
|
+
else {
|
|
3132
|
+
const elementEnd = this.findClosingTag(content, nextTag, nextStart);
|
|
3133
|
+
if (elementEnd === -1) {
|
|
3134
|
+
pos = nextStart + 1;
|
|
3135
|
+
continue;
|
|
3136
|
+
}
|
|
3137
|
+
const elementXml = content.substring(nextStart, elementEnd);
|
|
3138
|
+
result.push({ type: nextType, xml: elementXml });
|
|
3139
|
+
pos = elementEnd;
|
|
2902
3140
|
}
|
|
2903
|
-
const elementXml = content.substring(nextStart, elementEnd);
|
|
2904
|
-
result.push({ type: nextType, xml: elementXml });
|
|
2905
|
-
pos = elementEnd;
|
|
2906
3141
|
}
|
|
2907
3142
|
return result;
|
|
2908
3143
|
}
|