docxmlater 7.7.8 → 7.7.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,4 @@
1
+ import { BookmarkManager } from "../elements/BookmarkManager";
1
2
  import { ImageManager } from "../elements/ImageManager";
2
3
  import { Paragraph } from "../elements/Paragraph";
3
4
  import { Section } from "../elements/Section";
@@ -18,10 +19,11 @@ type BodyElement = Paragraph | Table | TableOfContentsElement | StructuredDocume
18
19
  export declare class DocumentParser {
19
20
  private parseErrors;
20
21
  private strictParsing;
22
+ private bookmarkManager;
21
23
  constructor(strictParsing?: boolean);
22
24
  getParseErrors(): ParseError[];
23
25
  clearParseErrors(): void;
24
- parseDocument(zipHandler: ZipHandler, relationshipManager: RelationshipManager, imageManager: ImageManager): Promise<{
26
+ parseDocument(zipHandler: ZipHandler, relationshipManager: RelationshipManager, imageManager: ImageManager, bookmarkManager?: BookmarkManager): Promise<{
25
27
  bodyElements: BodyElement[];
26
28
  properties: DocumentProperties;
27
29
  relationshipManager: RelationshipManager;
@@ -32,6 +34,8 @@ export declare class DocumentParser {
32
34
  namespaces: Record<string, string>;
33
35
  }>;
34
36
  private parseBodyElements;
37
+ private extractBodyLevelBookmarkEnds;
38
+ private extractBookmarkEndsFromContent;
35
39
  private findNextTag;
36
40
  private findNextTopLevelTag;
37
41
  private isPositionInsideTable;
@@ -1 +1 @@
1
- {"version":3,"file":"DocumentParser.d.ts","sourceRoot":"","sources":["../../src/core/DocumentParser.ts"],"names":[],"mappings":"AAWA,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AAExD,OAAO,EAAE,SAAS,EAAyC,MAAM,uBAAuB,CAAC;AAGzF,OAAO,EAEL,OAAO,EAGR,MAAM,qBAAqB,CAAC;AAC7B,OAAO,EAAE,qBAAqB,EAAE,MAAM,mCAAmC,CAAC;AAC1E,OAAO,EAAE,KAAK,EAAE,MAAM,mBAAmB,CAAC;AAG1C,OAAO,EAAE,sBAAsB,EAAE,MAAM,oCAAoC,CAAC;AAE5E,OAAO,EAAE,iBAAiB,EAAE,MAAM,iCAAiC,CAAC;AACpE,OAAO,EAAE,iBAAiB,EAAE,MAAM,iCAAiC,CAAC;AACpE,OAAO,EAAE,KAAK,EAA8B,MAAM,qBAAqB,CAAC;AAcxE,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAE/C,OAAO,EAAE,kBAAkB,EAAE,MAAM,YAAY,CAAC;AAChD,OAAO,EAAE,mBAAmB,EAAE,MAAM,uBAAuB,CAAC;AAK5D,MAAM,WAAW,UAAU;IACzB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,KAAK,CAAC;CACd;AAKD,KAAK,WAAW,GACZ,SAAS,GACT,KAAK,GACL,sBAAsB,GACtB,qBAAqB,CAAC;AAK1B,qBAAa,cAAc;IACzB,OAAO,CAAC,WAAW,CAAoB;IACvC,OAAO,CAAC,aAAa,CAAU;gBAEnB,aAAa,GAAE,OAAe;IAO1C,cAAc,IAAI,UAAU,EAAE;IAO9B,gBAAgB,IAAI,IAAI;IAWlB,aAAa,CACjB,UAAU,EAAE,UAAU,EACtB,mBAAmB,EAAE,mBAAmB,EACxC,YAAY,EAAE,YAAY,GACzB,OAAO,CAAC;QACT,YAAY,EAAE,WAAW,EAAE,CAAC;QAC5B,UAAU,EAAE,kBAAkB,CAAC;QAC/B,mBAAmB,EAAE,mBAAmB,CAAC;QACzC,MAAM,EAAE,KAAK,EAAE,CAAC;QAChB,kBAAkB,EAAE,iBAAiB,EAAE,CAAC;QACxC,kBAAkB,EAAE,iBAAiB,EAAE,CAAC;QACxC,OAAO,EAAE,OAAO,GAAG,IAAI,CAAC;QACxB,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KACpC,CAAC;YAsFY,iBAAiB;IAoH/B,OAAO,CAAC,WAAW;IAmCnB,OAAO,CAAC,mBAAmB;IAkC3B,OAAO,CAAC,qBAAqB;IAoB7B,OAAO,CAAC,oBAAoB;IAuB5B,OAAO,CAAC,qBAAqB;YAwEf,uBAAuB;YA4FvB,6BAA6B;YAyV7B,oBAAoB;IAuHlC,OAAO,CAAC,kBAAkB;IAoC1B,OAAO,CAAC,gBAAgB;YA8BV,wBAAwB;IAsKtC,OAAO,CAAC,kCAAkC;IA6Y1C,OAAO,CAAC,qBAAqB;IAsP7B,OAAO,CAAC,4BAA4B;IAiHpC,OAAO,CAAC,oBAAoB;IAiC5B,OAAO,CAAC,0BAA0B;IA8IlC,OAAO,CAAC,wCAAwC;IA0EhD,OAAO,CAAC,0BAA0B;IAmHlC,OAAO,CAAC,kBAAkB;IAsN1B,OAAO,CAAC,wBAAwB;IAgIhC,OAAO,CAAC,0BAA0B;IAiIlC,OAAO,CAAC,8BAA8B;IActC,OAAO,CAAC,0BAA0B;IAwClC,OAAO,CAAC,4BAA4B;YAwLtB,sBAAsB;IAqNpC,OAAO,CAAC,iBAAiB;IA4CzB,OAAO,CAAC,kBAAkB;IA6D1B,OAAO,CAAC,cAAc;IAmBtB,OAAO,CAAC,iBAAiB;YAuBX,oBAAoB;IA2ElC,OAAO,CAAC,8BAA8B;YAuFxB,uBAAuB;IAyErC,OAAO,CAAC,iCAAiC;IAyDzC,OAAO,CAAC,sCAAsC;IA4D9C,OAAO,CAAC,2BAA2B;YAgCrB,wBAAwB;IAwNtC,OAAO,CAAC,yBAAyB;IAmFjC,OAAO,CAAC,UAAU;IAmBlB,OAAO,CAAC,cAAc;YAsCR,kBAAkB;IAqPhC,OAAO,CAAC,6BAA6B;IAwCrC,OAAO,CAAC,4BAA4B;IA6JpC,OAAO,CAAC,4BAA4B;IA6FpC,OAAO,CAAC,sBAAsB;IAmI9B,OAAO,CAAC,cAAc;IA2BtB,OAAO,CAAC,WAAW;IAgHnB,OAAO,CAAC,kBAAkB;IAoB1B,OAAO,CAAC,eAAe;IAuEvB,OAAO,CAAC,qBAAqB;IA8D7B,OAAO,CAAC,WAAW;IA8CnB,OAAO,CAAC,cAAc;IAqEtB,OAAO,CAAC,sBAAsB;IAoP9B,OAAO,CAAC,UAAU;IAwMlB,OAAO,CAAC,+BAA+B;IAqHvC,OAAO,CAAC,yBAAyB;IAyJjC,OAAO,CAAC,yBAAyB;IA+EjC,OAAO,CAAC,2BAA2B;IAqEnC,OAAO,CAAC,+BAA+B;IAmDvC,OAAO,CAAC,8BAA8B;IA8CtC,OAAO,CAAC,iCAAiC;IA6FzC,OAAO,CAAC,mBAAmB;IAqF3B,OAAO,CAAC,mBAAmB;IAqB3B,OAAO,CAAC,uBAAuB;IA6B/B,MAAM,CAAC,SAAS,CAAC,UAAU,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI;IA+BzE,MAAM,CAAC,SAAS,CACd,UAAU,EAAE,UAAU,EACtB,QAAQ,EAAE,MAAM,EAChB,UAAU,EAAE,MAAM,GACjB,OAAO;IAwBV,MAAM,CAAC,gBAAgB,CACrB,UAAU,EAAE,UAAU,EACtB,QAAQ,EAAE,MAAM,GACf,KAAK,CAAC;QACP,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,IAAI,CAAC,EAAE,MAAM,CAAC;QACd,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,UAAU,CAAC,EAAE,MAAM,CAAC;KACrB,CAAC;IAmFF,OAAO,CAAC,eAAe;IA4BjB,sBAAsB,CAC1B,UAAU,EAAE,UAAU,EACtB,OAAO,EAAE,OAAO,GAAG,IAAI,EACvB,mBAAmB,EAAE,mBAAmB,EACxC,YAAY,EAAE,YAAY,GACzB,OAAO,CAAC;QACT,OAAO,EAAE,KAAK,CAAC;YACb,MAAM,EAAE,OAAO,oBAAoB,EAAE,MAAM,CAAC;YAC5C,cAAc,EAAE,MAAM,CAAC;YACvB,QAAQ,EAAE,MAAM,CAAC;SAClB,CAAC,CAAC;QACH,OAAO,EAAE,KAAK,CAAC;YACb,MAAM,EAAE,OAAO,oBAAoB,EAAE,MAAM,CAAC;YAC5C,cAAc,EAAE,MAAM,CAAC;YACvB,QAAQ,EAAE,MAAM,CAAC;SAClB,CAAC,CAAC;KACJ,CAAC;YA4FY,WAAW;YA6DX,WAAW;CAmD1B"}
1
+ {"version":3,"file":"DocumentParser.d.ts","sourceRoot":"","sources":["../../src/core/DocumentParser.ts"],"names":[],"mappings":"AAMA,OAAO,EAAE,eAAe,EAAE,MAAM,6BAA6B,CAAC;AAM9D,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AAExD,OAAO,EAAE,SAAS,EAAyC,MAAM,uBAAuB,CAAC;AAGzF,OAAO,EAEL,OAAO,EAGR,MAAM,qBAAqB,CAAC;AAC7B,OAAO,EAAE,qBAAqB,EAAE,MAAM,mCAAmC,CAAC;AAC1E,OAAO,EAAE,KAAK,EAAE,MAAM,mBAAmB,CAAC;AAG1C,OAAO,EAAE,sBAAsB,EAAE,MAAM,oCAAoC,CAAC;AAE5E,OAAO,EAAE,iBAAiB,EAAE,MAAM,iCAAiC,CAAC;AACpE,OAAO,EAAE,iBAAiB,EAAE,MAAM,iCAAiC,CAAC;AACpE,OAAO,EAAE,KAAK,EAA8B,MAAM,qBAAqB,CAAC;AAcxE,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAE/C,OAAO,EAAE,kBAAkB,EAAE,MAAM,YAAY,CAAC;AAChD,OAAO,EAAE,mBAAmB,EAAE,MAAM,uBAAuB,CAAC;AAK5D,MAAM,WAAW,UAAU;IACzB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,KAAK,CAAC;CACd;AAKD,KAAK,WAAW,GACZ,SAAS,GACT,KAAK,GACL,sBAAsB,GACtB,qBAAqB,CAAC;AAK1B,qBAAa,cAAc;IACzB,OAAO,CAAC,WAAW,CAAoB;IACvC,OAAO,CAAC,aAAa,CAAU;IAC/B,OAAO,CAAC,eAAe,CAAgC;gBAE3C,aAAa,GAAE,OAAe;IAO1C,cAAc,IAAI,UAAU,EAAE;IAO9B,gBAAgB,IAAI,IAAI;IAWlB,aAAa,CACjB,UAAU,EAAE,UAAU,EACtB,mBAAmB,EAAE,mBAAmB,EACxC,YAAY,EAAE,YAAY,EAC1B,eAAe,CAAC,EAAE,eAAe,GAChC,OAAO,CAAC;QACT,YAAY,EAAE,WAAW,EAAE,CAAC;QAC5B,UAAU,EAAE,kBAAkB,CAAC;QAC/B,mBAAmB,EAAE,mBAAmB,CAAC;QACzC,MAAM,EAAE,KAAK,EAAE,CAAC;QAChB,kBAAkB,EAAE,iBAAiB,EAAE,CAAC;QACxC,kBAAkB,EAAE,iBAAiB,EAAE,CAAC;QACxC,OAAO,EAAE,OAAO,GAAG,IAAI,CAAC;QACxB,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KACpC,CAAC;YAyFY,iBAAiB;IA6L/B,OAAO,CAAC,4BAA4B;IAsCpC,OAAO,CAAC,8BAA8B;IA2BtC,OAAO,CAAC,WAAW;IAmCnB,OAAO,CAAC,mBAAmB;IAkC3B,OAAO,CAAC,qBAAqB;IAoB7B,OAAO,CAAC,oBAAoB;IAuB5B,OAAO,CAAC,qBAAqB;YAwEf,uBAAuB;YA4FvB,6BAA6B;YAgW7B,oBAAoB;IAyHlC,OAAO,CAAC,kBAAkB;IAmD1B,OAAO,CAAC,gBAAgB;YA8BV,wBAAwB;IAoLtC,OAAO,CAAC,kCAAkC;IA6Y1C,OAAO,CAAC,qBAAqB;IAsP7B,OAAO,CAAC,4BAA4B;IAiHpC,OAAO,CAAC,oBAAoB;IAiC5B,OAAO,CAAC,0BAA0B;IA8IlC,OAAO,CAAC,wCAAwC;IA0EhD,OAAO,CAAC,0BAA0B;IAmHlC,OAAO,CAAC,kBAAkB;IAsN1B,OAAO,CAAC,wBAAwB;IAwLhC,OAAO,CAAC,0BAA0B;IAiIlC,OAAO,CAAC,8BAA8B;IActC,OAAO,CAAC,0BAA0B;IAwClC,OAAO,CAAC,4BAA4B;YAwLtB,sBAAsB;IAqNpC,OAAO,CAAC,iBAAiB;IA4CzB,OAAO,CAAC,kBAAkB;IA6D1B,OAAO,CAAC,cAAc;IAmBtB,OAAO,CAAC,iBAAiB;YAuBX,oBAAoB;IA4HlC,OAAO,CAAC,8BAA8B;YAuFxB,uBAAuB;IAyErC,OAAO,CAAC,iCAAiC;IAyDzC,OAAO,CAAC,sCAAsC;IA4D9C,OAAO,CAAC,2BAA2B;YAgCrB,wBAAwB;IAkOtC,OAAO,CAAC,yBAAyB;IA0GjC,OAAO,CAAC,UAAU;IAmBlB,OAAO,CAAC,cAAc;YAsCR,kBAAkB;IAqPhC,OAAO,CAAC,6BAA6B;IAwCrC,OAAO,CAAC,4BAA4B;IA6JpC,OAAO,CAAC,4BAA4B;IA6FpC,OAAO,CAAC,sBAAsB;IAmI9B,OAAO,CAAC,cAAc;IA2BtB,OAAO,CAAC,WAAW;IAgHnB,OAAO,CAAC,kBAAkB;IAoB1B,OAAO,CAAC,eAAe;IAuEvB,OAAO,CAAC,qBAAqB;IA8D7B,OAAO,CAAC,WAAW;IA8CnB,OAAO,CAAC,cAAc;IAqEtB,OAAO,CAAC,sBAAsB;IAoP9B,OAAO,CAAC,UAAU;IAwMlB,OAAO,CAAC,+BAA+B;IAqHvC,OAAO,CAAC,yBAAyB;IAyJjC,OAAO,CAAC,yBAAyB;IA+EjC,OAAO,CAAC,2BAA2B;IAqEnC,OAAO,CAAC,+BAA+B;IAmDvC,OAAO,CAAC,8BAA8B;IA8CtC,OAAO,CAAC,iCAAiC;IA6FzC,OAAO,CAAC,mBAAmB;IAqF3B,OAAO,CAAC,mBAAmB;IAqB3B,OAAO,CAAC,uBAAuB;IA6B/B,MAAM,CAAC,SAAS,CAAC,UAAU,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI;IA+BzE,MAAM,CAAC,SAAS,CACd,UAAU,EAAE,UAAU,EACtB,QAAQ,EAAE,MAAM,EAChB,UAAU,EAAE,MAAM,GACjB,OAAO;IAwBV,MAAM,CAAC,gBAAgB,CACrB,UAAU,EAAE,UAAU,EACtB,QAAQ,EAAE,MAAM,GACf,KAAK,CAAC;QACP,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,IAAI,CAAC,EAAE,MAAM,CAAC;QACd,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,UAAU,CAAC,EAAE,MAAM,CAAC;KACrB,CAAC;IAmFF,OAAO,CAAC,eAAe;IA4BjB,sBAAsB,CAC1B,UAAU,EAAE,UAAU,EACtB,OAAO,EAAE,OAAO,GAAG,IAAI,EACvB,mBAAmB,EAAE,mBAAmB,EACxC,YAAY,EAAE,YAAY,GACzB,OAAO,CAAC;QACT,OAAO,EAAE,KAAK,CAAC;YACb,MAAM,EAAE,OAAO,oBAAoB,EAAE,MAAM,CAAC;YAC5C,cAAc,EAAE,MAAM,CAAC;YACvB,QAAQ,EAAE,MAAM,CAAC;SAClB,CAAC,CAAC;QACH,OAAO,EAAE,KAAK,CAAC;YACb,MAAM,EAAE,OAAO,oBAAoB,EAAE,MAAM,CAAC;YAC5C,cAAc,EAAE,MAAM,CAAC;YACvB,QAAQ,EAAE,MAAM,CAAC;SAClB,CAAC,CAAC;KACJ,CAAC;YA4FY,WAAW;YA6DX,WAAW;CAmD1B"}
@@ -66,6 +66,7 @@ const RelationshipManager_1 = require("./RelationshipManager");
66
66
  class DocumentParser {
67
67
  parseErrors = [];
68
68
  strictParsing;
69
+ bookmarkManager = null;
69
70
  constructor(strictParsing = false) {
70
71
  this.strictParsing = strictParsing;
71
72
  }
@@ -75,9 +76,10 @@ class DocumentParser {
75
76
  clearParseErrors() {
76
77
  this.parseErrors = [];
77
78
  }
78
- async parseDocument(zipHandler, relationshipManager, imageManager) {
79
+ async parseDocument(zipHandler, relationshipManager, imageManager, bookmarkManager) {
79
80
  const logger = getLogger();
80
81
  logger.info('Parsing document');
82
+ this.bookmarkManager = bookmarkManager || null;
81
83
  const docXml = zipHandler.getFileAsString(types_1.DOCX_PATHS.DOCUMENT);
82
84
  if (!docXml) {
83
85
  logger.error('Invalid document: word/document.xml not found');
@@ -146,6 +148,34 @@ class DocumentParser {
146
148
  candidates.sort((a, b) => a.pos - b.pos);
147
149
  const next = candidates[0];
148
150
  if (next) {
151
+ if (bodyElements.length > 0 && next.pos > pos) {
152
+ const bookmarkEnds = this.extractBodyLevelBookmarkEnds(bodyContent, pos, next.pos);
153
+ if (bookmarkEnds.length > 0) {
154
+ const prevElement = bodyElements[bodyElements.length - 1];
155
+ if (prevElement instanceof Paragraph_1.Paragraph) {
156
+ for (const bookmark of bookmarkEnds) {
157
+ prevElement.addBookmarkEnd(bookmark);
158
+ }
159
+ }
160
+ else if (prevElement instanceof Table_1.Table) {
161
+ const rows = prevElement.getRows();
162
+ const lastRow = rows[rows.length - 1];
163
+ if (lastRow) {
164
+ const cells = lastRow.getCells();
165
+ const lastCell = cells[cells.length - 1];
166
+ if (lastCell) {
167
+ const cellParas = lastCell.getParagraphs();
168
+ const lastPara = cellParas[cellParas.length - 1];
169
+ if (lastPara) {
170
+ for (const bookmark of bookmarkEnds) {
171
+ lastPara.addBookmarkEnd(bookmark);
172
+ }
173
+ }
174
+ }
175
+ }
176
+ }
177
+ }
178
+ }
149
179
  if (next.type === "p") {
150
180
  const elementXml = this.extractSingleElement(bodyContent, "w:p", next.pos);
151
181
  if (elementXml) {
@@ -190,10 +220,81 @@ class DocumentParser {
190
220
  }
191
221
  }
192
222
  }
223
+ if (bodyElements.length > 0 && pos < bodyContent.length) {
224
+ const trailingBookmarkEnds = this.extractBodyLevelBookmarkEnds(bodyContent, pos, -1);
225
+ if (trailingBookmarkEnds.length > 0) {
226
+ const lastElement = bodyElements[bodyElements.length - 1];
227
+ if (lastElement instanceof Paragraph_1.Paragraph) {
228
+ for (const bookmark of trailingBookmarkEnds) {
229
+ lastElement.addBookmarkEnd(bookmark);
230
+ }
231
+ }
232
+ else if (lastElement instanceof Table_1.Table) {
233
+ const rows = lastElement.getRows();
234
+ const lastRow = rows[rows.length - 1];
235
+ if (lastRow) {
236
+ const cells = lastRow.getCells();
237
+ const lastCell = cells[cells.length - 1];
238
+ if (lastCell) {
239
+ const cellParas = lastCell.getParagraphs();
240
+ const lastPara = cellParas[cellParas.length - 1];
241
+ if (lastPara) {
242
+ for (const bookmark of trailingBookmarkEnds) {
243
+ lastPara.addBookmarkEnd(bookmark);
244
+ }
245
+ }
246
+ }
247
+ }
248
+ }
249
+ }
250
+ }
193
251
  this.assembleMultiParagraphFields(bodyElements);
194
252
  this.validateLoadedContent(bodyElements);
195
253
  return bodyElements;
196
254
  }
255
+ extractBodyLevelBookmarkEnds(content, startPos, endPos) {
256
+ const bookmarks = [];
257
+ const searchContent = endPos === -1
258
+ ? content.slice(startPos)
259
+ : content.slice(startPos, endPos);
260
+ const bookmarkEndRegex = /<w:bookmarkEnd[^>]*w:id="(\d+)"[^>]*\/?>/g;
261
+ let match;
262
+ while ((match = bookmarkEndRegex.exec(searchContent)) !== null) {
263
+ const idStr = match[1];
264
+ if (idStr) {
265
+ const id = parseInt(idStr, 10);
266
+ if (!isNaN(id)) {
267
+ const bookmark = new Bookmark_1.Bookmark({
268
+ name: `_end_${id}`,
269
+ id: id,
270
+ skipNormalization: true,
271
+ });
272
+ bookmarks.push(bookmark);
273
+ }
274
+ }
275
+ }
276
+ return bookmarks;
277
+ }
278
+ extractBookmarkEndsFromContent(content) {
279
+ const bookmarks = [];
280
+ const bookmarkEndRegex = /<w:bookmarkEnd[^>]*w:id="(\d+)"[^>]*\/?>/g;
281
+ let match;
282
+ while ((match = bookmarkEndRegex.exec(content)) !== null) {
283
+ const idStr = match[1];
284
+ if (idStr) {
285
+ const id = parseInt(idStr, 10);
286
+ if (!isNaN(id)) {
287
+ const bookmark = new Bookmark_1.Bookmark({
288
+ name: `_end_${id}`,
289
+ id: id,
290
+ skipNormalization: true,
291
+ });
292
+ bookmarks.push(bookmark);
293
+ }
294
+ }
295
+ }
296
+ return bookmarks;
297
+ }
197
298
  findNextTag(content, tagName, startPos) {
198
299
  const tag = `<${tagName}`;
199
300
  let pos = content.indexOf(tag, startPos);
@@ -513,9 +614,15 @@ class DocumentParser {
513
614
  ? [hyperlinks]
514
615
  : [];
515
616
  if (child.index < hyperlinkArray.length) {
516
- const hyperlink = this.parseHyperlinkFromObject(hyperlinkArray[child.index], relationshipManager);
517
- if (hyperlink) {
518
- paragraph.addHyperlink(hyperlink);
617
+ const result = this.parseHyperlinkFromObject(hyperlinkArray[child.index], relationshipManager);
618
+ if (result.hyperlink) {
619
+ paragraph.addHyperlink(result.hyperlink);
620
+ }
621
+ for (const bookmark of result.bookmarkStarts) {
622
+ paragraph.addBookmarkStart(bookmark);
623
+ }
624
+ for (const bookmark of result.bookmarkEnds) {
625
+ paragraph.addBookmarkEnd(bookmark);
519
626
  }
520
627
  }
521
628
  }
@@ -652,9 +759,9 @@ class DocumentParser {
652
759
  }
653
760
  for (const hyperlinkXml of hyperlinkXmls) {
654
761
  const hyperlinkObj = XMLParser_1.XMLParser.parseToObject(hyperlinkXml, { trimValues: false });
655
- const hyperlink = this.parseHyperlinkFromObject(hyperlinkObj["w:hyperlink"], relationshipManager);
656
- if (hyperlink) {
657
- content.push(hyperlink);
762
+ const result = this.parseHyperlinkFromObject(hyperlinkObj["w:hyperlink"], relationshipManager);
763
+ if (result.hyperlink) {
764
+ content.push(result.hyperlink);
658
765
  }
659
766
  }
660
767
  if (content.length === 0) {
@@ -690,6 +797,14 @@ class DocumentParser {
690
797
  id: id,
691
798
  skipNormalization: true,
692
799
  });
800
+ if (this.bookmarkManager) {
801
+ try {
802
+ this.bookmarkManager.registerExisting(bookmark);
803
+ }
804
+ catch (e) {
805
+ logger_1.defaultLogger.debug("[DocumentParser] Bookmark already registered:", { name: nameAttr, id: id });
806
+ }
807
+ }
693
808
  return bookmark;
694
809
  }
695
810
  catch (error) {
@@ -762,9 +877,15 @@ class DocumentParser {
762
877
  ? [hyperlinks]
763
878
  : [];
764
879
  if (elementIndex < hyperlinkArray.length) {
765
- const hyperlink = this.parseHyperlinkFromObject(hyperlinkArray[elementIndex], relationshipManager);
766
- if (hyperlink) {
767
- paragraph.addHyperlink(hyperlink);
880
+ const result = this.parseHyperlinkFromObject(hyperlinkArray[elementIndex], relationshipManager);
881
+ if (result.hyperlink) {
882
+ paragraph.addHyperlink(result.hyperlink);
883
+ }
884
+ for (const bookmark of result.bookmarkStarts) {
885
+ paragraph.addBookmarkStart(bookmark);
886
+ }
887
+ for (const bookmark of result.bookmarkEnds) {
888
+ paragraph.addBookmarkEnd(bookmark);
768
889
  }
769
890
  }
770
891
  }
@@ -810,9 +931,15 @@ class DocumentParser {
810
931
  ? [hyperlinks]
811
932
  : [];
812
933
  for (const hyperlinkObj of hyperlinkChildren) {
813
- const hyperlink = this.parseHyperlinkFromObject(hyperlinkObj, relationshipManager);
814
- if (hyperlink) {
815
- paragraph.addHyperlink(hyperlink);
934
+ const result = this.parseHyperlinkFromObject(hyperlinkObj, relationshipManager);
935
+ if (result.hyperlink) {
936
+ paragraph.addHyperlink(result.hyperlink);
937
+ }
938
+ for (const bookmark of result.bookmarkStarts) {
939
+ paragraph.addBookmarkStart(bookmark);
940
+ }
941
+ for (const bookmark of result.bookmarkEnds) {
942
+ paragraph.addBookmarkEnd(bookmark);
816
943
  }
817
944
  }
818
945
  const fields = paraObj["w:fldSimple"];
@@ -1817,7 +1944,48 @@ class DocumentParser {
1817
1944
  }
1818
1945
  }
1819
1946
  parseHyperlinkFromObject(hyperlinkObj, relationshipManager) {
1947
+ const result = { hyperlink: null, bookmarkStarts: [], bookmarkEnds: [] };
1820
1948
  try {
1949
+ if (hyperlinkObj["w:bookmarkStart"]) {
1950
+ const bookmarkStarts = Array.isArray(hyperlinkObj["w:bookmarkStart"])
1951
+ ? hyperlinkObj["w:bookmarkStart"]
1952
+ : [hyperlinkObj["w:bookmarkStart"]];
1953
+ for (const bs of bookmarkStarts) {
1954
+ const id = bs["@_w:id"];
1955
+ const name = bs["@_w:name"];
1956
+ if (id !== undefined && name) {
1957
+ const bookmark = new Bookmark_1.Bookmark({
1958
+ name: name,
1959
+ id: typeof id === "number" ? id : parseInt(id, 10),
1960
+ skipNormalization: true,
1961
+ });
1962
+ result.bookmarkStarts.push(bookmark);
1963
+ if (this.bookmarkManager) {
1964
+ try {
1965
+ this.bookmarkManager.registerExisting(bookmark);
1966
+ }
1967
+ catch {
1968
+ }
1969
+ }
1970
+ }
1971
+ }
1972
+ }
1973
+ if (hyperlinkObj["w:bookmarkEnd"]) {
1974
+ const bookmarkEnds = Array.isArray(hyperlinkObj["w:bookmarkEnd"])
1975
+ ? hyperlinkObj["w:bookmarkEnd"]
1976
+ : [hyperlinkObj["w:bookmarkEnd"]];
1977
+ for (const be of bookmarkEnds) {
1978
+ const id = be["@_w:id"];
1979
+ if (id !== undefined) {
1980
+ const bookmark = new Bookmark_1.Bookmark({
1981
+ name: `_end_${id}`,
1982
+ id: typeof id === "number" ? id : parseInt(id, 10),
1983
+ skipNormalization: true,
1984
+ });
1985
+ result.bookmarkEnds.push(bookmark);
1986
+ }
1987
+ }
1988
+ }
1821
1989
  const relationshipId = hyperlinkObj["@_r:id"];
1822
1990
  const anchor = hyperlinkObj["@_w:anchor"];
1823
1991
  const tooltip = hyperlinkObj["@_w:tooltip"];
@@ -1879,13 +2047,14 @@ class DocumentParser {
1879
2047
  if (parsedRun && parsedRun.getContent().length > 1) {
1880
2048
  hyperlink.setRun(parsedRun);
1881
2049
  }
1882
- return hyperlink;
2050
+ result.hyperlink = hyperlink;
2051
+ return result;
1883
2052
  }
1884
2053
  catch (error) {
1885
2054
  logger_1.defaultLogger.warn("[DocumentParser] Failed to parse hyperlink:", error instanceof Error
1886
2055
  ? { message: error.message, stack: error.stack }
1887
2056
  : { error: String(error) });
1888
- return null;
2057
+ return result;
1889
2058
  }
1890
2059
  }
1891
2060
  mergeConsecutiveHyperlinks(paragraph, resetFormatting = false) {
@@ -2459,12 +2628,49 @@ class DocumentParser {
2459
2628
  }
2460
2629
  rowXmls = XMLParser_1.XMLParser.extractElements(rawTableXml, "w:tr");
2461
2630
  }
2631
+ const rowPositions = [];
2632
+ if (rawTableXml) {
2633
+ let searchPos = 0;
2634
+ for (const rowXml of rowXmls) {
2635
+ const rowStart = rawTableXml.indexOf(rowXml, searchPos);
2636
+ if (rowStart !== -1) {
2637
+ rowPositions.push({
2638
+ start: rowStart,
2639
+ end: rowStart + rowXml.length,
2640
+ });
2641
+ searchPos = rowStart + rowXml.length;
2642
+ }
2643
+ }
2644
+ }
2462
2645
  for (let i = 0; i < rowChildren.length; i++) {
2463
2646
  const rowObj = rowChildren[i];
2464
2647
  const rawRowXml = i < rowXmls.length ? rowXmls[i] : undefined;
2465
2648
  const row = await this.parseTableRowFromObject(rowObj, relationshipManager, zipHandler, imageManager, rawRowXml);
2466
2649
  if (row) {
2467
2650
  table.addRow(row);
2651
+ if (rawTableXml && i < rowPositions.length) {
2652
+ const currentRowEnd = rowPositions[i]?.end || 0;
2653
+ const nextRowStart = i + 1 < rowPositions.length
2654
+ ? rowPositions[i + 1]?.start
2655
+ : rawTableXml.length;
2656
+ if (nextRowStart && currentRowEnd < nextRowStart) {
2657
+ const betweenContent = rawTableXml.slice(currentRowEnd, nextRowStart);
2658
+ const bookmarkEnds = this.extractBookmarkEndsFromContent(betweenContent);
2659
+ if (bookmarkEnds.length > 0) {
2660
+ const cells = row.getCells();
2661
+ const lastCell = cells[cells.length - 1];
2662
+ if (lastCell) {
2663
+ const cellParas = lastCell.getParagraphs();
2664
+ const lastPara = cellParas[cellParas.length - 1];
2665
+ if (lastPara) {
2666
+ for (const bookmark of bookmarkEnds) {
2667
+ lastPara.addBookmarkEnd(bookmark);
2668
+ }
2669
+ }
2670
+ }
2671
+ }
2672
+ }
2673
+ }
2468
2674
  }
2469
2675
  }
2470
2676
  return table;
@@ -2814,17 +3020,27 @@ class DocumentParser {
2814
3020
  if (rawCellXml) {
2815
3021
  const cellContent = this.extractCellContentInOrder(rawCellXml);
2816
3022
  let paragraphIndex = 0;
3023
+ let lastParagraph = null;
2817
3024
  for (const item of cellContent) {
2818
3025
  if (item.type === "paragraph") {
2819
3026
  const paragraph = await this.parseParagraphWithOrder(item.xml, relationshipManager, zipHandler, imageManager);
2820
3027
  if (paragraph) {
2821
3028
  cell.addParagraph(paragraph);
3029
+ lastParagraph = paragraph;
2822
3030
  paragraphIndex++;
2823
3031
  }
2824
3032
  }
2825
3033
  else if (item.type === "table" || item.type === "sdt") {
2826
3034
  cell.addRawNestedContent(paragraphIndex, item.xml, item.type);
2827
3035
  }
3036
+ else if (item.type === "bookmarkEnd") {
3037
+ if (lastParagraph) {
3038
+ const bookmarkEnds = this.extractBookmarkEndsFromContent(item.xml);
3039
+ for (const bookmark of bookmarkEnds) {
3040
+ lastParagraph.addBookmarkEnd(bookmark);
3041
+ }
3042
+ }
3043
+ }
2828
3044
  }
2829
3045
  }
2830
3046
  else {
@@ -2869,6 +3085,7 @@ class DocumentParser {
2869
3085
  const pStart = content.indexOf("<w:p", pos);
2870
3086
  const tblStart = content.indexOf("<w:tbl", pos);
2871
3087
  const sdtStart = content.indexOf("<w:sdt", pos);
3088
+ const bookmarkEndStart = content.indexOf("<w:bookmarkEnd", pos);
2872
3089
  let nextStart = -1;
2873
3090
  let nextType = null;
2874
3091
  let nextTag = "";
@@ -2893,16 +3110,34 @@ class DocumentParser {
2893
3110
  nextType = "sdt";
2894
3111
  nextTag = "w:sdt";
2895
3112
  }
3113
+ if (bookmarkEndStart !== -1 &&
3114
+ (nextStart === -1 || bookmarkEndStart < nextStart)) {
3115
+ nextStart = bookmarkEndStart;
3116
+ nextType = "bookmarkEnd";
3117
+ nextTag = "w:bookmarkEnd";
3118
+ }
2896
3119
  if (nextStart === -1 || nextType === null)
2897
3120
  break;
2898
- const elementEnd = this.findClosingTag(content, nextTag, nextStart);
2899
- if (elementEnd === -1) {
2900
- pos = nextStart + 1;
2901
- continue;
3121
+ if (nextType === "bookmarkEnd") {
3122
+ const elementEnd = content.indexOf(">", nextStart) + 1;
3123
+ if (elementEnd === 0) {
3124
+ pos = nextStart + 1;
3125
+ continue;
3126
+ }
3127
+ const elementXml = content.substring(nextStart, elementEnd);
3128
+ result.push({ type: nextType, xml: elementXml });
3129
+ pos = elementEnd;
3130
+ }
3131
+ else {
3132
+ const elementEnd = this.findClosingTag(content, nextTag, nextStart);
3133
+ if (elementEnd === -1) {
3134
+ pos = nextStart + 1;
3135
+ continue;
3136
+ }
3137
+ const elementXml = content.substring(nextStart, elementEnd);
3138
+ result.push({ type: nextType, xml: elementXml });
3139
+ pos = elementEnd;
2902
3140
  }
2903
- const elementXml = content.substring(nextStart, elementEnd);
2904
- result.push({ type: nextType, xml: elementXml });
2905
- pos = elementEnd;
2906
3141
  }
2907
3142
  return result;
2908
3143
  }