docxmlater 7.7.7 → 7.7.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,4 @@
1
+ import { BookmarkManager } from "../elements/BookmarkManager";
1
2
  import { ImageManager } from "../elements/ImageManager";
2
3
  import { Paragraph } from "../elements/Paragraph";
3
4
  import { Section } from "../elements/Section";
@@ -18,10 +19,11 @@ type BodyElement = Paragraph | Table | TableOfContentsElement | StructuredDocume
18
19
  export declare class DocumentParser {
19
20
  private parseErrors;
20
21
  private strictParsing;
22
+ private bookmarkManager;
21
23
  constructor(strictParsing?: boolean);
22
24
  getParseErrors(): ParseError[];
23
25
  clearParseErrors(): void;
24
- parseDocument(zipHandler: ZipHandler, relationshipManager: RelationshipManager, imageManager: ImageManager): Promise<{
26
+ parseDocument(zipHandler: ZipHandler, relationshipManager: RelationshipManager, imageManager: ImageManager, bookmarkManager?: BookmarkManager): Promise<{
25
27
  bodyElements: BodyElement[];
26
28
  properties: DocumentProperties;
27
29
  relationshipManager: RelationshipManager;
@@ -32,6 +34,8 @@ export declare class DocumentParser {
32
34
  namespaces: Record<string, string>;
33
35
  }>;
34
36
  private parseBodyElements;
37
+ private extractBodyLevelBookmarkEnds;
38
+ private extractBookmarkEndsFromContent;
35
39
  private findNextTag;
36
40
  private findNextTopLevelTag;
37
41
  private isPositionInsideTable;
@@ -40,6 +44,8 @@ export declare class DocumentParser {
40
44
  private parseParagraphWithOrder;
41
45
  private parseOrderedParagraphChildren;
42
46
  private parseRevisionFromXml;
47
+ private parseBookmarkStart;
48
+ private parseBookmarkEnd;
43
49
  private parseParagraphFromObject;
44
50
  private parseParagraphPropertiesFromObject;
45
51
  private assembleComplexFields;
@@ -1 +1 @@
1
- {"version":3,"file":"DocumentParser.d.ts","sourceRoot":"","sources":["../../src/core/DocumentParser.ts"],"names":[],"mappings":"AAUA,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AAExD,OAAO,EAAE,SAAS,EAAyC,MAAM,uBAAuB,CAAC;AAGzF,OAAO,EAEL,OAAO,EAGR,MAAM,qBAAqB,CAAC;AAC7B,OAAO,EAAE,qBAAqB,EAAE,MAAM,mCAAmC,CAAC;AAC1E,OAAO,EAAE,KAAK,EAAE,MAAM,mBAAmB,CAAC;AAG1C,OAAO,EAAE,sBAAsB,EAAE,MAAM,oCAAoC,CAAC;AAE5E,OAAO,EAAE,iBAAiB,EAAE,MAAM,iCAAiC,CAAC;AACpE,OAAO,EAAE,iBAAiB,EAAE,MAAM,iCAAiC,CAAC;AACpE,OAAO,EAAE,KAAK,EAA8B,MAAM,qBAAqB,CAAC;AAcxE,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAE/C,OAAO,EAAE,kBAAkB,EAAE,MAAM,YAAY,CAAC;AAChD,OAAO,EAAE,mBAAmB,EAAE,MAAM,uBAAuB,CAAC;AAK5D,MAAM,WAAW,UAAU;IACzB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,KAAK,CAAC;CACd;AAKD,KAAK,WAAW,GACZ,SAAS,GACT,KAAK,GACL,sBAAsB,GACtB,qBAAqB,CAAC;AAK1B,qBAAa,cAAc;IACzB,OAAO,CAAC,WAAW,CAAoB;IACvC,OAAO,CAAC,aAAa,CAAU;gBAEnB,aAAa,GAAE,OAAe;IAO1C,cAAc,IAAI,UAAU,EAAE;IAO9B,gBAAgB,IAAI,IAAI;IAWlB,aAAa,CACjB,UAAU,EAAE,UAAU,EACtB,mBAAmB,EAAE,mBAAmB,EACxC,YAAY,EAAE,YAAY,GACzB,OAAO,CAAC;QACT,YAAY,EAAE,WAAW,EAAE,CAAC;QAC5B,UAAU,EAAE,kBAAkB,CAAC;QAC/B,mBAAmB,EAAE,mBAAmB,CAAC;QACzC,MAAM,EAAE,KAAK,EAAE,CAAC;QAChB,kBAAkB,EAAE,iBAAiB,EAAE,CAAC;QACxC,kBAAkB,EAAE,iBAAiB,EAAE,CAAC;QACxC,OAAO,EAAE,OAAO,GAAG,IAAI,CAAC;QACxB,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KACpC,CAAC;YAsFY,iBAAiB;IAoH/B,OAAO,CAAC,WAAW;IAmCnB,OAAO,CAAC,mBAAmB;IAkC3B,OAAO,CAAC,qBAAqB;IAoB7B,OAAO,CAAC,oBAAoB;IAuB5B,OAAO,CAAC,qBAAqB;YAwEf,uBAAuB;YA4FvB,6BAA6B;YA+S7B,oBAAoB;YAkHpB,wBAAwB;IAsKtC,OAAO,CAAC,kCAAkC;IA6Y1C,OAAO,CAAC,qBAAqB;IAsP7B,OAAO,CAAC,4BAA4B;IAiHpC,OAAO,CAAC,oBAAoB;IAiC5B,OAAO,CAAC,0BAA0B;IA8IlC,OAAO,CAAC,wCAAwC;IA0EhD,OAAO,CAAC,0BAA0B;IAmHlC,OAAO,CAAC,kBAAkB;IAsN1B,OAAO,CAAC,wBAAwB;IAgIhC,OAAO,CAAC,0BAA0B;IAiIlC,OAAO,CAAC,8BAA8B;IActC,OAAO,CAAC,0BAA0B;IAwClC,OAAO,CAAC,4BAA4B;YAwLtB,sBAAsB;IAqNpC,OAAO,CAAC,iBAAiB;IA4CzB,OAAO,CAAC,kBAAkB;IA6D1B,OAAO,CAAC,cAAc;IAmBtB,OAAO,CAAC,iBAAiB;YAuBX,oBAAoB;IA2ElC,OAAO,CAAC,8BAA8B;YAuFxB,uBAAuB;IAyErC,OAAO,CAAC,iCAAiC;IAyDzC,OAAO,CAAC,sCAAsC;IA4D9C,OAAO,CAAC,2BAA2B;YAgCrB,wBAAwB;IAwNtC,OAAO,CAAC,yBAAyB;IAmFjC,OAAO,CAAC,UAAU;IAmBlB,OAAO,CAAC,cAAc;YAsCR,kBAAkB;IAqPhC,OAAO,CAAC,6BAA6B;IAwCrC,OAAO,CAAC,4BAA4B;IA6JpC,OAAO,CAAC,4BAA4B;IA6FpC,OAAO,CAAC,sBAAsB;IAmI9B,OAAO,CAAC,cAAc;IA2BtB,OAAO,CAAC,WAAW;IAgHnB,OAAO,CAAC,kBAAkB;IAoB1B,OAAO,CAAC,eAAe;IAuEvB,OAAO,CAAC,qBAAqB;IA8D7B,OAAO,CAAC,WAAW;IA8CnB,OAAO,CAAC,cAAc;IAqEtB,OAAO,CAAC,sBAAsB;IAoP9B,OAAO,CAAC,UAAU;IAwMlB,OAAO,CAAC,+BAA+B;IAqHvC,OAAO,CAAC,yBAAyB;IAyJjC,OAAO,CAAC,yBAAyB;IA+EjC,OAAO,CAAC,2BAA2B;IAqEnC,OAAO,CAAC,+BAA+B;IAmDvC,OAAO,CAAC,8BAA8B;IA8CtC,OAAO,CAAC,iCAAiC;IA6FzC,OAAO,CAAC,mBAAmB;IAqF3B,OAAO,CAAC,mBAAmB;IAqB3B,OAAO,CAAC,uBAAuB;IA6B/B,MAAM,CAAC,SAAS,CAAC,UAAU,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI;IA+BzE,MAAM,CAAC,SAAS,CACd,UAAU,EAAE,UAAU,EACtB,QAAQ,EAAE,MAAM,EAChB,UAAU,EAAE,MAAM,GACjB,OAAO;IAwBV,MAAM,CAAC,gBAAgB,CACrB,UAAU,EAAE,UAAU,EACtB,QAAQ,EAAE,MAAM,GACf,KAAK,CAAC;QACP,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,IAAI,CAAC,EAAE,MAAM,CAAC;QACd,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,UAAU,CAAC,EAAE,MAAM,CAAC;KACrB,CAAC;IAmFF,OAAO,CAAC,eAAe;IA4BjB,sBAAsB,CAC1B,UAAU,EAAE,UAAU,EACtB,OAAO,EAAE,OAAO,GAAG,IAAI,EACvB,mBAAmB,EAAE,mBAAmB,EACxC,YAAY,EAAE,YAAY,GACzB,OAAO,CAAC;QACT,OAAO,EAAE,KAAK,CAAC;YACb,MAAM,EAAE,OAAO,oBAAoB,EAAE,MAAM,CAAC;YAC5C,cAAc,EAAE,MAAM,CAAC;YACvB,QAAQ,EAAE,MAAM,CAAC;SAClB,CAAC,CAAC;QACH,OAAO,EAAE,KAAK,CAAC;YACb,MAAM,EAAE,OAAO,oBAAoB,EAAE,MAAM,CAAC;YAC5C,cAAc,EAAE,MAAM,CAAC;YACvB,QAAQ,EAAE,MAAM,CAAC;SAClB,CAAC,CAAC;KACJ,CAAC;YA4FY,WAAW;YA6DX,WAAW;CAmD1B"}
1
+ {"version":3,"file":"DocumentParser.d.ts","sourceRoot":"","sources":["../../src/core/DocumentParser.ts"],"names":[],"mappings":"AAMA,OAAO,EAAE,eAAe,EAAE,MAAM,6BAA6B,CAAC;AAM9D,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AAExD,OAAO,EAAE,SAAS,EAAyC,MAAM,uBAAuB,CAAC;AAGzF,OAAO,EAEL,OAAO,EAGR,MAAM,qBAAqB,CAAC;AAC7B,OAAO,EAAE,qBAAqB,EAAE,MAAM,mCAAmC,CAAC;AAC1E,OAAO,EAAE,KAAK,EAAE,MAAM,mBAAmB,CAAC;AAG1C,OAAO,EAAE,sBAAsB,EAAE,MAAM,oCAAoC,CAAC;AAE5E,OAAO,EAAE,iBAAiB,EAAE,MAAM,iCAAiC,CAAC;AACpE,OAAO,EAAE,iBAAiB,EAAE,MAAM,iCAAiC,CAAC;AACpE,OAAO,EAAE,KAAK,EAA8B,MAAM,qBAAqB,CAAC;AAcxE,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAE/C,OAAO,EAAE,kBAAkB,EAAE,MAAM,YAAY,CAAC;AAChD,OAAO,EAAE,mBAAmB,EAAE,MAAM,uBAAuB,CAAC;AAK5D,MAAM,WAAW,UAAU;IACzB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,KAAK,CAAC;CACd;AAKD,KAAK,WAAW,GACZ,SAAS,GACT,KAAK,GACL,sBAAsB,GACtB,qBAAqB,CAAC;AAK1B,qBAAa,cAAc;IACzB,OAAO,CAAC,WAAW,CAAoB;IACvC,OAAO,CAAC,aAAa,CAAU;IAC/B,OAAO,CAAC,eAAe,CAAgC;gBAE3C,aAAa,GAAE,OAAe;IAO1C,cAAc,IAAI,UAAU,EAAE;IAO9B,gBAAgB,IAAI,IAAI;IAWlB,aAAa,CACjB,UAAU,EAAE,UAAU,EACtB,mBAAmB,EAAE,mBAAmB,EACxC,YAAY,EAAE,YAAY,EAC1B,eAAe,CAAC,EAAE,eAAe,GAChC,OAAO,CAAC;QACT,YAAY,EAAE,WAAW,EAAE,CAAC;QAC5B,UAAU,EAAE,kBAAkB,CAAC;QAC/B,mBAAmB,EAAE,mBAAmB,CAAC;QACzC,MAAM,EAAE,KAAK,EAAE,CAAC;QAChB,kBAAkB,EAAE,iBAAiB,EAAE,CAAC;QACxC,kBAAkB,EAAE,iBAAiB,EAAE,CAAC;QACxC,OAAO,EAAE,OAAO,GAAG,IAAI,CAAC;QACxB,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KACpC,CAAC;YAyFY,iBAAiB;IA6L/B,OAAO,CAAC,4BAA4B;IAsCpC,OAAO,CAAC,8BAA8B;IA2BtC,OAAO,CAAC,WAAW;IAmCnB,OAAO,CAAC,mBAAmB;IAkC3B,OAAO,CAAC,qBAAqB;IAoB7B,OAAO,CAAC,oBAAoB;IAuB5B,OAAO,CAAC,qBAAqB;YAwEf,uBAAuB;YA4FvB,6BAA6B;YAgW7B,oBAAoB;IAyHlC,OAAO,CAAC,kBAAkB;IAmD1B,OAAO,CAAC,gBAAgB;YA8BV,wBAAwB;IAoLtC,OAAO,CAAC,kCAAkC;IA6Y1C,OAAO,CAAC,qBAAqB;IAsP7B,OAAO,CAAC,4BAA4B;IAiHpC,OAAO,CAAC,oBAAoB;IAiC5B,OAAO,CAAC,0BAA0B;IA8IlC,OAAO,CAAC,wCAAwC;IA0EhD,OAAO,CAAC,0BAA0B;IAmHlC,OAAO,CAAC,kBAAkB;IAsN1B,OAAO,CAAC,wBAAwB;IAwLhC,OAAO,CAAC,0BAA0B;IAiIlC,OAAO,CAAC,8BAA8B;IActC,OAAO,CAAC,0BAA0B;IAwClC,OAAO,CAAC,4BAA4B;YAwLtB,sBAAsB;IAqNpC,OAAO,CAAC,iBAAiB;IA4CzB,OAAO,CAAC,kBAAkB;IA6D1B,OAAO,CAAC,cAAc;IAmBtB,OAAO,CAAC,iBAAiB;YAuBX,oBAAoB;IA4HlC,OAAO,CAAC,8BAA8B;YAuFxB,uBAAuB;IAyErC,OAAO,CAAC,iCAAiC;IAyDzC,OAAO,CAAC,sCAAsC;IA4D9C,OAAO,CAAC,2BAA2B;YAgCrB,wBAAwB;IAkOtC,OAAO,CAAC,yBAAyB;IA0GjC,OAAO,CAAC,UAAU;IAmBlB,OAAO,CAAC,cAAc;YAsCR,kBAAkB;IAqPhC,OAAO,CAAC,6BAA6B;IAwCrC,OAAO,CAAC,4BAA4B;IA6JpC,OAAO,CAAC,4BAA4B;IA6FpC,OAAO,CAAC,sBAAsB;IAmI9B,OAAO,CAAC,cAAc;IA2BtB,OAAO,CAAC,WAAW;IAgHnB,OAAO,CAAC,kBAAkB;IAoB1B,OAAO,CAAC,eAAe;IAuEvB,OAAO,CAAC,qBAAqB;IA8D7B,OAAO,CAAC,WAAW;IA8CnB,OAAO,CAAC,cAAc;IAqEtB,OAAO,CAAC,sBAAsB;IAoP9B,OAAO,CAAC,UAAU;IAwMlB,OAAO,CAAC,+BAA+B;IAqHvC,OAAO,CAAC,yBAAyB;IAyJjC,OAAO,CAAC,yBAAyB;IA+EjC,OAAO,CAAC,2BAA2B;IAqEnC,OAAO,CAAC,+BAA+B;IAmDvC,OAAO,CAAC,8BAA8B;IA8CtC,OAAO,CAAC,iCAAiC;IA6FzC,OAAO,CAAC,mBAAmB;IAqF3B,OAAO,CAAC,mBAAmB;IAqB3B,OAAO,CAAC,uBAAuB;IA6B/B,MAAM,CAAC,SAAS,CAAC,UAAU,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI;IA+BzE,MAAM,CAAC,SAAS,CACd,UAAU,EAAE,UAAU,EACtB,QAAQ,EAAE,MAAM,EAChB,UAAU,EAAE,MAAM,GACjB,OAAO;IAwBV,MAAM,CAAC,gBAAgB,CACrB,UAAU,EAAE,UAAU,EACtB,QAAQ,EAAE,MAAM,GACf,KAAK,CAAC;QACP,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,IAAI,CAAC,EAAE,MAAM,CAAC;QACd,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,UAAU,CAAC,EAAE,MAAM,CAAC;KACrB,CAAC;IAmFF,OAAO,CAAC,eAAe;IA4BjB,sBAAsB,CAC1B,UAAU,EAAE,UAAU,EACtB,OAAO,EAAE,OAAO,GAAG,IAAI,EACvB,mBAAmB,EAAE,mBAAmB,EACxC,YAAY,EAAE,YAAY,GACzB,OAAO,CAAC;QACT,OAAO,EAAE,KAAK,CAAC;YACb,MAAM,EAAE,OAAO,oBAAoB,EAAE,MAAM,CAAC;YAC5C,cAAc,EAAE,MAAM,CAAC;YACvB,QAAQ,EAAE,MAAM,CAAC;SAClB,CAAC,CAAC;QACH,OAAO,EAAE,KAAK,CAAC;YACb,MAAM,EAAE,OAAO,oBAAoB,EAAE,MAAM,CAAC;YAC5C,cAAc,EAAE,MAAM,CAAC;YACvB,QAAQ,EAAE,MAAM,CAAC;SAClB,CAAC,CAAC;KACJ,CAAC;YA4FY,WAAW;YA6DX,WAAW;CAmD1B"}
@@ -34,6 +34,7 @@ var __importStar = (this && this.__importStar) || (function () {
34
34
  })();
35
35
  Object.defineProperty(exports, "__esModule", { value: true });
36
36
  exports.DocumentParser = void 0;
37
+ const Bookmark_1 = require("../elements/Bookmark");
37
38
  const Field_1 = require("../elements/Field");
38
39
  const FieldHelpers_1 = require("../elements/FieldHelpers");
39
40
  const Footer_1 = require("../elements/Footer");
@@ -65,6 +66,7 @@ const RelationshipManager_1 = require("./RelationshipManager");
65
66
  class DocumentParser {
66
67
  parseErrors = [];
67
68
  strictParsing;
69
+ bookmarkManager = null;
68
70
  constructor(strictParsing = false) {
69
71
  this.strictParsing = strictParsing;
70
72
  }
@@ -74,9 +76,10 @@ class DocumentParser {
74
76
  clearParseErrors() {
75
77
  this.parseErrors = [];
76
78
  }
77
- async parseDocument(zipHandler, relationshipManager, imageManager) {
79
+ async parseDocument(zipHandler, relationshipManager, imageManager, bookmarkManager) {
78
80
  const logger = getLogger();
79
81
  logger.info('Parsing document');
82
+ this.bookmarkManager = bookmarkManager || null;
80
83
  const docXml = zipHandler.getFileAsString(types_1.DOCX_PATHS.DOCUMENT);
81
84
  if (!docXml) {
82
85
  logger.error('Invalid document: word/document.xml not found');
@@ -145,6 +148,34 @@ class DocumentParser {
145
148
  candidates.sort((a, b) => a.pos - b.pos);
146
149
  const next = candidates[0];
147
150
  if (next) {
151
+ if (bodyElements.length > 0 && next.pos > pos) {
152
+ const bookmarkEnds = this.extractBodyLevelBookmarkEnds(bodyContent, pos, next.pos);
153
+ if (bookmarkEnds.length > 0) {
154
+ const prevElement = bodyElements[bodyElements.length - 1];
155
+ if (prevElement instanceof Paragraph_1.Paragraph) {
156
+ for (const bookmark of bookmarkEnds) {
157
+ prevElement.addBookmarkEnd(bookmark);
158
+ }
159
+ }
160
+ else if (prevElement instanceof Table_1.Table) {
161
+ const rows = prevElement.getRows();
162
+ const lastRow = rows[rows.length - 1];
163
+ if (lastRow) {
164
+ const cells = lastRow.getCells();
165
+ const lastCell = cells[cells.length - 1];
166
+ if (lastCell) {
167
+ const cellParas = lastCell.getParagraphs();
168
+ const lastPara = cellParas[cellParas.length - 1];
169
+ if (lastPara) {
170
+ for (const bookmark of bookmarkEnds) {
171
+ lastPara.addBookmarkEnd(bookmark);
172
+ }
173
+ }
174
+ }
175
+ }
176
+ }
177
+ }
178
+ }
148
179
  if (next.type === "p") {
149
180
  const elementXml = this.extractSingleElement(bodyContent, "w:p", next.pos);
150
181
  if (elementXml) {
@@ -189,10 +220,81 @@ class DocumentParser {
189
220
  }
190
221
  }
191
222
  }
223
+ if (bodyElements.length > 0 && pos < bodyContent.length) {
224
+ const trailingBookmarkEnds = this.extractBodyLevelBookmarkEnds(bodyContent, pos, -1);
225
+ if (trailingBookmarkEnds.length > 0) {
226
+ const lastElement = bodyElements[bodyElements.length - 1];
227
+ if (lastElement instanceof Paragraph_1.Paragraph) {
228
+ for (const bookmark of trailingBookmarkEnds) {
229
+ lastElement.addBookmarkEnd(bookmark);
230
+ }
231
+ }
232
+ else if (lastElement instanceof Table_1.Table) {
233
+ const rows = lastElement.getRows();
234
+ const lastRow = rows[rows.length - 1];
235
+ if (lastRow) {
236
+ const cells = lastRow.getCells();
237
+ const lastCell = cells[cells.length - 1];
238
+ if (lastCell) {
239
+ const cellParas = lastCell.getParagraphs();
240
+ const lastPara = cellParas[cellParas.length - 1];
241
+ if (lastPara) {
242
+ for (const bookmark of trailingBookmarkEnds) {
243
+ lastPara.addBookmarkEnd(bookmark);
244
+ }
245
+ }
246
+ }
247
+ }
248
+ }
249
+ }
250
+ }
192
251
  this.assembleMultiParagraphFields(bodyElements);
193
252
  this.validateLoadedContent(bodyElements);
194
253
  return bodyElements;
195
254
  }
255
+ extractBodyLevelBookmarkEnds(content, startPos, endPos) {
256
+ const bookmarks = [];
257
+ const searchContent = endPos === -1
258
+ ? content.slice(startPos)
259
+ : content.slice(startPos, endPos);
260
+ const bookmarkEndRegex = /<w:bookmarkEnd[^>]*w:id="(\d+)"[^>]*\/?>/g;
261
+ let match;
262
+ while ((match = bookmarkEndRegex.exec(searchContent)) !== null) {
263
+ const idStr = match[1];
264
+ if (idStr) {
265
+ const id = parseInt(idStr, 10);
266
+ if (!isNaN(id)) {
267
+ const bookmark = new Bookmark_1.Bookmark({
268
+ name: `_end_${id}`,
269
+ id: id,
270
+ skipNormalization: true,
271
+ });
272
+ bookmarks.push(bookmark);
273
+ }
274
+ }
275
+ }
276
+ return bookmarks;
277
+ }
278
+ extractBookmarkEndsFromContent(content) {
279
+ const bookmarks = [];
280
+ const bookmarkEndRegex = /<w:bookmarkEnd[^>]*w:id="(\d+)"[^>]*\/?>/g;
281
+ let match;
282
+ while ((match = bookmarkEndRegex.exec(content)) !== null) {
283
+ const idStr = match[1];
284
+ if (idStr) {
285
+ const id = parseInt(idStr, 10);
286
+ if (!isNaN(id)) {
287
+ const bookmark = new Bookmark_1.Bookmark({
288
+ name: `_end_${id}`,
289
+ id: id,
290
+ skipNormalization: true,
291
+ });
292
+ bookmarks.push(bookmark);
293
+ }
294
+ }
295
+ }
296
+ return bookmarks;
297
+ }
196
298
  findNextTag(content, tagName, startPos) {
197
299
  const tag = `<${tagName}`;
198
300
  let pos = content.indexOf(tag, startPos);
@@ -344,6 +446,8 @@ class DocumentParser {
344
446
  let delIndex = 0;
345
447
  let moveFromIndex = 0;
346
448
  let moveToIndex = 0;
449
+ let bookmarkStartIndex = 0;
450
+ let bookmarkEndIndex = 0;
347
451
  const findClosingTagEnd = (content, tagName, startPos) => {
348
452
  const closingTag = `</${tagName}>`;
349
453
  const closingPos = content.indexOf(closingTag, startPos);
@@ -417,6 +521,22 @@ class DocumentParser {
417
521
  });
418
522
  searchPos = selfClosing ? tagEnd + 1 : findClosingTagEnd(paraContent, "w:moveTo", tagEnd);
419
523
  }
524
+ else if (tagName === "w:bookmarkStart") {
525
+ children.push({
526
+ type: "w:bookmarkStart",
527
+ pos: tagStart,
528
+ index: bookmarkStartIndex++,
529
+ });
530
+ searchPos = tagEnd + 1;
531
+ }
532
+ else if (tagName === "w:bookmarkEnd") {
533
+ children.push({
534
+ type: "w:bookmarkEnd",
535
+ pos: tagStart,
536
+ index: bookmarkEndIndex++,
537
+ });
538
+ searchPos = tagEnd + 1;
539
+ }
420
540
  else {
421
541
  searchPos = tagEnd + 1;
422
542
  }
@@ -425,6 +545,8 @@ class DocumentParser {
425
545
  const delXmls = XMLParser_1.XMLParser.extractElements(paraContent, "w:del");
426
546
  const moveFromXmls = XMLParser_1.XMLParser.extractElements(paraContent, "w:moveFrom");
427
547
  const moveToXmls = XMLParser_1.XMLParser.extractElements(paraContent, "w:moveTo");
548
+ const bookmarkStartXmls = XMLParser_1.XMLParser.extractElements(paraContent, "w:bookmarkStart");
549
+ const bookmarkEndXmls = XMLParser_1.XMLParser.extractElements(paraContent, "w:bookmarkEnd");
428
550
  const extractRunXmlAtPosition = (pos) => {
429
551
  const closeTag = "</w:r>";
430
552
  let depth = 1;
@@ -492,9 +614,15 @@ class DocumentParser {
492
614
  ? [hyperlinks]
493
615
  : [];
494
616
  if (child.index < hyperlinkArray.length) {
495
- const hyperlink = this.parseHyperlinkFromObject(hyperlinkArray[child.index], relationshipManager);
496
- if (hyperlink) {
497
- paragraph.addHyperlink(hyperlink);
617
+ const result = this.parseHyperlinkFromObject(hyperlinkArray[child.index], relationshipManager);
618
+ if (result.hyperlink) {
619
+ paragraph.addHyperlink(result.hyperlink);
620
+ }
621
+ for (const bookmark of result.bookmarkStarts) {
622
+ paragraph.addBookmarkStart(bookmark);
623
+ }
624
+ for (const bookmark of result.bookmarkEnds) {
625
+ paragraph.addBookmarkEnd(bookmark);
498
626
  }
499
627
  }
500
628
  }
@@ -556,6 +684,28 @@ class DocumentParser {
556
684
  }
557
685
  }
558
686
  }
687
+ else if (child.type === "w:bookmarkStart") {
688
+ if (child.index < bookmarkStartXmls.length) {
689
+ const bookmarkXml = bookmarkStartXmls[child.index];
690
+ if (bookmarkXml) {
691
+ const bookmark = this.parseBookmarkStart(bookmarkXml);
692
+ if (bookmark) {
693
+ paragraph.addBookmarkStart(bookmark);
694
+ }
695
+ }
696
+ }
697
+ }
698
+ else if (child.type === "w:bookmarkEnd") {
699
+ if (child.index < bookmarkEndXmls.length) {
700
+ const bookmarkXml = bookmarkEndXmls[child.index];
701
+ if (bookmarkXml) {
702
+ const bookmark = this.parseBookmarkEnd(bookmarkXml);
703
+ if (bookmark) {
704
+ paragraph.addBookmarkEnd(bookmark);
705
+ }
706
+ }
707
+ }
708
+ }
559
709
  }
560
710
  }
561
711
  async parseRevisionFromXml(revisionXml, tagName, relationshipManager, zipHandler, imageManager) {
@@ -609,9 +759,9 @@ class DocumentParser {
609
759
  }
610
760
  for (const hyperlinkXml of hyperlinkXmls) {
611
761
  const hyperlinkObj = XMLParser_1.XMLParser.parseToObject(hyperlinkXml, { trimValues: false });
612
- const hyperlink = this.parseHyperlinkFromObject(hyperlinkObj["w:hyperlink"], relationshipManager);
613
- if (hyperlink) {
614
- content.push(hyperlink);
762
+ const result = this.parseHyperlinkFromObject(hyperlinkObj["w:hyperlink"], relationshipManager);
763
+ if (result.hyperlink) {
764
+ content.push(result.hyperlink);
615
765
  }
616
766
  }
617
767
  if (content.length === 0) {
@@ -634,6 +784,57 @@ class DocumentParser {
634
784
  return null;
635
785
  }
636
786
  }
787
+ parseBookmarkStart(bookmarkXml) {
788
+ try {
789
+ const idAttr = XMLParser_1.XMLParser.extractAttribute(bookmarkXml, "w:id");
790
+ const nameAttr = XMLParser_1.XMLParser.extractAttribute(bookmarkXml, "w:name");
791
+ if (!idAttr || !nameAttr) {
792
+ return null;
793
+ }
794
+ const id = parseInt(idAttr, 10);
795
+ const bookmark = new Bookmark_1.Bookmark({
796
+ name: nameAttr,
797
+ id: id,
798
+ skipNormalization: true,
799
+ });
800
+ if (this.bookmarkManager) {
801
+ try {
802
+ this.bookmarkManager.registerExisting(bookmark);
803
+ }
804
+ catch (e) {
805
+ logger_1.defaultLogger.debug("[DocumentParser] Bookmark already registered:", { name: nameAttr, id: id });
806
+ }
807
+ }
808
+ return bookmark;
809
+ }
810
+ catch (error) {
811
+ logger_1.defaultLogger.warn("[DocumentParser] Failed to parse bookmark start:", error instanceof Error
812
+ ? { message: error.message }
813
+ : { error: String(error) });
814
+ return null;
815
+ }
816
+ }
817
+ parseBookmarkEnd(bookmarkXml) {
818
+ try {
819
+ const idAttr = XMLParser_1.XMLParser.extractAttribute(bookmarkXml, "w:id");
820
+ if (!idAttr) {
821
+ return null;
822
+ }
823
+ const id = parseInt(idAttr, 10);
824
+ const bookmark = new Bookmark_1.Bookmark({
825
+ name: `_end_${id}`,
826
+ id: id,
827
+ skipNormalization: true,
828
+ });
829
+ return bookmark;
830
+ }
831
+ catch (error) {
832
+ logger_1.defaultLogger.warn("[DocumentParser] Failed to parse bookmark end:", error instanceof Error
833
+ ? { message: error.message }
834
+ : { error: String(error) });
835
+ return null;
836
+ }
837
+ }
637
838
  async parseParagraphFromObject(paraObj, relationshipManager, zipHandler, imageManager) {
638
839
  try {
639
840
  const paragraph = new Paragraph_1.Paragraph();
@@ -676,9 +877,15 @@ class DocumentParser {
676
877
  ? [hyperlinks]
677
878
  : [];
678
879
  if (elementIndex < hyperlinkArray.length) {
679
- const hyperlink = this.parseHyperlinkFromObject(hyperlinkArray[elementIndex], relationshipManager);
680
- if (hyperlink) {
681
- paragraph.addHyperlink(hyperlink);
880
+ const result = this.parseHyperlinkFromObject(hyperlinkArray[elementIndex], relationshipManager);
881
+ if (result.hyperlink) {
882
+ paragraph.addHyperlink(result.hyperlink);
883
+ }
884
+ for (const bookmark of result.bookmarkStarts) {
885
+ paragraph.addBookmarkStart(bookmark);
886
+ }
887
+ for (const bookmark of result.bookmarkEnds) {
888
+ paragraph.addBookmarkEnd(bookmark);
682
889
  }
683
890
  }
684
891
  }
@@ -724,9 +931,15 @@ class DocumentParser {
724
931
  ? [hyperlinks]
725
932
  : [];
726
933
  for (const hyperlinkObj of hyperlinkChildren) {
727
- const hyperlink = this.parseHyperlinkFromObject(hyperlinkObj, relationshipManager);
728
- if (hyperlink) {
729
- paragraph.addHyperlink(hyperlink);
934
+ const result = this.parseHyperlinkFromObject(hyperlinkObj, relationshipManager);
935
+ if (result.hyperlink) {
936
+ paragraph.addHyperlink(result.hyperlink);
937
+ }
938
+ for (const bookmark of result.bookmarkStarts) {
939
+ paragraph.addBookmarkStart(bookmark);
940
+ }
941
+ for (const bookmark of result.bookmarkEnds) {
942
+ paragraph.addBookmarkEnd(bookmark);
730
943
  }
731
944
  }
732
945
  const fields = paraObj["w:fldSimple"];
@@ -1731,7 +1944,48 @@ class DocumentParser {
1731
1944
  }
1732
1945
  }
1733
1946
  parseHyperlinkFromObject(hyperlinkObj, relationshipManager) {
1947
+ const result = { hyperlink: null, bookmarkStarts: [], bookmarkEnds: [] };
1734
1948
  try {
1949
+ if (hyperlinkObj["w:bookmarkStart"]) {
1950
+ const bookmarkStarts = Array.isArray(hyperlinkObj["w:bookmarkStart"])
1951
+ ? hyperlinkObj["w:bookmarkStart"]
1952
+ : [hyperlinkObj["w:bookmarkStart"]];
1953
+ for (const bs of bookmarkStarts) {
1954
+ const id = bs["@_w:id"];
1955
+ const name = bs["@_w:name"];
1956
+ if (id !== undefined && name) {
1957
+ const bookmark = new Bookmark_1.Bookmark({
1958
+ name: name,
1959
+ id: typeof id === "number" ? id : parseInt(id, 10),
1960
+ skipNormalization: true,
1961
+ });
1962
+ result.bookmarkStarts.push(bookmark);
1963
+ if (this.bookmarkManager) {
1964
+ try {
1965
+ this.bookmarkManager.registerExisting(bookmark);
1966
+ }
1967
+ catch {
1968
+ }
1969
+ }
1970
+ }
1971
+ }
1972
+ }
1973
+ if (hyperlinkObj["w:bookmarkEnd"]) {
1974
+ const bookmarkEnds = Array.isArray(hyperlinkObj["w:bookmarkEnd"])
1975
+ ? hyperlinkObj["w:bookmarkEnd"]
1976
+ : [hyperlinkObj["w:bookmarkEnd"]];
1977
+ for (const be of bookmarkEnds) {
1978
+ const id = be["@_w:id"];
1979
+ if (id !== undefined) {
1980
+ const bookmark = new Bookmark_1.Bookmark({
1981
+ name: `_end_${id}`,
1982
+ id: typeof id === "number" ? id : parseInt(id, 10),
1983
+ skipNormalization: true,
1984
+ });
1985
+ result.bookmarkEnds.push(bookmark);
1986
+ }
1987
+ }
1988
+ }
1735
1989
  const relationshipId = hyperlinkObj["@_r:id"];
1736
1990
  const anchor = hyperlinkObj["@_w:anchor"];
1737
1991
  const tooltip = hyperlinkObj["@_w:tooltip"];
@@ -1793,13 +2047,14 @@ class DocumentParser {
1793
2047
  if (parsedRun && parsedRun.getContent().length > 1) {
1794
2048
  hyperlink.setRun(parsedRun);
1795
2049
  }
1796
- return hyperlink;
2050
+ result.hyperlink = hyperlink;
2051
+ return result;
1797
2052
  }
1798
2053
  catch (error) {
1799
2054
  logger_1.defaultLogger.warn("[DocumentParser] Failed to parse hyperlink:", error instanceof Error
1800
2055
  ? { message: error.message, stack: error.stack }
1801
2056
  : { error: String(error) });
1802
- return null;
2057
+ return result;
1803
2058
  }
1804
2059
  }
1805
2060
  mergeConsecutiveHyperlinks(paragraph, resetFormatting = false) {
@@ -2373,12 +2628,49 @@ class DocumentParser {
2373
2628
  }
2374
2629
  rowXmls = XMLParser_1.XMLParser.extractElements(rawTableXml, "w:tr");
2375
2630
  }
2631
+ const rowPositions = [];
2632
+ if (rawTableXml) {
2633
+ let searchPos = 0;
2634
+ for (const rowXml of rowXmls) {
2635
+ const rowStart = rawTableXml.indexOf(rowXml, searchPos);
2636
+ if (rowStart !== -1) {
2637
+ rowPositions.push({
2638
+ start: rowStart,
2639
+ end: rowStart + rowXml.length,
2640
+ });
2641
+ searchPos = rowStart + rowXml.length;
2642
+ }
2643
+ }
2644
+ }
2376
2645
  for (let i = 0; i < rowChildren.length; i++) {
2377
2646
  const rowObj = rowChildren[i];
2378
2647
  const rawRowXml = i < rowXmls.length ? rowXmls[i] : undefined;
2379
2648
  const row = await this.parseTableRowFromObject(rowObj, relationshipManager, zipHandler, imageManager, rawRowXml);
2380
2649
  if (row) {
2381
2650
  table.addRow(row);
2651
+ if (rawTableXml && i < rowPositions.length) {
2652
+ const currentRowEnd = rowPositions[i]?.end || 0;
2653
+ const nextRowStart = i + 1 < rowPositions.length
2654
+ ? rowPositions[i + 1]?.start
2655
+ : rawTableXml.length;
2656
+ if (nextRowStart && currentRowEnd < nextRowStart) {
2657
+ const betweenContent = rawTableXml.slice(currentRowEnd, nextRowStart);
2658
+ const bookmarkEnds = this.extractBookmarkEndsFromContent(betweenContent);
2659
+ if (bookmarkEnds.length > 0) {
2660
+ const cells = row.getCells();
2661
+ const lastCell = cells[cells.length - 1];
2662
+ if (lastCell) {
2663
+ const cellParas = lastCell.getParagraphs();
2664
+ const lastPara = cellParas[cellParas.length - 1];
2665
+ if (lastPara) {
2666
+ for (const bookmark of bookmarkEnds) {
2667
+ lastPara.addBookmarkEnd(bookmark);
2668
+ }
2669
+ }
2670
+ }
2671
+ }
2672
+ }
2673
+ }
2382
2674
  }
2383
2675
  }
2384
2676
  return table;
@@ -2728,17 +3020,27 @@ class DocumentParser {
2728
3020
  if (rawCellXml) {
2729
3021
  const cellContent = this.extractCellContentInOrder(rawCellXml);
2730
3022
  let paragraphIndex = 0;
3023
+ let lastParagraph = null;
2731
3024
  for (const item of cellContent) {
2732
3025
  if (item.type === "paragraph") {
2733
3026
  const paragraph = await this.parseParagraphWithOrder(item.xml, relationshipManager, zipHandler, imageManager);
2734
3027
  if (paragraph) {
2735
3028
  cell.addParagraph(paragraph);
3029
+ lastParagraph = paragraph;
2736
3030
  paragraphIndex++;
2737
3031
  }
2738
3032
  }
2739
3033
  else if (item.type === "table" || item.type === "sdt") {
2740
3034
  cell.addRawNestedContent(paragraphIndex, item.xml, item.type);
2741
3035
  }
3036
+ else if (item.type === "bookmarkEnd") {
3037
+ if (lastParagraph) {
3038
+ const bookmarkEnds = this.extractBookmarkEndsFromContent(item.xml);
3039
+ for (const bookmark of bookmarkEnds) {
3040
+ lastParagraph.addBookmarkEnd(bookmark);
3041
+ }
3042
+ }
3043
+ }
2742
3044
  }
2743
3045
  }
2744
3046
  else {
@@ -2783,6 +3085,7 @@ class DocumentParser {
2783
3085
  const pStart = content.indexOf("<w:p", pos);
2784
3086
  const tblStart = content.indexOf("<w:tbl", pos);
2785
3087
  const sdtStart = content.indexOf("<w:sdt", pos);
3088
+ const bookmarkEndStart = content.indexOf("<w:bookmarkEnd", pos);
2786
3089
  let nextStart = -1;
2787
3090
  let nextType = null;
2788
3091
  let nextTag = "";
@@ -2807,16 +3110,34 @@ class DocumentParser {
2807
3110
  nextType = "sdt";
2808
3111
  nextTag = "w:sdt";
2809
3112
  }
3113
+ if (bookmarkEndStart !== -1 &&
3114
+ (nextStart === -1 || bookmarkEndStart < nextStart)) {
3115
+ nextStart = bookmarkEndStart;
3116
+ nextType = "bookmarkEnd";
3117
+ nextTag = "w:bookmarkEnd";
3118
+ }
2810
3119
  if (nextStart === -1 || nextType === null)
2811
3120
  break;
2812
- const elementEnd = this.findClosingTag(content, nextTag, nextStart);
2813
- if (elementEnd === -1) {
2814
- pos = nextStart + 1;
2815
- continue;
3121
+ if (nextType === "bookmarkEnd") {
3122
+ const elementEnd = content.indexOf(">", nextStart) + 1;
3123
+ if (elementEnd === 0) {
3124
+ pos = nextStart + 1;
3125
+ continue;
3126
+ }
3127
+ const elementXml = content.substring(nextStart, elementEnd);
3128
+ result.push({ type: nextType, xml: elementXml });
3129
+ pos = elementEnd;
3130
+ }
3131
+ else {
3132
+ const elementEnd = this.findClosingTag(content, nextTag, nextStart);
3133
+ if (elementEnd === -1) {
3134
+ pos = nextStart + 1;
3135
+ continue;
3136
+ }
3137
+ const elementXml = content.substring(nextStart, elementEnd);
3138
+ result.push({ type: nextType, xml: elementXml });
3139
+ pos = elementEnd;
2816
3140
  }
2817
- const elementXml = content.substring(nextStart, elementEnd);
2818
- result.push({ type: nextType, xml: elementXml });
2819
- pos = elementEnd;
2820
3141
  }
2821
3142
  return result;
2822
3143
  }