docxmlater 4.5.4 → 4.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/Document.d.ts +1 -0
- package/dist/core/Document.d.ts.map +1 -1
- package/dist/core/Document.js +4 -1
- package/dist/core/Document.js.map +1 -1
- package/dist/core/DocumentParser.d.ts +1 -0
- package/dist/core/DocumentParser.d.ts.map +1 -1
- package/dist/core/DocumentParser.js +86 -87
- package/dist/core/DocumentParser.js.map +1 -1
- package/dist/xml/XMLParser.d.ts.map +1 -1
- package/dist/xml/XMLParser.js +0 -13
- package/dist/xml/XMLParser.js.map +1 -1
- package/package.json +1 -1
|
@@ -38,6 +38,7 @@ export declare class DocumentParser {
|
|
|
38
38
|
private extractSingleElement;
|
|
39
39
|
private validateLoadedContent;
|
|
40
40
|
private parseParagraphWithOrder;
|
|
41
|
+
private parseOrderedParagraphChildren;
|
|
41
42
|
private parseParagraphFromObject;
|
|
42
43
|
private parseParagraphPropertiesFromObject;
|
|
43
44
|
private assembleComplexFields;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"DocumentParser.d.ts","sourceRoot":"","sources":["../../src/core/DocumentParser.ts"],"names":[],"mappings":"AAOA,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AAExD,OAAO,EAAE,SAAS,EAAuB,MAAM,uBAAuB,CAAC;AAEvE,OAAO,EAEL,OAAO,EAGR,MAAM,qBAAqB,CAAC;AAC7B,OAAO,EAAE,qBAAqB,EAAE,MAAM,mCAAmC,CAAC;AAC1E,OAAO,EAAE,KAAK,EAAE,MAAM,mBAAmB,CAAC;AAG1C,OAAO,EAAE,sBAAsB,EAAE,MAAM,oCAAoC,CAAC;AAE5E,OAAO,EAAE,iBAAiB,EAAE,MAAM,iCAAiC,CAAC;AACpE,OAAO,EAAE,iBAAiB,EAAE,MAAM,iCAAiC,CAAC;AACpE,OAAO,EAAE,KAAK,EAA8B,MAAM,qBAAqB,CAAC;AASxE,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAE/C,OAAO,EAAE,kBAAkB,EAAE,MAAM,YAAY,CAAC;AAChD,OAAO,EAAE,mBAAmB,EAAE,MAAM,uBAAuB,CAAC;AAK5D,MAAM,WAAW,UAAU;IACzB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,KAAK,CAAC;CACd;AAKD,KAAK,WAAW,GACZ,SAAS,GACT,KAAK,GACL,sBAAsB,GACtB,qBAAqB,CAAC;AAK1B,qBAAa,cAAc;IACzB,OAAO,CAAC,WAAW,CAAoB;IACvC,OAAO,CAAC,aAAa,CAAU;gBAEnB,aAAa,GAAE,OAAe;IAO1C,cAAc,IAAI,UAAU,EAAE;IAO9B,gBAAgB,IAAI,IAAI;IAWlB,aAAa,CACjB,UAAU,EAAE,UAAU,EACtB,mBAAmB,EAAE,mBAAmB,EACxC,YAAY,EAAE,YAAY,GACzB,OAAO,CAAC;QACT,YAAY,EAAE,WAAW,EAAE,CAAC;QAC5B,UAAU,EAAE,kBAAkB,CAAC;QAC/B,mBAAmB,EAAE,mBAAmB,CAAC;QACzC,MAAM,EAAE,KAAK,EAAE,CAAC;QAChB,kBAAkB,EAAE,iBAAiB,EAAE,CAAC;QACxC,kBAAkB,EAAE,iBAAiB,EAAE,CAAC;QACxC,OAAO,EAAE,OAAO,GAAG,IAAI,CAAC;QACxB,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KACpC,CAAC;YAyDY,iBAAiB;IAgH/B,OAAO,CAAC,WAAW;IAmCnB,OAAO,CAAC,mBAAmB;IAkC3B,OAAO,CAAC,qBAAqB;IAoB7B,OAAO,CAAC,oBAAoB;IAuB5B,OAAO,CAAC,qBAAqB;YAwEf,uBAAuB;
|
|
1
|
+
{"version":3,"file":"DocumentParser.d.ts","sourceRoot":"","sources":["../../src/core/DocumentParser.ts"],"names":[],"mappings":"AAOA,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AAExD,OAAO,EAAE,SAAS,EAAuB,MAAM,uBAAuB,CAAC;AAEvE,OAAO,EAEL,OAAO,EAGR,MAAM,qBAAqB,CAAC;AAC7B,OAAO,EAAE,qBAAqB,EAAE,MAAM,mCAAmC,CAAC;AAC1E,OAAO,EAAE,KAAK,EAAE,MAAM,mBAAmB,CAAC;AAG1C,OAAO,EAAE,sBAAsB,EAAE,MAAM,oCAAoC,CAAC;AAE5E,OAAO,EAAE,iBAAiB,EAAE,MAAM,iCAAiC,CAAC;AACpE,OAAO,EAAE,iBAAiB,EAAE,MAAM,iCAAiC,CAAC;AACpE,OAAO,EAAE,KAAK,EAA8B,MAAM,qBAAqB,CAAC;AASxE,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAE/C,OAAO,EAAE,kBAAkB,EAAE,MAAM,YAAY,CAAC;AAChD,OAAO,EAAE,mBAAmB,EAAE,MAAM,uBAAuB,CAAC;AAK5D,MAAM,WAAW,UAAU;IACzB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,KAAK,CAAC;CACd;AAKD,KAAK,WAAW,GACZ,SAAS,GACT,KAAK,GACL,sBAAsB,GACtB,qBAAqB,CAAC;AAK1B,qBAAa,cAAc;IACzB,OAAO,CAAC,WAAW,CAAoB;IACvC,OAAO,CAAC,aAAa,CAAU;gBAEnB,aAAa,GAAE,OAAe;IAO1C,cAAc,IAAI,UAAU,EAAE;IAO9B,gBAAgB,IAAI,IAAI;IAWlB,aAAa,CACjB,UAAU,EAAE,UAAU,EACtB,mBAAmB,EAAE,mBAAmB,EACxC,YAAY,EAAE,YAAY,GACzB,OAAO,CAAC;QACT,YAAY,EAAE,WAAW,EAAE,CAAC;QAC5B,UAAU,EAAE,kBAAkB,CAAC;QAC/B,mBAAmB,EAAE,mBAAmB,CAAC;QACzC,MAAM,EAAE,KAAK,EAAE,CAAC;QAChB,kBAAkB,EAAE,iBAAiB,EAAE,CAAC;QACxC,kBAAkB,EAAE,iBAAiB,EAAE,CAAC;QACxC,OAAO,EAAE,OAAO,GAAG,IAAI,CAAC;QACxB,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KACpC,CAAC;YAyDY,iBAAiB;IAgH/B,OAAO,CAAC,WAAW;IAmCnB,OAAO,CAAC,mBAAmB;IAkC3B,OAAO,CAAC,qBAAqB;IAoB7B,OAAO,CAAC,oBAAoB;IAuB5B,OAAO,CAAC,qBAAqB;YAwEf,uBAAuB;YAqFvB,6BAA6B;YAiI7B,wBAAwB;IAsKtC,OAAO,CAAC,kCAAkC;IAsU1C,OAAO,CAAC,qBAAqB;IAuH7B,OAAO,CAAC,0BAA0B;IAkHlC,OAAO,CAAC,kBAAkB;IAsN1B,OAAO,CAAC,wBAAwB;IAiGhC,OAAO,CAAC,0BAA0B;IAiIlC,OAAO,CAAC,8BAA8B;IActC,OAAO,CAAC,0BAA0B;IAwClC,OAAO,CAAC,4BAA4B;YAmLtB,sBAAsB;IAwLpC,OAAO,CAAC,iBAAiB;IA2CzB,OAAO,CAAC,kBAAkB;IA6D1B,OAAO,CAAC,cAAc;IAmBtB,OAAO,CAAC,iBAAiB;YAuBX,oBAAoB;IA2DlC,OAAO,CAAC,8BAA8B;YAuFxB,uBAAuB;IAyDrC,OAAO,CAAC,iCAAiC;IAyDzC,OAAO,CAAC,sCAAsC;IA4D9C,OAAO,CAAC,2BAA2B;YAgCrB,wBAAwB;YAoLxB,kBAAkB;IAqPhC,OAAO,CAAC,6BAA6B;IAwCrC,OAAO,CAAC,4BAA4B;IA6JpC,OAAO,CAAC,4BAA4B;IA8FpC,OAAO,CAAC,sBAAsB;IA8H9B,OAAO,CAAC,cAAc;IA2BtB,OAAO,CAAC,WAAW;IAgHnB,OAAO,CAAC,kBAAkB;IAoB1B,OAAO,CAAC,eAAe;IAuEvB,OAAO,CAAC,qBAAqB;IA8D7B,OAAO,CAAC,WAAW;IA8CnB,OAAO,CAAC,cAAc;IAqEtB,OAAO,CAAC,sBAAsB;IAoP9B,OAAO,CAAC,UAAU;IAgLlB,OAAO,CAAC,+BAA+B;IA4GvC,OAAO,CAAC,yBAAyB;IAyJjC,OAAO,CAAC,yBAAyB;IA+EjC,OAAO,CAAC,2BAA2B;IAqEnC,OAAO,CAAC,+BAA+B;IAmDvC,OAAO,CAAC,8BAA8B;IA8CtC,OAAO,CAAC,iCAAiC;IA6FzC,OAAO,CAAC,mBAAmB;IAqF3B,OAAO,CAAC,mBAAmB;IAqB3B,OAAO,CAAC,uBAAuB;IA6B/B,MAAM,CAAC,SAAS,CAAC,UAAU,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI;IA+BzE,MAAM,CAAC,SAAS,CACd,UAAU,EAAE,UAAU,EACtB,QAAQ,EAAE,MAAM,EAChB,UAAU,EAAE,MAAM,GACjB,OAAO;IAwBV,MAAM,CAAC,gBAAgB,CACrB,UAAU,EAAE,UAAU,EACtB,QAAQ,EAAE,MAAM,GACf,KAAK,CAAC;QACP,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,IAAI,CAAC,EAAE,MAAM,CAAC;QACd,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,UAAU,CAAC,EAAE,MAAM,CAAC;KACrB,CAAC;IA6DF,OAAO,CAAC,eAAe;IAkCjB,sBAAsB,CAC1B,UAAU,EAAE,UAAU,EACtB,OAAO,EAAE,OAAO,GAAG,IAAI,EACvB,mBAAmB,EAAE,mBAAmB,EACxC,YAAY,EAAE,YAAY,GACzB,OAAO,CAAC;QACT,OAAO,EAAE,KAAK,CAAC;YACb,MAAM,EAAE,OAAO,oBAAoB,EAAE,MAAM,CAAC;YAC5C,cAAc,EAAE,MAAM,CAAC;YACvB,QAAQ,EAAE,MAAM,CAAC;SAClB,CAAC,CAAC;QACH,OAAO,EAAE,KAAK,CAAC;YACb,MAAM,EAAE,OAAO,oBAAoB,EAAE,MAAM,CAAC;YAC5C,cAAc,EAAE,MAAM,CAAC;YACvB,QAAQ,EAAE,MAAM,CAAC;SAClB,CAAC,CAAC;KACJ,CAAC;YA4FY,WAAW;YA2DX,WAAW;CAiD1B"}
|
|
@@ -267,127 +267,126 @@ class DocumentParser {
|
|
|
267
267
|
if (paraId) {
|
|
268
268
|
paragraph.formatting.paraId = paraId;
|
|
269
269
|
}
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
270
|
+
await this.parseOrderedParagraphChildren(paraXml, pElement, paragraph, relationshipManager, zipHandler, imageManager);
|
|
271
|
+
this.assembleComplexFields(paragraph);
|
|
272
|
+
const runs = paragraph.getRuns();
|
|
273
|
+
const runData = runs.map((run) => ({
|
|
274
|
+
text: run.getText(),
|
|
275
|
+
rtl: run.getFormatting().rtl,
|
|
276
|
+
}));
|
|
277
|
+
const bidi = paragraph.getFormatting().bidi;
|
|
278
|
+
(0, diagnostics_1.logParagraphContent)("parsing", -1, runData, bidi);
|
|
279
|
+
if (bidi) {
|
|
280
|
+
(0, diagnostics_1.logTextDirection)(`Paragraph has BiDi enabled`);
|
|
281
|
+
}
|
|
282
|
+
this.mergeConsecutiveHyperlinks(paragraph);
|
|
283
|
+
return paragraph;
|
|
284
|
+
}
|
|
285
|
+
catch (error) {
|
|
286
|
+
const err = error instanceof Error ? error : new Error(String(error));
|
|
287
|
+
this.parseErrors.push({ element: "paragraph", error: err });
|
|
288
|
+
if (this.strictParsing) {
|
|
289
|
+
throw new Error(`Failed to parse paragraph: ${err.message}`);
|
|
290
|
+
}
|
|
291
|
+
return null;
|
|
292
|
+
}
|
|
293
|
+
}
|
|
294
|
+
async parseOrderedParagraphChildren(paraXml, pElement, paragraph, relationshipManager, zipHandler, imageManager) {
|
|
295
|
+
const pPrEnd = paraXml.indexOf("</w:pPr>");
|
|
296
|
+
const contentStart = pPrEnd !== -1 ? pPrEnd + 8 : paraXml.indexOf(">") + 1;
|
|
297
|
+
const contentEnd = paraXml.lastIndexOf("</w:p>");
|
|
298
|
+
if (contentEnd <= contentStart) {
|
|
299
|
+
return;
|
|
300
|
+
}
|
|
301
|
+
const paraContent = paraXml.substring(contentStart, contentEnd);
|
|
302
|
+
const children = [];
|
|
303
|
+
let runIndex = 0;
|
|
304
|
+
let hyperlinkIndex = 0;
|
|
305
|
+
let fieldIndex = 0;
|
|
306
|
+
let searchPos = 0;
|
|
307
|
+
while (searchPos < paraContent.length) {
|
|
308
|
+
const tagStart = paraContent.indexOf("<", searchPos);
|
|
309
|
+
if (tagStart === -1)
|
|
310
|
+
break;
|
|
311
|
+
const tagEnd = paraContent.indexOf(">", tagStart);
|
|
312
|
+
if (tagEnd === -1)
|
|
313
|
+
break;
|
|
314
|
+
const tagContent = paraContent.substring(tagStart + 1, tagEnd);
|
|
315
|
+
const tagName = tagContent.split(/[\s\/>]/)[0];
|
|
316
|
+
if (tagName === "w:r") {
|
|
317
|
+
children.push({ type: "w:r", pos: tagStart, index: runIndex++ });
|
|
318
|
+
searchPos = tagEnd + 1;
|
|
319
|
+
}
|
|
320
|
+
else if (tagName === "w:hyperlink") {
|
|
321
|
+
children.push({
|
|
322
|
+
type: "w:hyperlink",
|
|
323
|
+
pos: tagStart,
|
|
324
|
+
index: hyperlinkIndex++,
|
|
325
|
+
});
|
|
326
|
+
searchPos = tagEnd + 1;
|
|
327
|
+
}
|
|
328
|
+
else if (tagName === "w:fldSimple") {
|
|
329
|
+
children.push({
|
|
330
|
+
type: "w:fldSimple",
|
|
331
|
+
pos: tagStart,
|
|
332
|
+
index: fieldIndex++,
|
|
333
|
+
});
|
|
334
|
+
searchPos = tagEnd + 1;
|
|
325
335
|
}
|
|
326
336
|
else {
|
|
337
|
+
searchPos = tagEnd + 1;
|
|
338
|
+
}
|
|
339
|
+
}
|
|
340
|
+
for (const child of children) {
|
|
341
|
+
if (child.type === "w:r") {
|
|
327
342
|
const runs = pElement["w:r"];
|
|
328
|
-
const
|
|
329
|
-
|
|
330
|
-
|
|
343
|
+
const runArray = Array.isArray(runs) ? runs : runs ? [runs] : [];
|
|
344
|
+
if (child.index < runArray.length) {
|
|
345
|
+
const runObj = runArray[child.index];
|
|
346
|
+
if (runObj["w:drawing"]) {
|
|
331
347
|
if (zipHandler && imageManager) {
|
|
332
|
-
const imageRun = await this.parseDrawingFromObject(
|
|
348
|
+
const imageRun = await this.parseDrawingFromObject(runObj["w:drawing"], zipHandler, relationshipManager, imageManager);
|
|
333
349
|
if (imageRun) {
|
|
334
350
|
paragraph.addRun(imageRun);
|
|
335
351
|
}
|
|
336
352
|
}
|
|
337
353
|
}
|
|
338
354
|
else {
|
|
339
|
-
const run = this.parseRunFromObject(
|
|
355
|
+
const run = this.parseRunFromObject(runObj);
|
|
340
356
|
if (run) {
|
|
341
357
|
paragraph.addRun(run);
|
|
342
358
|
}
|
|
343
359
|
}
|
|
344
360
|
}
|
|
361
|
+
}
|
|
362
|
+
else if (child.type === "w:hyperlink") {
|
|
345
363
|
const hyperlinks = pElement["w:hyperlink"];
|
|
346
|
-
const
|
|
364
|
+
const hyperlinkArray = Array.isArray(hyperlinks)
|
|
347
365
|
? hyperlinks
|
|
348
366
|
: hyperlinks
|
|
349
367
|
? [hyperlinks]
|
|
350
368
|
: [];
|
|
351
|
-
|
|
352
|
-
const hyperlink = this.parseHyperlinkFromObject(
|
|
369
|
+
if (child.index < hyperlinkArray.length) {
|
|
370
|
+
const hyperlink = this.parseHyperlinkFromObject(hyperlinkArray[child.index], relationshipManager);
|
|
353
371
|
if (hyperlink) {
|
|
354
372
|
paragraph.addHyperlink(hyperlink);
|
|
355
373
|
}
|
|
356
374
|
}
|
|
375
|
+
}
|
|
376
|
+
else if (child.type === "w:fldSimple") {
|
|
357
377
|
const fields = pElement["w:fldSimple"];
|
|
358
|
-
const
|
|
378
|
+
const fieldArray = Array.isArray(fields)
|
|
359
379
|
? fields
|
|
360
380
|
: fields
|
|
361
381
|
? [fields]
|
|
362
382
|
: [];
|
|
363
|
-
|
|
364
|
-
const field = this.parseSimpleFieldFromObject(
|
|
383
|
+
if (child.index < fieldArray.length) {
|
|
384
|
+
const field = this.parseSimpleFieldFromObject(fieldArray[child.index]);
|
|
365
385
|
if (field) {
|
|
366
386
|
paragraph.addField(field);
|
|
367
387
|
}
|
|
368
388
|
}
|
|
369
389
|
}
|
|
370
|
-
this.assembleComplexFields(paragraph);
|
|
371
|
-
const runs = paragraph.getRuns();
|
|
372
|
-
const runData = runs.map((run) => ({
|
|
373
|
-
text: run.getText(),
|
|
374
|
-
rtl: run.getFormatting().rtl,
|
|
375
|
-
}));
|
|
376
|
-
const bidi = paragraph.getFormatting().bidi;
|
|
377
|
-
(0, diagnostics_1.logParagraphContent)("parsing", -1, runData, bidi);
|
|
378
|
-
if (bidi) {
|
|
379
|
-
(0, diagnostics_1.logTextDirection)(`Paragraph has BiDi enabled`);
|
|
380
|
-
}
|
|
381
|
-
this.mergeConsecutiveHyperlinks(paragraph);
|
|
382
|
-
return paragraph;
|
|
383
|
-
}
|
|
384
|
-
catch (error) {
|
|
385
|
-
const err = error instanceof Error ? error : new Error(String(error));
|
|
386
|
-
this.parseErrors.push({ element: "paragraph", error: err });
|
|
387
|
-
if (this.strictParsing) {
|
|
388
|
-
throw new Error(`Failed to parse paragraph: ${err.message}`);
|
|
389
|
-
}
|
|
390
|
-
return null;
|
|
391
390
|
}
|
|
392
391
|
}
|
|
393
392
|
async parseParagraphFromObject(paraObj, relationshipManager, zipHandler, imageManager) {
|