docxmlater 4.0.0 → 4.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/Document.d.ts +3 -1
- package/dist/core/Document.d.ts.map +1 -1
- package/dist/core/Document.js +108 -18
- package/dist/core/Document.js.map +1 -1
- package/dist/core/DocumentParser.d.ts +2 -0
- package/dist/core/DocumentParser.d.ts.map +1 -1
- package/dist/core/DocumentParser.js +104 -25
- package/dist/core/DocumentParser.js.map +1 -1
- package/package.json +1 -1
|
@@ -61,6 +61,8 @@ export declare class DocumentParser {
|
|
|
61
61
|
private parseTableBordersFromObject;
|
|
62
62
|
private parseTableCellFromObject;
|
|
63
63
|
private parseSDTFromObject;
|
|
64
|
+
private extractInstructionFromContent;
|
|
65
|
+
private extractInstructionFromRawXML;
|
|
64
66
|
private parseTOCFromSDTContent;
|
|
65
67
|
private parseListItems;
|
|
66
68
|
private objectToXml;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"DocumentParser.d.ts","sourceRoot":"","sources":["../../src/core/DocumentParser.ts"],"names":[],"mappings":"AAOA,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AAExD,OAAO,EAAE,SAAS,EAAuB,MAAM,uBAAuB,CAAC;AAEvE,OAAO,EAEL,OAAO,EAGR,MAAM,qBAAqB,CAAC;AAC7B,OAAO,EAAE,qBAAqB,EAAE,MAAM,mCAAmC,CAAC;AAC1E,OAAO,EAAE,KAAK,EAAE,MAAM,mBAAmB,CAAC;AAG1C,OAAO,EAAE,sBAAsB,EAAE,MAAM,oCAAoC,CAAC;AAE5E,OAAO,EAAE,iBAAiB,EAAE,MAAM,iCAAiC,CAAC;AACpE,OAAO,EAAE,iBAAiB,EAAE,MAAM,iCAAiC,CAAC;AACpE,OAAO,EAAE,KAAK,EAA8B,MAAM,qBAAqB,CAAC;AASxE,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAE/C,OAAO,EAAE,kBAAkB,EAAE,MAAM,YAAY,CAAC;AAChD,OAAO,EAAE,mBAAmB,EAAE,MAAM,uBAAuB,CAAC;AAK5D,MAAM,WAAW,UAAU;IACzB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,KAAK,CAAC;CACd;AAKD,KAAK,WAAW,GACZ,SAAS,GACT,KAAK,GACL,sBAAsB,GACtB,qBAAqB,CAAC;AAK1B,qBAAa,cAAc;IACzB,OAAO,CAAC,WAAW,CAAoB;IACvC,OAAO,CAAC,aAAa,CAAU;gBAEnB,aAAa,GAAE,OAAe;IAO1C,cAAc,IAAI,UAAU,EAAE;IAO9B,gBAAgB,IAAI,IAAI;IAWlB,aAAa,CACjB,UAAU,EAAE,UAAU,EACtB,mBAAmB,EAAE,mBAAmB,EACxC,YAAY,EAAE,YAAY,GACzB,OAAO,CAAC;QACT,YAAY,EAAE,WAAW,EAAE,CAAC;QAC5B,UAAU,EAAE,kBAAkB,CAAC;QAC/B,mBAAmB,EAAE,mBAAmB,CAAC;QACzC,MAAM,EAAE,KAAK,EAAE,CAAC;QAChB,kBAAkB,EAAE,iBAAiB,EAAE,CAAC;QACxC,kBAAkB,EAAE,iBAAiB,EAAE,CAAC;QACxC,OAAO,EAAE,OAAO,GAAG,IAAI,CAAC;QACxB,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KACpC,CAAC;YAyDY,iBAAiB;IAgH/B,OAAO,CAAC,WAAW;IAmCnB,OAAO,CAAC,mBAAmB;IAkC3B,OAAO,CAAC,qBAAqB;IAoB7B,OAAO,CAAC,oBAAoB;IAuB5B,OAAO,CAAC,qBAAqB;YAwEf,uBAAuB;YA8LvB,wBAAwB;IAsKtC,OAAO,CAAC,kCAAkC;IAsU1C,OAAO,CAAC,qBAAqB;IAuH7B,OAAO,CAAC,0BAA0B;IAkHlC,OAAO,CAAC,kBAAkB;IAsN1B,OAAO,CAAC,wBAAwB;IAiGhC,OAAO,CAAC,0BAA0B;IAiIlC,OAAO,CAAC,8BAA8B;IActC,OAAO,CAAC,0BAA0B;IAwClC,OAAO,CAAC,4BAA4B;YAmLtB,sBAAsB;IAwLpC,OAAO,CAAC,iBAAiB;IA2CzB,OAAO,CAAC,kBAAkB;IA6D1B,OAAO,CAAC,cAAc;IAmBtB,OAAO,CAAC,iBAAiB;YAuBX,oBAAoB;IA2DlC,OAAO,CAAC,8BAA8B;YAuFxB,uBAAuB;IAyDrC,OAAO,CAAC,iCAAiC;IAyDzC,OAAO,CAAC,sCAAsC;IA4D9C,OAAO,CAAC,2BAA2B;YAgCrB,wBAAwB;YAoLxB,kBAAkB;
|
|
1
|
+
{"version":3,"file":"DocumentParser.d.ts","sourceRoot":"","sources":["../../src/core/DocumentParser.ts"],"names":[],"mappings":"AAOA,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AAExD,OAAO,EAAE,SAAS,EAAuB,MAAM,uBAAuB,CAAC;AAEvE,OAAO,EAEL,OAAO,EAGR,MAAM,qBAAqB,CAAC;AAC7B,OAAO,EAAE,qBAAqB,EAAE,MAAM,mCAAmC,CAAC;AAC1E,OAAO,EAAE,KAAK,EAAE,MAAM,mBAAmB,CAAC;AAG1C,OAAO,EAAE,sBAAsB,EAAE,MAAM,oCAAoC,CAAC;AAE5E,OAAO,EAAE,iBAAiB,EAAE,MAAM,iCAAiC,CAAC;AACpE,OAAO,EAAE,iBAAiB,EAAE,MAAM,iCAAiC,CAAC;AACpE,OAAO,EAAE,KAAK,EAA8B,MAAM,qBAAqB,CAAC;AASxE,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAE/C,OAAO,EAAE,kBAAkB,EAAE,MAAM,YAAY,CAAC;AAChD,OAAO,EAAE,mBAAmB,EAAE,MAAM,uBAAuB,CAAC;AAK5D,MAAM,WAAW,UAAU;IACzB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,KAAK,CAAC;CACd;AAKD,KAAK,WAAW,GACZ,SAAS,GACT,KAAK,GACL,sBAAsB,GACtB,qBAAqB,CAAC;AAK1B,qBAAa,cAAc;IACzB,OAAO,CAAC,WAAW,CAAoB;IACvC,OAAO,CAAC,aAAa,CAAU;gBAEnB,aAAa,GAAE,OAAe;IAO1C,cAAc,IAAI,UAAU,EAAE;IAO9B,gBAAgB,IAAI,IAAI;IAWlB,aAAa,CACjB,UAAU,EAAE,UAAU,EACtB,mBAAmB,EAAE,mBAAmB,EACxC,YAAY,EAAE,YAAY,GACzB,OAAO,CAAC;QACT,YAAY,EAAE,WAAW,EAAE,CAAC;QAC5B,UAAU,EAAE,kBAAkB,CAAC;QAC/B,mBAAmB,EAAE,mBAAmB,CAAC;QACzC,MAAM,EAAE,KAAK,EAAE,CAAC;QAChB,kBAAkB,EAAE,iBAAiB,EAAE,CAAC;QACxC,kBAAkB,EAAE,iBAAiB,EAAE,CAAC;QACxC,OAAO,EAAE,OAAO,GAAG,IAAI,CAAC;QACxB,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KACpC,CAAC;YAyDY,iBAAiB;IAgH/B,OAAO,CAAC,WAAW;IAmCnB,OAAO,CAAC,mBAAmB;IAkC3B,OAAO,CAAC,qBAAqB;IAoB7B,OAAO,CAAC,oBAAoB;IAuB5B,OAAO,CAAC,qBAAqB;YAwEf,uBAAuB;YA8LvB,wBAAwB;IAsKtC,OAAO,CAAC,kCAAkC;IAsU1C,OAAO,CAAC,qBAAqB;IAuH7B,OAAO,CAAC,0BAA0B;IAkHlC,OAAO,CAAC,kBAAkB;IAsN1B,OAAO,CAAC,wBAAwB;IAiGhC,OAAO,CAAC,0BAA0B;IAiIlC,OAAO,CAAC,8BAA8B;IActC,OAAO,CAAC,0BAA0B;IAwClC,OAAO,CAAC,4BAA4B;YAmLtB,sBAAsB;IAwLpC,OAAO,CAAC,iBAAiB;IA2CzB,OAAO,CAAC,kBAAkB;IA6D1B,OAAO,CAAC,cAAc;IAmBtB,OAAO,CAAC,iBAAiB;YAuBX,oBAAoB;IA2DlC,OAAO,CAAC,8BAA8B;YAuFxB,uBAAuB;IAyDrC,OAAO,CAAC,iCAAiC;IAyDzC,OAAO,CAAC,sCAAsC;IA4D9C,OAAO,CAAC,2BAA2B;YAgCrB,wBAAwB;YAoLxB,kBAAkB;IAqPhC,OAAO,CAAC,6BAA6B;IAwCrC,OAAO,CAAC,4BAA4B;IAsJpC,OAAO,CAAC,sBAAsB;IA8G9B,OAAO,CAAC,cAAc;IA2BtB,OAAO,CAAC,WAAW;IAgHnB,OAAO,CAAC,kBAAkB;IAoB1B,OAAO,CAAC,eAAe;IAuEvB,OAAO,CAAC,qBAAqB;IA8D7B,OAAO,CAAC,WAAW;IA8CnB,OAAO,CAAC,cAAc;IAqEtB,OAAO,CAAC,sBAAsB;IAoP9B,OAAO,CAAC,UAAU;IAgLlB,OAAO,CAAC,+BAA+B;IA4GvC,OAAO,CAAC,yBAAyB;IAyJjC,OAAO,CAAC,yBAAyB;IA+EjC,OAAO,CAAC,2BAA2B;IAqEnC,OAAO,CAAC,+BAA+B;IAmDvC,OAAO,CAAC,8BAA8B;IA8CtC,OAAO,CAAC,iCAAiC;IA6FzC,OAAO,CAAC,mBAAmB;IAqF3B,OAAO,CAAC,mBAAmB;IAqB3B,OAAO,CAAC,uBAAuB;IA6B/B,MAAM,CAAC,SAAS,CAAC,UAAU,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI;IA+BzE,MAAM,CAAC,SAAS,CACd,UAAU,EAAE,UAAU,EACtB,QAAQ,EAAE,MAAM,EAChB,UAAU,EAAE,MAAM,GACjB,OAAO;IAwBV,MAAM,CAAC,gBAAgB,CACrB,UAAU,EAAE,UAAU,EACtB,QAAQ,EAAE,MAAM,GACf,KAAK,CAAC;QACP,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,IAAI,CAAC,EAAE,MAAM,CAAC;QACd,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,UAAU,CAAC,EAAE,MAAM,CAAC;KACrB,CAAC;IA6DF,OAAO,CAAC,eAAe;IAkCjB,sBAAsB,CAC1B,UAAU,EAAE,UAAU,EACtB,OAAO,EAAE,OAAO,GAAG,IAAI,EACvB,mBAAmB,EAAE,mBAAmB,EACxC,YAAY,EAAE,YAAY,GACzB,OAAO,CAAC;QACT,OAAO,EAAE,KAAK,CAAC;YACb,MAAM,EAAE,OAAO,oBAAoB,EAAE,MAAM,CAAC;YAC5C,cAAc,EAAE,MAAM,CAAC;YACvB,QAAQ,EAAE,MAAM,CAAC;SAClB,CAAC,CAAC;QACH,OAAO,EAAE,KAAK,CAAC;YACb,MAAM,EAAE,OAAO,oBAAoB,EAAE,MAAM,CAAC;YAC5C,cAAc,EAAE,MAAM,CAAC;YACvB,QAAQ,EAAE,MAAM,CAAC;SAClB,CAAC,CAAC;KACJ,CAAC;YA4FY,WAAW;YA2DX,WAAW;CAiD1B"}
|
|
@@ -2229,6 +2229,104 @@ class DocumentParser {
|
|
|
2229
2229
|
return null;
|
|
2230
2230
|
}
|
|
2231
2231
|
}
|
|
2232
|
+
extractInstructionFromContent(content) {
|
|
2233
|
+
logger_1.defaultLogger.debug(`[TOC Parser] Searching ${content.length} content elements for TOC ComplexField`);
|
|
2234
|
+
for (const element of content) {
|
|
2235
|
+
if (element instanceof Paragraph_1.Paragraph) {
|
|
2236
|
+
const paragraphContent = element.getContent();
|
|
2237
|
+
for (const item of paragraphContent) {
|
|
2238
|
+
if (item instanceof Field_1.ComplexField) {
|
|
2239
|
+
const instruction = item.getInstruction();
|
|
2240
|
+
if (instruction && instruction.trim().startsWith("TOC")) {
|
|
2241
|
+
logger_1.defaultLogger.debug(`[TOC Parser] Found ComplexField with TOC instruction: "${instruction.substring(0, 100)}..."`);
|
|
2242
|
+
return instruction.trim();
|
|
2243
|
+
}
|
|
2244
|
+
}
|
|
2245
|
+
}
|
|
2246
|
+
}
|
|
2247
|
+
}
|
|
2248
|
+
logger_1.defaultLogger.debug("[TOC Parser] No ComplexField with TOC instruction found in assembled content");
|
|
2249
|
+
return undefined;
|
|
2250
|
+
}
|
|
2251
|
+
extractInstructionFromRawXML(sdtContent) {
|
|
2252
|
+
const paragraphs = sdtContent["w:p"];
|
|
2253
|
+
const paraArray = Array.isArray(paragraphs)
|
|
2254
|
+
? paragraphs
|
|
2255
|
+
: paragraphs
|
|
2256
|
+
? [paragraphs]
|
|
2257
|
+
: [];
|
|
2258
|
+
logger_1.defaultLogger.debug(`[TOC Parser] Fallback: Parsing raw XML from ${paraArray.length} paragraph(s)`);
|
|
2259
|
+
let inField = false;
|
|
2260
|
+
let instructionParts = [];
|
|
2261
|
+
let foundTOCInstruction;
|
|
2262
|
+
for (let pIdx = 0; pIdx < paraArray.length; pIdx++) {
|
|
2263
|
+
const paraObj = paraArray[pIdx];
|
|
2264
|
+
const runs = paraObj["w:r"];
|
|
2265
|
+
const runArray = Array.isArray(runs) ? runs : runs ? [runs] : [];
|
|
2266
|
+
logger_1.defaultLogger.debug(`[TOC Parser] Paragraph ${pIdx + 1}: ${runArray.length} runs`);
|
|
2267
|
+
for (let rIdx = 0; rIdx < runArray.length; rIdx++) {
|
|
2268
|
+
const runObj = runArray[rIdx];
|
|
2269
|
+
const fldChar = runObj["w:fldChar"];
|
|
2270
|
+
if (fldChar) {
|
|
2271
|
+
const fldCharArray = Array.isArray(fldChar) ? fldChar : [fldChar];
|
|
2272
|
+
logger_1.defaultLogger.debug(`[TOC Parser] Paragraph ${pIdx + 1}, Run ${rIdx + 1}: Found ${fldCharArray.length} fldChar element(s)`);
|
|
2273
|
+
for (const fldCharObj of fldCharArray) {
|
|
2274
|
+
const charType = fldCharObj["@_w:fldCharType"] || fldCharObj["@_fldCharType"];
|
|
2275
|
+
if (!charType) {
|
|
2276
|
+
logger_1.defaultLogger.debug(`[TOC Parser] Warning: fldChar without charType attribute`);
|
|
2277
|
+
continue;
|
|
2278
|
+
}
|
|
2279
|
+
logger_1.defaultLogger.debug(`[TOC Parser] Paragraph ${pIdx + 1}, Run ${rIdx + 1}: fldChar type = "${charType}"`);
|
|
2280
|
+
if (charType === "begin") {
|
|
2281
|
+
inField = true;
|
|
2282
|
+
instructionParts = [];
|
|
2283
|
+
logger_1.defaultLogger.debug("[TOC Parser] Found field begin marker, starting instruction collection");
|
|
2284
|
+
continue;
|
|
2285
|
+
}
|
|
2286
|
+
if (charType === "end" || charType === "separate") {
|
|
2287
|
+
const fullInstruction = instructionParts.join("").trim();
|
|
2288
|
+
logger_1.defaultLogger.debug(`[TOC Parser] Field ${charType} marker found. Collected instruction: "${fullInstruction.substring(0, 100)}..."`);
|
|
2289
|
+
if (fullInstruction.startsWith("TOC")) {
|
|
2290
|
+
foundTOCInstruction = fullInstruction;
|
|
2291
|
+
logger_1.defaultLogger.debug(`[TOC Parser] ✓ Extracted complete TOC instruction from ${instructionParts.length} part(s)`);
|
|
2292
|
+
}
|
|
2293
|
+
if (charType === "end") {
|
|
2294
|
+
inField = false;
|
|
2295
|
+
instructionParts = [];
|
|
2296
|
+
if (foundTOCInstruction) {
|
|
2297
|
+
return foundTOCInstruction;
|
|
2298
|
+
}
|
|
2299
|
+
}
|
|
2300
|
+
continue;
|
|
2301
|
+
}
|
|
2302
|
+
}
|
|
2303
|
+
}
|
|
2304
|
+
if (inField) {
|
|
2305
|
+
const instrText = runObj["w:instrText"];
|
|
2306
|
+
if (instrText) {
|
|
2307
|
+
let text = "";
|
|
2308
|
+
if (typeof instrText === "string") {
|
|
2309
|
+
text = instrText;
|
|
2310
|
+
}
|
|
2311
|
+
else if (instrText["#text"]) {
|
|
2312
|
+
text = instrText["#text"];
|
|
2313
|
+
}
|
|
2314
|
+
else {
|
|
2315
|
+
text = String(instrText);
|
|
2316
|
+
}
|
|
2317
|
+
instructionParts.push(text);
|
|
2318
|
+
logger_1.defaultLogger.debug(`[TOC Parser] Paragraph ${pIdx + 1}, Run ${rIdx + 1}: Collected instrText = "${text.substring(0, 50)}..."`);
|
|
2319
|
+
}
|
|
2320
|
+
}
|
|
2321
|
+
}
|
|
2322
|
+
}
|
|
2323
|
+
if (foundTOCInstruction) {
|
|
2324
|
+
logger_1.defaultLogger.debug(`[TOC Parser] Returning TOC instruction (no end marker found): "${foundTOCInstruction.substring(0, 100)}..."`);
|
|
2325
|
+
return foundTOCInstruction;
|
|
2326
|
+
}
|
|
2327
|
+
logger_1.defaultLogger.debug("[TOC Parser] No TOC instruction found in raw XML fallback");
|
|
2328
|
+
return undefined;
|
|
2329
|
+
}
|
|
2232
2330
|
parseTOCFromSDTContent(content, properties, sdtContent) {
|
|
2233
2331
|
try {
|
|
2234
2332
|
let title;
|
|
@@ -2242,35 +2340,16 @@ class DocumentParser {
|
|
|
2242
2340
|
}
|
|
2243
2341
|
}
|
|
2244
2342
|
}
|
|
2245
|
-
|
|
2246
|
-
|
|
2247
|
-
|
|
2248
|
-
|
|
2249
|
-
? [paragraphs]
|
|
2250
|
-
: [];
|
|
2251
|
-
for (const paraObj of paraArray) {
|
|
2252
|
-
const runs = paraObj["w:r"];
|
|
2253
|
-
const runArray = Array.isArray(runs) ? runs : runs ? [runs] : [];
|
|
2254
|
-
for (const runObj of runArray) {
|
|
2255
|
-
const instrText = runObj["w:instrText"];
|
|
2256
|
-
if (instrText) {
|
|
2257
|
-
if (typeof instrText === "string") {
|
|
2258
|
-
fieldInstruction = instrText.trim();
|
|
2259
|
-
}
|
|
2260
|
-
else if (instrText["#text"]) {
|
|
2261
|
-
fieldInstruction = instrText["#text"].trim();
|
|
2262
|
-
}
|
|
2263
|
-
if (fieldInstruction)
|
|
2264
|
-
break;
|
|
2265
|
-
}
|
|
2266
|
-
}
|
|
2267
|
-
if (fieldInstruction)
|
|
2268
|
-
break;
|
|
2343
|
+
fieldInstruction = this.extractInstructionFromContent(content);
|
|
2344
|
+
if (!fieldInstruction) {
|
|
2345
|
+
logger_1.defaultLogger.debug("[TOC Parser] ComplexField extraction failed, falling back to raw XML parsing");
|
|
2346
|
+
fieldInstruction = this.extractInstructionFromRawXML(sdtContent);
|
|
2269
2347
|
}
|
|
2270
2348
|
if (!fieldInstruction) {
|
|
2271
|
-
logger_1.defaultLogger.warn("[DocumentParser] No TOC field instruction found in SDT content");
|
|
2349
|
+
logger_1.defaultLogger.warn("[DocumentParser] No TOC field instruction found in SDT content (tried both ComplexField and raw XML)");
|
|
2272
2350
|
return null;
|
|
2273
2351
|
}
|
|
2352
|
+
logger_1.defaultLogger.debug(`[TOC Parser] Successfully extracted instruction: "${fieldInstruction}"`);
|
|
2274
2353
|
const tocOptions = {
|
|
2275
2354
|
title,
|
|
2276
2355
|
originalFieldInstruction: fieldInstruction.trim(),
|