@adeu/core 1.6.8 → 1.6.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +1833 -540
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +75 -1
- package/dist/index.d.ts +75 -1
- package/dist/index.js +1832 -540
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/src/consistency.test.ts +134 -0
- package/src/diff.test.ts +13 -1
- package/src/diff.ts +189 -70
- package/src/docx/bridge.ts +99 -57
- package/src/docx/dom.ts +66 -7
- package/src/engine.bugs.test.ts +481 -0
- package/src/engine.ts +1346 -192
- package/src/index.ts +1 -1
- package/src/markup.ts +160 -53
- package/src/outline.ts +199 -69
- package/src/sanitize/core.ts +26 -0
- package/src/sanitize/report.ts +1 -1
- package/src/sanitize/sanitize.test.ts +47 -2
- package/src/sanitize/transforms.ts +87 -0
- package/src/utils/docx.ts +282 -157
package/src/utils/docx.ts
CHANGED
|
@@ -1,50 +1,59 @@
|
|
|
1
|
-
import { qn, findChild, findAllDescendants } from
|
|
2
|
-
import {
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
export const
|
|
12
|
-
export const
|
|
13
|
-
export const
|
|
14
|
-
export const
|
|
15
|
-
export const
|
|
16
|
-
export const
|
|
17
|
-
export const
|
|
18
|
-
export const
|
|
19
|
-
export const
|
|
20
|
-
export const
|
|
21
|
-
export const
|
|
22
|
-
export const
|
|
23
|
-
export const
|
|
24
|
-
export const
|
|
25
|
-
export const
|
|
26
|
-
export const
|
|
27
|
-
export const
|
|
28
|
-
export const
|
|
29
|
-
export const
|
|
30
|
-
export const
|
|
31
|
-
export const
|
|
32
|
-
export const
|
|
33
|
-
export const
|
|
34
|
-
export const
|
|
35
|
-
export const
|
|
36
|
-
export const
|
|
37
|
-
export const
|
|
38
|
-
export const
|
|
39
|
-
export const
|
|
40
|
-
export const
|
|
41
|
-
export const
|
|
42
|
-
export const
|
|
43
|
-
export const
|
|
1
|
+
import { qn, findChild, findAllDescendants } from "../docx/dom.js";
|
|
2
|
+
import {
|
|
3
|
+
Paragraph,
|
|
4
|
+
Table,
|
|
5
|
+
Run,
|
|
6
|
+
NotesPart,
|
|
7
|
+
FootnoteItem,
|
|
8
|
+
DocxEvent,
|
|
9
|
+
} from "../docx/primitives.js";
|
|
10
|
+
|
|
11
|
+
export const QN_W_P = "w:p";
|
|
12
|
+
export const QN_W_R = "w:r";
|
|
13
|
+
export const QN_W_T = "w:t";
|
|
14
|
+
export const QN_W_DELTEXT = "w:delText";
|
|
15
|
+
export const QN_W_TAB = "w:tab";
|
|
16
|
+
export const QN_W_BR = "w:br";
|
|
17
|
+
export const QN_W_CR = "w:cr";
|
|
18
|
+
export const QN_W_RPR = "w:rPr";
|
|
19
|
+
export const QN_W_RPRCHANGE = "w:rPrChange";
|
|
20
|
+
export const QN_W_COMMENTREFERENCE = "w:commentReference";
|
|
21
|
+
export const QN_W_FOOTNOTEREFERENCE = "w:footnoteReference";
|
|
22
|
+
export const QN_W_ENDNOTEREFERENCE = "w:endnoteReference";
|
|
23
|
+
export const QN_W_FLDCHAR = "w:fldChar";
|
|
24
|
+
export const QN_W_FLDCHARTYPE = "w:fldCharType";
|
|
25
|
+
export const QN_W_INSTRTEXT = "w:instrText";
|
|
26
|
+
export const QN_W_INS = "w:ins";
|
|
27
|
+
export const QN_W_DEL = "w:del";
|
|
28
|
+
export const QN_W_ID = "w:id";
|
|
29
|
+
export const QN_W_AUTHOR = "w:author";
|
|
30
|
+
export const QN_W_DATE = "w:date";
|
|
31
|
+
export const QN_W_COMMENTRANGESTART = "w:commentRangeStart";
|
|
32
|
+
export const QN_W_COMMENTRANGEEND = "w:commentRangeEnd";
|
|
33
|
+
export const QN_W_HYPERLINK = "w:hyperlink";
|
|
34
|
+
export const QN_R_ID = "r:id";
|
|
35
|
+
export const QN_W_FLDSIMPLE = "w:fldSimple";
|
|
36
|
+
export const QN_W_INSTR = "w:instr";
|
|
37
|
+
export const QN_W_BOOKMARKSTART = "w:bookmarkStart";
|
|
38
|
+
export const QN_W_NAME = "w:name";
|
|
39
|
+
export const QN_W_SDT = "w:sdt";
|
|
40
|
+
export const QN_W_SMARTTAG = "w:smartTag";
|
|
41
|
+
export const QN_W_SDTCONTENT = "w:sdtContent";
|
|
42
|
+
export const QN_W_B = "w:b";
|
|
43
|
+
export const QN_W_I = "w:i";
|
|
44
|
+
export const QN_W_VAL = "w:val";
|
|
45
|
+
export const QN_W_PPR = "w:pPr";
|
|
46
|
+
export const QN_W_PSTYLE = "w:pStyle";
|
|
47
|
+
export const QN_W_OUTLINELVL = "w:outlineLvl";
|
|
48
|
+
export const QN_W_NUMPR = "w:numPr";
|
|
49
|
+
export const QN_W_NUMID = "w:numId";
|
|
50
|
+
export const QN_W_ILVL = "w:ilvl";
|
|
44
51
|
|
|
45
52
|
const _CUSTOM_HEADING_NAME_RE = /Heading[ ]?([1-6])(?![0-9])/;
|
|
46
53
|
|
|
47
|
-
export function _get_style_cache(
|
|
54
|
+
export function _get_style_cache(
|
|
55
|
+
part: any,
|
|
56
|
+
): [Record<string, any>, string | null] {
|
|
48
57
|
const pkg = part.package || part.pkg || (part.part ? part.part.pkg : null);
|
|
49
58
|
if (pkg && pkg._adeu_style_cache) {
|
|
50
59
|
return pkg._adeu_style_cache;
|
|
@@ -54,46 +63,56 @@ export function _get_style_cache(part: any): [Record<string, any>, string | null
|
|
|
54
63
|
let default_pstyle: string | null = null;
|
|
55
64
|
const raw_styles: Record<string, any> = {};
|
|
56
65
|
|
|
57
|
-
const stylesPart = pkg?.getPartByPath(
|
|
66
|
+
const stylesPart = pkg?.getPartByPath("word/styles.xml");
|
|
58
67
|
if (!stylesPart) {
|
|
59
68
|
const result: [Record<string, any>, string | null] = [cache, null];
|
|
60
69
|
if (pkg) pkg._adeu_style_cache = result;
|
|
61
70
|
return result;
|
|
62
71
|
}
|
|
63
72
|
|
|
64
|
-
const styles = findAllDescendants(stylesPart._element,
|
|
73
|
+
const styles = findAllDescendants(stylesPart._element, "w:style");
|
|
65
74
|
for (const s of styles) {
|
|
66
|
-
const s_id = s.getAttribute(
|
|
75
|
+
const s_id = s.getAttribute("w:styleId");
|
|
67
76
|
if (!s_id) continue;
|
|
68
77
|
|
|
69
|
-
const s_type = s.getAttribute(
|
|
70
|
-
const is_default =
|
|
78
|
+
const s_type = s.getAttribute("w:type");
|
|
79
|
+
const is_default =
|
|
80
|
+
s.getAttribute("w:default") === "1" ||
|
|
81
|
+
s.getAttribute("w:default") === "true";
|
|
71
82
|
|
|
72
|
-
if (s_type ===
|
|
83
|
+
if (s_type === "paragraph" && is_default) default_pstyle = s_id;
|
|
73
84
|
|
|
74
|
-
const name_el = findChild(s,
|
|
75
|
-
|
|
85
|
+
const name_el = findChild(s, "w:name");
|
|
86
|
+
let name = name_el ? name_el.getAttribute("w:val") : s_id;
|
|
76
87
|
|
|
77
|
-
|
|
78
|
-
|
|
88
|
+
if (name && typeof name === "string") {
|
|
89
|
+
if (name.toLowerCase().startsWith("heading")) {
|
|
90
|
+
name = name.replace(/^heading/i, "Heading");
|
|
91
|
+
} else if (name.toLowerCase() === "title") {
|
|
92
|
+
name = "Title";
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
const based_on_el = findChild(s, "w:basedOn");
|
|
97
|
+
const based_on = based_on_el ? based_on_el.getAttribute("w:val") : null;
|
|
79
98
|
|
|
80
99
|
let outline_lvl: number | null = null;
|
|
81
|
-
const pPr = findChild(s,
|
|
100
|
+
const pPr = findChild(s, "w:pPr");
|
|
82
101
|
if (pPr) {
|
|
83
|
-
const oLvl = findChild(pPr,
|
|
102
|
+
const oLvl = findChild(pPr, "w:outlineLvl");
|
|
84
103
|
if (oLvl) {
|
|
85
|
-
const val = oLvl.getAttribute(
|
|
104
|
+
const val = oLvl.getAttribute("w:val");
|
|
86
105
|
if (val && /^\d+$/.test(val)) outline_lvl = parseInt(val, 10);
|
|
87
106
|
}
|
|
88
107
|
}
|
|
89
108
|
|
|
90
109
|
let bold: boolean | null = null;
|
|
91
|
-
const rPr = findChild(s,
|
|
110
|
+
const rPr = findChild(s, "w:rPr");
|
|
92
111
|
if (rPr) {
|
|
93
|
-
const b = findChild(rPr,
|
|
112
|
+
const b = findChild(rPr, "w:b");
|
|
94
113
|
if (b) {
|
|
95
|
-
const val = b.getAttribute(
|
|
96
|
-
bold = val !==
|
|
114
|
+
const val = b.getAttribute("w:val");
|
|
115
|
+
bold = val !== "0" && val !== "false" && val !== "off";
|
|
97
116
|
}
|
|
98
117
|
}
|
|
99
118
|
|
|
@@ -102,7 +121,8 @@ export function _get_style_cache(part: any): [Record<string, any>, string | null
|
|
|
102
121
|
|
|
103
122
|
const resolve_style = (s_id: string, visited: Set<string>): any => {
|
|
104
123
|
if (cache[s_id]) return cache[s_id];
|
|
105
|
-
if (visited.has(s_id) || !raw_styles[s_id])
|
|
124
|
+
if (visited.has(s_id) || !raw_styles[s_id])
|
|
125
|
+
return { name: s_id, outline_level: null, bold: false };
|
|
106
126
|
|
|
107
127
|
visited.add(s_id);
|
|
108
128
|
const raw = raw_styles[s_id];
|
|
@@ -135,9 +155,15 @@ function _detect_heading_level_from_name(name: string): number | null {
|
|
|
135
155
|
return match ? parseInt(match[1], 10) : null;
|
|
136
156
|
}
|
|
137
157
|
|
|
138
|
-
export function is_native_heading(
|
|
158
|
+
export function is_native_heading(
|
|
159
|
+
paragraph: Paragraph,
|
|
160
|
+
style_cache?: Record<string, any>,
|
|
161
|
+
default_pstyle?: string | null,
|
|
162
|
+
): boolean {
|
|
139
163
|
if (!style_cache) {
|
|
140
|
-
[style_cache, default_pstyle] = _get_style_cache(
|
|
164
|
+
[style_cache, default_pstyle] = _get_style_cache(
|
|
165
|
+
paragraph._parent.part || paragraph._parent,
|
|
166
|
+
);
|
|
141
167
|
}
|
|
142
168
|
const pPr = findChild(paragraph._element, QN_W_PPR);
|
|
143
169
|
|
|
@@ -159,23 +185,36 @@ export function is_native_heading(paragraph: Paragraph, style_cache?: Record<str
|
|
|
159
185
|
}
|
|
160
186
|
|
|
161
187
|
const style_info = style_id && style_cache ? style_cache[style_id] : null;
|
|
162
|
-
if (
|
|
188
|
+
if (
|
|
189
|
+
style_info &&
|
|
190
|
+
style_info.outline_level !== null &&
|
|
191
|
+
style_info.outline_level >= 0 &&
|
|
192
|
+
style_info.outline_level <= 8
|
|
193
|
+
) {
|
|
163
194
|
return true;
|
|
164
195
|
}
|
|
165
196
|
|
|
166
|
-
|
|
167
|
-
if (style_name
|
|
168
|
-
|
|
169
|
-
|
|
197
|
+
let style_name = style_info ? style_info.name : style_id; // FALLBACK TO ID
|
|
198
|
+
if (style_name && typeof style_name === "string" && style_name.toLowerCase().startsWith("heading")) {
|
|
199
|
+
style_name = style_name.replace(/^heading/i, "Heading");
|
|
200
|
+
}
|
|
201
|
+
if (style_name?.startsWith("Heading")) return true;
|
|
202
|
+
if (style_name === "Title") return true;
|
|
203
|
+
if (style_name && style_name !== "Normal") {
|
|
170
204
|
if (_detect_heading_level_from_name(style_name) !== null) return true;
|
|
171
205
|
}
|
|
172
206
|
|
|
173
207
|
return false;
|
|
174
208
|
}
|
|
175
|
-
|
|
176
|
-
|
|
209
|
+
export function get_paragraph_prefix(
|
|
210
|
+
paragraph: Paragraph,
|
|
211
|
+
style_cache?: Record<string, any>,
|
|
212
|
+
default_pstyle?: string | null,
|
|
213
|
+
): string {
|
|
177
214
|
if (!style_cache) {
|
|
178
|
-
[style_cache, default_pstyle] = _get_style_cache(
|
|
215
|
+
[style_cache, default_pstyle] = _get_style_cache(
|
|
216
|
+
paragraph._parent.part || paragraph._parent,
|
|
217
|
+
);
|
|
179
218
|
}
|
|
180
219
|
const pPr = findChild(paragraph._element, QN_W_PPR);
|
|
181
220
|
|
|
@@ -185,7 +224,7 @@ export function get_paragraph_prefix(paragraph: Paragraph, style_cache?: Record<
|
|
|
185
224
|
const val = oLvl.getAttribute(QN_W_VAL);
|
|
186
225
|
if (val && /^\d+$/.test(val)) {
|
|
187
226
|
const lvl = parseInt(val, 10);
|
|
188
|
-
if (lvl >= 0 && lvl <= 8) return
|
|
227
|
+
if (lvl >= 0 && lvl <= 8) return "#".repeat(lvl + 1) + " ";
|
|
189
228
|
}
|
|
190
229
|
}
|
|
191
230
|
}
|
|
@@ -197,40 +236,48 @@ export function get_paragraph_prefix(paragraph: Paragraph, style_cache?: Record<
|
|
|
197
236
|
}
|
|
198
237
|
|
|
199
238
|
const style_info = style_id && style_cache ? style_cache[style_id] : null;
|
|
200
|
-
if (
|
|
201
|
-
|
|
239
|
+
if (
|
|
240
|
+
style_info &&
|
|
241
|
+
style_info.outline_level !== null &&
|
|
242
|
+
style_info.outline_level >= 0 &&
|
|
243
|
+
style_info.outline_level <= 8
|
|
244
|
+
) {
|
|
245
|
+
return "#".repeat(style_info.outline_level + 1) + " ";
|
|
202
246
|
}
|
|
203
247
|
|
|
204
|
-
|
|
205
|
-
if (style_name
|
|
206
|
-
|
|
207
|
-
|
|
248
|
+
let style_name = style_info ? style_info.name : style_id; // FALLBACK TO ID
|
|
249
|
+
if (style_name && typeof style_name === "string" && style_name.toLowerCase().startsWith("heading")) {
|
|
250
|
+
style_name = style_name.replace(/^heading/i, "Heading");
|
|
251
|
+
}
|
|
252
|
+
if (style_name?.startsWith("Heading")) {
|
|
253
|
+
const match = style_name.replace("Heading", "").trim();
|
|
254
|
+
if (/^\d+$/.test(match)) return "#".repeat(parseInt(match, 10)) + " ";
|
|
208
255
|
}
|
|
209
256
|
|
|
210
|
-
if (style_name ===
|
|
257
|
+
if (style_name === "Title") return "# ";
|
|
211
258
|
|
|
212
259
|
if (pPr) {
|
|
213
260
|
const numPr = findChild(pPr, QN_W_NUMPR);
|
|
214
261
|
if (numPr) {
|
|
215
262
|
const numId = findChild(numPr, QN_W_NUMID);
|
|
216
|
-
if (numId && numId.getAttribute(QN_W_VAL) !==
|
|
263
|
+
if (numId && numId.getAttribute(QN_W_VAL) !== "0") {
|
|
217
264
|
let level = 0;
|
|
218
265
|
const ilvl = findChild(numPr, QN_W_ILVL);
|
|
219
266
|
if (ilvl) {
|
|
220
267
|
const valAttr = ilvl.getAttribute(QN_W_VAL);
|
|
221
268
|
if (valAttr) level = parseInt(valAttr, 10) || 0;
|
|
222
269
|
}
|
|
223
|
-
return
|
|
270
|
+
return " ".repeat(level) + "* ";
|
|
224
271
|
}
|
|
225
272
|
}
|
|
226
273
|
}
|
|
227
274
|
|
|
228
|
-
if (style_name && style_name !==
|
|
275
|
+
if (style_name && style_name !== "Normal") {
|
|
229
276
|
const custom_level = _detect_heading_level_from_name(style_name);
|
|
230
|
-
if (custom_level !== null) return
|
|
277
|
+
if (custom_level !== null) return "#".repeat(custom_level) + " ";
|
|
231
278
|
}
|
|
232
279
|
|
|
233
|
-
if (!style_name || style_name ===
|
|
280
|
+
if (!style_name || style_name === "Normal") {
|
|
234
281
|
const text = paragraph.text.trim();
|
|
235
282
|
if (text && text.length < 100 && text === text.toUpperCase()) {
|
|
236
283
|
let is_bold = false;
|
|
@@ -240,12 +287,16 @@ export function get_paragraph_prefix(paragraph: Paragraph, style_cache?: Record<
|
|
|
240
287
|
const runs = findAllDescendants(paragraph._element, QN_W_R);
|
|
241
288
|
for (const r of runs) {
|
|
242
289
|
const tList = findAllDescendants(r, QN_W_T);
|
|
243
|
-
const tText = tList.map(t => t.textContent ||
|
|
290
|
+
const tText = tList.map((t) => t.textContent || "").join("");
|
|
244
291
|
if (tText.trim()) {
|
|
245
292
|
const rPr_run = findChild(r, QN_W_RPR);
|
|
246
293
|
if (rPr_run) {
|
|
247
294
|
const b = findChild(rPr_run, QN_W_B);
|
|
248
|
-
if (
|
|
295
|
+
if (
|
|
296
|
+
b &&
|
|
297
|
+
b.getAttribute(QN_W_VAL) !== "0" &&
|
|
298
|
+
b.getAttribute(QN_W_VAL) !== "false"
|
|
299
|
+
) {
|
|
249
300
|
is_bold = true;
|
|
250
301
|
}
|
|
251
302
|
}
|
|
@@ -253,23 +304,30 @@ export function get_paragraph_prefix(paragraph: Paragraph, style_cache?: Record<
|
|
|
253
304
|
}
|
|
254
305
|
}
|
|
255
306
|
}
|
|
256
|
-
if (is_bold) return
|
|
307
|
+
if (is_bold) return "## ";
|
|
257
308
|
}
|
|
258
309
|
}
|
|
259
310
|
|
|
260
|
-
return
|
|
311
|
+
return "";
|
|
261
312
|
}
|
|
262
313
|
|
|
263
|
-
export function is_heading_paragraph(
|
|
314
|
+
/**
 * Reports whether a paragraph would be rendered as a Markdown heading.
 *
 * The decision is delegated to `get_paragraph_prefix`: the paragraph counts as
 * a heading only when that prefix, with trailing whitespace removed, is a
 * non-empty run consisting solely of `#` characters. An empty prefix or any
 * prefix containing other characters yields `false`.
 *
 * @param paragraph - Paragraph to classify.
 * @param style_cache - Optional style lookup, forwarded unchanged.
 * @param default_pstyle - Optional default paragraph style id, forwarded unchanged.
 * @returns `true` when the computed prefix is made of `#` characters only.
 */
export function is_heading_paragraph(
  paragraph: Paragraph,
  style_cache?: Record<string, any>,
  default_pstyle?: string | null,
): boolean {
  const marker = get_paragraph_prefix(
    paragraph,
    style_cache,
    default_pstyle,
  ).trimEnd();
  // One or more '#' and nothing else; the empty string fails the match.
  return /^#+$/.test(marker);
}
|
|
269
324
|
|
|
270
|
-
export function get_run_style_markers(
|
|
271
|
-
|
|
272
|
-
|
|
325
|
+
export function get_run_style_markers(
|
|
326
|
+
run: Run,
|
|
327
|
+
is_heading: boolean | null = null,
|
|
328
|
+
): [string, string] {
|
|
329
|
+
let prefix = "";
|
|
330
|
+
let suffix = "";
|
|
273
331
|
|
|
274
332
|
const rPr = findChild(run._element, QN_W_RPR);
|
|
275
333
|
let is_bold = false;
|
|
@@ -277,65 +335,86 @@ export function get_run_style_markers(run: Run, is_heading: boolean | null = nul
|
|
|
277
335
|
|
|
278
336
|
if (rPr) {
|
|
279
337
|
const b = findChild(rPr, QN_W_B);
|
|
280
|
-
if (
|
|
338
|
+
if (
|
|
339
|
+
b &&
|
|
340
|
+
b.getAttribute(QN_W_VAL) !== "0" &&
|
|
341
|
+
b.getAttribute(QN_W_VAL) !== "false"
|
|
342
|
+
)
|
|
343
|
+
is_bold = true;
|
|
281
344
|
|
|
282
345
|
const i = findChild(rPr, QN_W_I);
|
|
283
|
-
if (
|
|
346
|
+
if (
|
|
347
|
+
i &&
|
|
348
|
+
i.getAttribute(QN_W_VAL) !== "0" &&
|
|
349
|
+
i.getAttribute(QN_W_VAL) !== "false"
|
|
350
|
+
)
|
|
351
|
+
is_italic = true;
|
|
284
352
|
}
|
|
285
353
|
|
|
286
354
|
if (is_heading === null) {
|
|
287
355
|
const parent = run._parent;
|
|
288
|
-
is_heading =
|
|
356
|
+
is_heading =
|
|
357
|
+
parent instanceof Paragraph ? is_native_heading(parent) : false;
|
|
289
358
|
}
|
|
290
359
|
|
|
291
360
|
if (is_bold && !is_heading) {
|
|
292
|
-
prefix +=
|
|
293
|
-
suffix =
|
|
361
|
+
prefix += "**";
|
|
362
|
+
suffix = "**" + suffix;
|
|
294
363
|
}
|
|
295
364
|
|
|
296
365
|
if (is_italic) {
|
|
297
|
-
prefix +=
|
|
298
|
-
suffix =
|
|
366
|
+
prefix += "_";
|
|
367
|
+
suffix = "_" + suffix;
|
|
299
368
|
}
|
|
300
369
|
|
|
301
370
|
return [prefix, suffix];
|
|
302
371
|
}
|
|
303
372
|
|
|
304
|
-
export function apply_formatting_to_segments(
|
|
373
|
+
/**
 * Wraps text in formatting markers, one line at a time.
 *
 * Every non-empty line of `text` (split on "\n") is surrounded by `prefix` and
 * `suffix`; empty lines are left empty so that markers never span a line
 * break. The line breaks themselves are preserved.
 *
 * @param text - Text to decorate; may contain "\n".
 * @param prefix - Marker placed before each non-empty line.
 * @param suffix - Marker placed after each non-empty line.
 * @returns `text` unchanged when both markers are empty, `""` when `text` is
 *   empty, otherwise the decorated text.
 */
export function apply_formatting_to_segments(
  text: string,
  prefix: string,
  suffix: string,
): string {
  const hasMarkers = Boolean(prefix) || Boolean(suffix);
  if (!hasMarkers) return text;
  if (!text) return "";

  // A text without "\n" splits into a single segment, so the single-line case
  // needs no separate branch.
  const decorated: string[] = [];
  for (const line of text.split("\n")) {
    decorated.push(line ? `${prefix}${line}${suffix}` : "");
  }
  return decorated.join("\n");
}
|
|
312
385
|
|
|
313
386
|
/**
 * Builds the plain-text content of a run from its direct child elements.
 *
 * Only element nodes (nodeType 1) are inspected, in document order:
 * - `w:t` / `w:delText` contribute their text content, with each tab
 *   character replaced by a space;
 * - `w:tab` contributes a single space;
 * - `w:br` / `w:cr` contribute a newline.
 * Every other child is ignored.
 *
 * @param run - Run whose `_element` children are read.
 * @returns The concatenated text, or `""` when nothing contributes.
 */
export function get_run_text(run: Run): string {
  const pieces: string[] = [];
  const nodes = run._element.childNodes;

  for (let idx = 0; idx < nodes.length; idx++) {
    const node = nodes[idx] as Element;
    if (node.nodeType !== 1) continue;

    switch (node.tagName) {
      case QN_W_T:
      case QN_W_DELTEXT:
        pieces.push((node.textContent || "").replace(/\t/g, " "));
        break;
      case QN_W_TAB:
        pieces.push(" ");
        break;
      case QN_W_BR:
      case QN_W_CR:
        pieces.push("\n");
        break;
    }
  }

  return pieces.join("");
}
|
|
330
403
|
|
|
331
|
-
export function* iter_block_items(
|
|
404
|
+
export function* iter_block_items(
|
|
405
|
+
parent: any,
|
|
406
|
+
): Generator<Paragraph | Table | FootnoteItem> {
|
|
332
407
|
const parent_elm = parent._element || parent.element || parent;
|
|
333
408
|
|
|
334
|
-
if (parent.constructor.name ===
|
|
335
|
-
const tag = parent.note_type ===
|
|
409
|
+
if (parent.constructor.name === "NotesPart") {
|
|
410
|
+
const tag = parent.note_type === "fn" ? "w:footnote" : "w:endnote";
|
|
336
411
|
const notes = findAllDescendants(parent_elm, tag);
|
|
337
412
|
for (const child of notes) {
|
|
338
|
-
if (
|
|
413
|
+
if (
|
|
414
|
+
child.getAttribute("w:type") === "separator" ||
|
|
415
|
+
child.getAttribute("w:type") === "continuationSeparator"
|
|
416
|
+
)
|
|
417
|
+
continue;
|
|
339
418
|
yield new FootnoteItem(child, parent, parent.note_type);
|
|
340
419
|
}
|
|
341
420
|
return;
|
|
@@ -347,50 +426,77 @@ export function* iter_block_items(parent: any): Generator<Paragraph | Table | Fo
|
|
|
347
426
|
|
|
348
427
|
if (child.tagName === QN_W_P) {
|
|
349
428
|
yield new Paragraph(child, parent);
|
|
350
|
-
} else if (child.tagName ===
|
|
429
|
+
} else if (child.tagName === "w:tbl") {
|
|
351
430
|
yield new Table(child, parent);
|
|
352
431
|
}
|
|
353
432
|
}
|
|
354
433
|
}
|
|
355
434
|
|
|
356
435
|
/**
 * Yields the parts of a document that carry block content, in a fixed order:
 *
 * 1. every package part whose content type is the WordprocessingML header type,
 * 2. the document object itself (main body),
 * 3. every package part whose content type is the WordprocessingML footer type,
 * 4. a `NotesPart` wrapper for `word/footnotes.xml` and then for
 *    `word/endnotes.xml`, each only when the package has that part.
 *
 * @param doc - Document object exposing `pkg.parts` and `pkg.getPartByPath`.
 */
export function* iter_document_parts(doc: any): Generator<any> {
  const HEADER_CONTENT_TYPE =
    "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml";
  const FOOTER_CONTENT_TYPE =
    "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml";

  // Evaluated lazily at each call site so the package is queried at the same
  // moments as a hand-written filter in place would be.
  const partsOfType = (contentType: string): any[] =>
    doc.pkg.parts.filter(
      (candidate: any) => candidate.contentType === contentType,
    );

  // Headers first.
  yield* partsOfType(HEADER_CONTENT_TYPE);

  // Then the main document body.
  yield doc;

  // Then footers.
  yield* partsOfType(FOOTER_CONTENT_TYPE);

  // Finally footnotes and endnotes; both lookups happen before either yield.
  const footnotes = doc.pkg.getPartByPath("word/footnotes.xml");
  const endnotes = doc.pkg.getPartByPath("word/endnotes.xml");

  if (footnotes) yield new NotesPart(footnotes, "fn");
  if (endnotes) yield new NotesPart(endnotes, "en");
}
|
|
366
462
|
|
|
367
463
|
/**
 * Tells whether a field instruction is a page-number style field.
 *
 * The instruction is upper-cased and trimmed, then split on whitespace; the
 * result is `true` when the first token is exactly `PAGE` or `NUMPAGES`.
 * An empty (falsy) instruction yields `false`.
 *
 * @param instr - Raw field instruction text.
 */
function _is_page_instr(instr: string): boolean {
  if (!instr) return false;
  // split() always returns at least one element, so the first token exists
  // (it is "" for a whitespace-only instruction).
  const [keyword] = instr.toUpperCase().trim().split(/\s+/);
  return keyword === "PAGE" || keyword === "NUMPAGES";
}
|
|
372
468
|
|
|
373
469
|
/**
 * Finds the part that owns an object by walking up its `_parent` chain.
 *
 * Starting at `parent`, each object is checked in turn: a truthy `part`
 * property wins first, then a truthy `pkg.mainDocumentPart`; otherwise the
 * search continues with `_parent`.
 *
 * @param parent - Object to start from; may be null or undefined.
 * @returns The first part found, or `null` when the chain ends without one.
 */
export function _get_part(parent: any): any {
  let node = parent;
  while (node) {
    if (node.part) return node.part;

    const mainPart = node.pkg && node.pkg.mainDocumentPart;
    if (mainPart) return mainPart;

    // A missing or falsy _parent ends the loop and falls through to null.
    node = node._parent;
  }
  return null;
}
|
|
380
477
|
|
|
381
|
-
export function* iter_paragraph_content(
|
|
478
|
+
export function* iter_paragraph_content(
|
|
479
|
+
paragraph: Paragraph,
|
|
480
|
+
): Generator<Run | DocxEvent> {
|
|
382
481
|
let in_complex_field = false;
|
|
383
|
-
let current_instr =
|
|
482
|
+
let current_instr = "";
|
|
384
483
|
let hide_result = false;
|
|
385
484
|
|
|
386
|
-
function* process_run_element(
|
|
485
|
+
function* process_run_element(
|
|
486
|
+
r_element: Element,
|
|
487
|
+
): Generator<Run | DocxEvent> {
|
|
387
488
|
let c_id: string | null = null;
|
|
388
489
|
const rPr = findChild(r_element, QN_W_RPR);
|
|
389
490
|
if (rPr) {
|
|
390
491
|
const rPrChange = findChild(rPr, QN_W_RPRCHANGE);
|
|
391
492
|
if (rPrChange) {
|
|
392
493
|
c_id = rPrChange.getAttribute(QN_W_ID);
|
|
393
|
-
yield {
|
|
494
|
+
yield {
|
|
495
|
+
type: "fmt_start",
|
|
496
|
+
id: c_id!,
|
|
497
|
+
author: rPrChange.getAttribute(QN_W_AUTHOR) || undefined,
|
|
498
|
+
date: rPrChange.getAttribute(QN_W_DATE) || undefined,
|
|
499
|
+
};
|
|
394
500
|
}
|
|
395
501
|
}
|
|
396
502
|
|
|
@@ -401,40 +507,42 @@ export function* iter_paragraph_content(paragraph: Paragraph): Generator<Run | D
|
|
|
401
507
|
const tag = child.tagName;
|
|
402
508
|
if (tag === QN_W_COMMENTREFERENCE) {
|
|
403
509
|
const ref_id = child.getAttribute(QN_W_ID);
|
|
404
|
-
if (ref_id) yield { type:
|
|
510
|
+
if (ref_id) yield { type: "ref", id: ref_id };
|
|
405
511
|
} else if (tag === QN_W_FOOTNOTEREFERENCE) {
|
|
406
512
|
const f_id = child.getAttribute(QN_W_ID);
|
|
407
|
-
if (f_id) yield { type:
|
|
513
|
+
if (f_id) yield { type: "footnote", id: f_id };
|
|
408
514
|
} else if (tag === QN_W_ENDNOTEREFERENCE) {
|
|
409
515
|
const e_id = child.getAttribute(QN_W_ID);
|
|
410
|
-
if (e_id) yield { type:
|
|
516
|
+
if (e_id) yield { type: "endnote", id: e_id };
|
|
411
517
|
} else if (tag === QN_W_FLDCHAR) {
|
|
412
518
|
const fld_type = child.getAttribute(QN_W_FLDCHARTYPE);
|
|
413
|
-
if (fld_type ===
|
|
519
|
+
if (fld_type === "begin") {
|
|
414
520
|
in_complex_field = true;
|
|
415
|
-
current_instr =
|
|
416
|
-
} else if (fld_type ===
|
|
521
|
+
current_instr = "";
|
|
522
|
+
} else if (fld_type === "separate") {
|
|
417
523
|
if (_is_page_instr(current_instr)) hide_result = true;
|
|
418
524
|
else {
|
|
419
525
|
const parts = current_instr.trim().split(/\s+/);
|
|
420
|
-
if (parts.length > 1 && parts[0] ===
|
|
526
|
+
if (parts.length > 1 && parts[0] === "REF")
|
|
527
|
+
yield { type: "xref_start", id: parts[1] };
|
|
421
528
|
}
|
|
422
|
-
} else if (fld_type ===
|
|
529
|
+
} else if (fld_type === "end") {
|
|
423
530
|
if (!hide_result) {
|
|
424
531
|
const parts = current_instr.trim().split(/\s+/);
|
|
425
|
-
if (parts.length > 1 && parts[0] ===
|
|
532
|
+
if (parts.length > 1 && parts[0] === "REF")
|
|
533
|
+
yield { type: "xref_end", id: parts[1] };
|
|
426
534
|
}
|
|
427
535
|
in_complex_field = false;
|
|
428
|
-
current_instr =
|
|
536
|
+
current_instr = "";
|
|
429
537
|
hide_result = false;
|
|
430
538
|
}
|
|
431
539
|
} else if (tag === QN_W_INSTRTEXT && in_complex_field && !hide_result) {
|
|
432
|
-
current_instr += child.textContent ||
|
|
540
|
+
current_instr += child.textContent || "";
|
|
433
541
|
}
|
|
434
542
|
}
|
|
435
543
|
|
|
436
544
|
if (!hide_result) yield new Run(r_element, paragraph);
|
|
437
|
-
if (c_id !== null) yield { type:
|
|
545
|
+
if (c_id !== null) yield { type: "fmt_end", id: c_id };
|
|
438
546
|
}
|
|
439
547
|
|
|
440
548
|
function* traverse_node(node: Element): Generator<Run | DocxEvent> {
|
|
@@ -446,42 +554,59 @@ export function* iter_paragraph_content(paragraph: Paragraph): Generator<Run | D
|
|
|
446
554
|
if (tag === QN_W_R) yield* process_run_element(child);
|
|
447
555
|
else if (tag === QN_W_INS) {
|
|
448
556
|
const i_id = child.getAttribute(QN_W_ID)!;
|
|
449
|
-
yield {
|
|
557
|
+
yield {
|
|
558
|
+
type: "ins_start",
|
|
559
|
+
id: i_id,
|
|
560
|
+
author: child.getAttribute(QN_W_AUTHOR) || undefined,
|
|
561
|
+
date: child.getAttribute(QN_W_DATE) || undefined,
|
|
562
|
+
};
|
|
450
563
|
yield* traverse_node(child);
|
|
451
|
-
yield { type:
|
|
564
|
+
yield { type: "ins_end", id: i_id };
|
|
452
565
|
} else if (tag === QN_W_DEL) {
|
|
453
566
|
const d_id = child.getAttribute(QN_W_ID)!;
|
|
454
|
-
yield {
|
|
567
|
+
yield {
|
|
568
|
+
type: "del_start",
|
|
569
|
+
id: d_id,
|
|
570
|
+
author: child.getAttribute(QN_W_AUTHOR) || undefined,
|
|
571
|
+
date: child.getAttribute(QN_W_DATE) || undefined,
|
|
572
|
+
};
|
|
455
573
|
yield* traverse_node(child);
|
|
456
|
-
yield { type:
|
|
457
|
-
} else if (tag === QN_W_COMMENTRANGESTART)
|
|
458
|
-
|
|
574
|
+
yield { type: "del_end", id: d_id };
|
|
575
|
+
} else if (tag === QN_W_COMMENTRANGESTART)
|
|
576
|
+
yield { type: "start", id: child.getAttribute(QN_W_ID)! };
|
|
577
|
+
else if (tag === QN_W_COMMENTRANGEEND)
|
|
578
|
+
yield { type: "end", id: child.getAttribute(QN_W_ID)! };
|
|
459
579
|
else if (tag === QN_W_HYPERLINK) {
|
|
460
|
-
const rId = child.getAttribute(QN_R_ID) || child.getAttribute(
|
|
461
|
-
let url =
|
|
580
|
+
const rId = child.getAttribute(QN_R_ID) || child.getAttribute("id");
|
|
581
|
+
let url = "";
|
|
462
582
|
const part = _get_part(paragraph._parent);
|
|
463
583
|
if (rId && part) {
|
|
464
584
|
const rel = part.rels.get(rId);
|
|
465
585
|
if (rel && rel.isExternal) url = rel.target;
|
|
466
586
|
}
|
|
467
|
-
if (url) yield { type:
|
|
587
|
+
if (url) yield { type: "hyperlink_start", id: rId!, date: url };
|
|
468
588
|
yield* traverse_node(child);
|
|
469
|
-
if (url) yield { type:
|
|
589
|
+
if (url) yield { type: "hyperlink_end", id: rId!, date: url };
|
|
470
590
|
} else if (tag === QN_W_FLDSIMPLE) {
|
|
471
|
-
const instr = child.getAttribute(QN_W_INSTR) ||
|
|
591
|
+
const instr = child.getAttribute(QN_W_INSTR) || "";
|
|
472
592
|
const parts = instr.trim().split(/\s+/);
|
|
473
|
-
const target =
|
|
474
|
-
if (target) yield { type:
|
|
593
|
+
const target = parts.length > 1 && parts[0] === "REF" ? parts[1] : "";
|
|
594
|
+
if (target) yield { type: "xref_start", id: target };
|
|
475
595
|
yield* traverse_node(child);
|
|
476
|
-
if (target) yield { type:
|
|
596
|
+
if (target) yield { type: "xref_end", id: target };
|
|
477
597
|
} else if (tag === QN_W_BOOKMARKSTART) {
|
|
478
598
|
const b_name = child.getAttribute(QN_W_NAME);
|
|
479
|
-
if (b_name && (!b_name.startsWith(
|
|
480
|
-
|
|
599
|
+
if (b_name && (!b_name.startsWith("_") || b_name.startsWith("_Ref")))
|
|
600
|
+
yield { type: "bookmark", id: b_name };
|
|
601
|
+
} else if (
|
|
602
|
+
tag === QN_W_SDT ||
|
|
603
|
+
tag === QN_W_SMARTTAG ||
|
|
604
|
+
tag === QN_W_SDTCONTENT
|
|
605
|
+
) {
|
|
481
606
|
yield* traverse_node(child);
|
|
482
607
|
}
|
|
483
608
|
}
|
|
484
609
|
}
|
|
485
610
|
|
|
486
611
|
yield* traverse_node(paragraph._element);
|
|
487
|
-
}
|
|
612
|
+
}
|