@adeu/core 1.6.7 → 1.6.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/utils/docx.ts CHANGED
@@ -1,50 +1,59 @@
1
- import { qn, findChild, findAllDescendants } from '../docx/dom.js';
2
- import { Paragraph, Table, Run, NotesPart, FootnoteItem, DocxEvent } from '../docx/primitives.js';
3
-
4
- export const QN_W_P = 'w:p';
5
- export const QN_W_R = 'w:r';
6
- export const QN_W_T = 'w:t';
7
- export const QN_W_DELTEXT = 'w:delText';
8
- export const QN_W_TAB = 'w:tab';
9
- export const QN_W_BR = 'w:br';
10
- export const QN_W_CR = 'w:cr';
11
- export const QN_W_RPR = 'w:rPr';
12
- export const QN_W_RPRCHANGE = 'w:rPrChange';
13
- export const QN_W_COMMENTREFERENCE = 'w:commentReference';
14
- export const QN_W_FOOTNOTEREFERENCE = 'w:footnoteReference';
15
- export const QN_W_ENDNOTEREFERENCE = 'w:endnoteReference';
16
- export const QN_W_FLDCHAR = 'w:fldChar';
17
- export const QN_W_FLDCHARTYPE = 'w:fldCharType';
18
- export const QN_W_INSTRTEXT = 'w:instrText';
19
- export const QN_W_INS = 'w:ins';
20
- export const QN_W_DEL = 'w:del';
21
- export const QN_W_ID = 'w:id';
22
- export const QN_W_AUTHOR = 'w:author';
23
- export const QN_W_DATE = 'w:date';
24
- export const QN_W_COMMENTRANGESTART = 'w:commentRangeStart';
25
- export const QN_W_COMMENTRANGEEND = 'w:commentRangeEnd';
26
- export const QN_W_HYPERLINK = 'w:hyperlink';
27
- export const QN_R_ID = 'r:id';
28
- export const QN_W_FLDSIMPLE = 'w:fldSimple';
29
- export const QN_W_INSTR = 'w:instr';
30
- export const QN_W_BOOKMARKSTART = 'w:bookmarkStart';
31
- export const QN_W_NAME = 'w:name';
32
- export const QN_W_SDT = 'w:sdt';
33
- export const QN_W_SMARTTAG = 'w:smartTag';
34
- export const QN_W_SDTCONTENT = 'w:sdtContent';
35
- export const QN_W_B = 'w:b';
36
- export const QN_W_I = 'w:i';
37
- export const QN_W_VAL = 'w:val';
38
- export const QN_W_PPR = 'w:pPr';
39
- export const QN_W_PSTYLE = 'w:pStyle';
40
- export const QN_W_OUTLINELVL = 'w:outlineLvl';
41
- export const QN_W_NUMPR = 'w:numPr';
42
- export const QN_W_NUMID = 'w:numId';
43
- export const QN_W_ILVL = 'w:ilvl';
1
+ import { qn, findChild, findAllDescendants } from "../docx/dom.js";
2
+ import {
3
+ Paragraph,
4
+ Table,
5
+ Run,
6
+ NotesPart,
7
+ FootnoteItem,
8
+ DocxEvent,
9
+ } from "../docx/primitives.js";
10
+
11
+ export const QN_W_P = "w:p";
12
+ export const QN_W_R = "w:r";
13
+ export const QN_W_T = "w:t";
14
+ export const QN_W_DELTEXT = "w:delText";
15
+ export const QN_W_TAB = "w:tab";
16
+ export const QN_W_BR = "w:br";
17
+ export const QN_W_CR = "w:cr";
18
+ export const QN_W_RPR = "w:rPr";
19
+ export const QN_W_RPRCHANGE = "w:rPrChange";
20
+ export const QN_W_COMMENTREFERENCE = "w:commentReference";
21
+ export const QN_W_FOOTNOTEREFERENCE = "w:footnoteReference";
22
+ export const QN_W_ENDNOTEREFERENCE = "w:endnoteReference";
23
+ export const QN_W_FLDCHAR = "w:fldChar";
24
+ export const QN_W_FLDCHARTYPE = "w:fldCharType";
25
+ export const QN_W_INSTRTEXT = "w:instrText";
26
+ export const QN_W_INS = "w:ins";
27
+ export const QN_W_DEL = "w:del";
28
+ export const QN_W_ID = "w:id";
29
+ export const QN_W_AUTHOR = "w:author";
30
+ export const QN_W_DATE = "w:date";
31
+ export const QN_W_COMMENTRANGESTART = "w:commentRangeStart";
32
+ export const QN_W_COMMENTRANGEEND = "w:commentRangeEnd";
33
+ export const QN_W_HYPERLINK = "w:hyperlink";
34
+ export const QN_R_ID = "r:id";
35
+ export const QN_W_FLDSIMPLE = "w:fldSimple";
36
+ export const QN_W_INSTR = "w:instr";
37
+ export const QN_W_BOOKMARKSTART = "w:bookmarkStart";
38
+ export const QN_W_NAME = "w:name";
39
+ export const QN_W_SDT = "w:sdt";
40
+ export const QN_W_SMARTTAG = "w:smartTag";
41
+ export const QN_W_SDTCONTENT = "w:sdtContent";
42
+ export const QN_W_B = "w:b";
43
+ export const QN_W_I = "w:i";
44
+ export const QN_W_VAL = "w:val";
45
+ export const QN_W_PPR = "w:pPr";
46
+ export const QN_W_PSTYLE = "w:pStyle";
47
+ export const QN_W_OUTLINELVL = "w:outlineLvl";
48
+ export const QN_W_NUMPR = "w:numPr";
49
+ export const QN_W_NUMID = "w:numId";
50
+ export const QN_W_ILVL = "w:ilvl";
44
51
 
45
52
  const _CUSTOM_HEADING_NAME_RE = /Heading[ ]?([1-6])(?![0-9])/;
46
53
 
47
- export function _get_style_cache(part: any): [Record<string, any>, string | null] {
54
+ export function _get_style_cache(
55
+ part: any,
56
+ ): [Record<string, any>, string | null] {
48
57
  const pkg = part.package || part.pkg || (part.part ? part.part.pkg : null);
49
58
  if (pkg && pkg._adeu_style_cache) {
50
59
  return pkg._adeu_style_cache;
@@ -54,46 +63,56 @@ export function _get_style_cache(part: any): [Record<string, any>, string | null
54
63
  let default_pstyle: string | null = null;
55
64
  const raw_styles: Record<string, any> = {};
56
65
 
57
- const stylesPart = pkg?.getPartByPath('word/styles.xml');
66
+ const stylesPart = pkg?.getPartByPath("word/styles.xml");
58
67
  if (!stylesPart) {
59
68
  const result: [Record<string, any>, string | null] = [cache, null];
60
69
  if (pkg) pkg._adeu_style_cache = result;
61
70
  return result;
62
71
  }
63
72
 
64
- const styles = findAllDescendants(stylesPart._element, 'w:style');
73
+ const styles = findAllDescendants(stylesPart._element, "w:style");
65
74
  for (const s of styles) {
66
- const s_id = s.getAttribute('w:styleId');
75
+ const s_id = s.getAttribute("w:styleId");
67
76
  if (!s_id) continue;
68
77
 
69
- const s_type = s.getAttribute('w:type');
70
- const is_default = s.getAttribute('w:default') === '1' || s.getAttribute('w:default') === 'true';
78
+ const s_type = s.getAttribute("w:type");
79
+ const is_default =
80
+ s.getAttribute("w:default") === "1" ||
81
+ s.getAttribute("w:default") === "true";
71
82
 
72
- if (s_type === 'paragraph' && is_default) default_pstyle = s_id;
83
+ if (s_type === "paragraph" && is_default) default_pstyle = s_id;
73
84
 
74
- const name_el = findChild(s, 'w:name');
75
- const name = name_el ? name_el.getAttribute('w:val') : s_id;
85
+ const name_el = findChild(s, "w:name");
86
+ let name = name_el ? name_el.getAttribute("w:val") : s_id;
76
87
 
77
- const based_on_el = findChild(s, 'w:basedOn');
78
- const based_on = based_on_el ? based_on_el.getAttribute('w:val') : null;
88
+ if (name && typeof name === "string") {
89
+ if (name.toLowerCase().startsWith("heading")) {
90
+ name = name.replace(/^heading/i, "Heading");
91
+ } else if (name.toLowerCase() === "title") {
92
+ name = "Title";
93
+ }
94
+ }
95
+
96
+ const based_on_el = findChild(s, "w:basedOn");
97
+ const based_on = based_on_el ? based_on_el.getAttribute("w:val") : null;
79
98
 
80
99
  let outline_lvl: number | null = null;
81
- const pPr = findChild(s, 'w:pPr');
100
+ const pPr = findChild(s, "w:pPr");
82
101
  if (pPr) {
83
- const oLvl = findChild(pPr, 'w:outlineLvl');
102
+ const oLvl = findChild(pPr, "w:outlineLvl");
84
103
  if (oLvl) {
85
- const val = oLvl.getAttribute('w:val');
104
+ const val = oLvl.getAttribute("w:val");
86
105
  if (val && /^\d+$/.test(val)) outline_lvl = parseInt(val, 10);
87
106
  }
88
107
  }
89
108
 
90
109
  let bold: boolean | null = null;
91
- const rPr = findChild(s, 'w:rPr');
110
+ const rPr = findChild(s, "w:rPr");
92
111
  if (rPr) {
93
- const b = findChild(rPr, 'w:b');
112
+ const b = findChild(rPr, "w:b");
94
113
  if (b) {
95
- const val = b.getAttribute('w:val');
96
- bold = val !== '0' && val !== 'false' && val !== 'off';
114
+ const val = b.getAttribute("w:val");
115
+ bold = val !== "0" && val !== "false" && val !== "off";
97
116
  }
98
117
  }
99
118
 
@@ -102,7 +121,8 @@ export function _get_style_cache(part: any): [Record<string, any>, string | null
102
121
 
103
122
  const resolve_style = (s_id: string, visited: Set<string>): any => {
104
123
  if (cache[s_id]) return cache[s_id];
105
- if (visited.has(s_id) || !raw_styles[s_id]) return { name: s_id, outline_level: null, bold: false };
124
+ if (visited.has(s_id) || !raw_styles[s_id])
125
+ return { name: s_id, outline_level: null, bold: false };
106
126
 
107
127
  visited.add(s_id);
108
128
  const raw = raw_styles[s_id];
@@ -135,9 +155,15 @@ function _detect_heading_level_from_name(name: string): number | null {
135
155
  return match ? parseInt(match[1], 10) : null;
136
156
  }
137
157
 
138
- export function is_native_heading(paragraph: Paragraph, style_cache?: Record<string, any>, default_pstyle?: string | null): boolean {
158
+ export function is_native_heading(
159
+ paragraph: Paragraph,
160
+ style_cache?: Record<string, any>,
161
+ default_pstyle?: string | null,
162
+ ): boolean {
139
163
  if (!style_cache) {
140
- [style_cache, default_pstyle] = _get_style_cache(paragraph._parent.part || paragraph._parent);
164
+ [style_cache, default_pstyle] = _get_style_cache(
165
+ paragraph._parent.part || paragraph._parent,
166
+ );
141
167
  }
142
168
  const pPr = findChild(paragraph._element, QN_W_PPR);
143
169
 
@@ -159,23 +185,36 @@ export function is_native_heading(paragraph: Paragraph, style_cache?: Record<str
159
185
  }
160
186
 
161
187
  const style_info = style_id && style_cache ? style_cache[style_id] : null;
162
- if (style_info && style_info.outline_level !== null && style_info.outline_level >= 0 && style_info.outline_level <= 8) {
188
+ if (
189
+ style_info &&
190
+ style_info.outline_level !== null &&
191
+ style_info.outline_level >= 0 &&
192
+ style_info.outline_level <= 8
193
+ ) {
163
194
  return true;
164
195
  }
165
196
 
166
- const style_name = style_info ? style_info.name : null;
167
- if (style_name?.startsWith('Heading')) return true;
168
- if (style_name === 'Title') return true;
169
- if (style_name && style_name !== 'Normal') {
197
+ let style_name = style_info ? style_info.name : style_id; // FALLBACK TO ID
198
+ if (style_name && typeof style_name === "string" && style_name.toLowerCase().startsWith("heading")) {
199
+ style_name = style_name.replace(/^heading/i, "Heading");
200
+ }
201
+ if (style_name?.startsWith("Heading")) return true;
202
+ if (style_name === "Title") return true;
203
+ if (style_name && style_name !== "Normal") {
170
204
  if (_detect_heading_level_from_name(style_name) !== null) return true;
171
205
  }
172
206
 
173
207
  return false;
174
208
  }
175
-
176
- export function get_paragraph_prefix(paragraph: Paragraph, style_cache?: Record<string, any>, default_pstyle?: string | null): string {
209
+ export function get_paragraph_prefix(
210
+ paragraph: Paragraph,
211
+ style_cache?: Record<string, any>,
212
+ default_pstyle?: string | null,
213
+ ): string {
177
214
  if (!style_cache) {
178
- [style_cache, default_pstyle] = _get_style_cache(paragraph._parent.part || paragraph._parent);
215
+ [style_cache, default_pstyle] = _get_style_cache(
216
+ paragraph._parent.part || paragraph._parent,
217
+ );
179
218
  }
180
219
  const pPr = findChild(paragraph._element, QN_W_PPR);
181
220
 
@@ -185,7 +224,7 @@ export function get_paragraph_prefix(paragraph: Paragraph, style_cache?: Record<
185
224
  const val = oLvl.getAttribute(QN_W_VAL);
186
225
  if (val && /^\d+$/.test(val)) {
187
226
  const lvl = parseInt(val, 10);
188
- if (lvl >= 0 && lvl <= 8) return '#'.repeat(lvl + 1) + ' ';
227
+ if (lvl >= 0 && lvl <= 8) return "#".repeat(lvl + 1) + " ";
189
228
  }
190
229
  }
191
230
  }
@@ -197,40 +236,48 @@ export function get_paragraph_prefix(paragraph: Paragraph, style_cache?: Record<
197
236
  }
198
237
 
199
238
  const style_info = style_id && style_cache ? style_cache[style_id] : null;
200
- if (style_info && style_info.outline_level !== null && style_info.outline_level >= 0 && style_info.outline_level <= 8) {
201
- return '#'.repeat(style_info.outline_level + 1) + ' ';
239
+ if (
240
+ style_info &&
241
+ style_info.outline_level !== null &&
242
+ style_info.outline_level >= 0 &&
243
+ style_info.outline_level <= 8
244
+ ) {
245
+ return "#".repeat(style_info.outline_level + 1) + " ";
202
246
  }
203
247
 
204
- const style_name = style_info ? style_info.name : null;
205
- if (style_name?.startsWith('Heading')) {
206
- const match = style_name.replace('Heading', '').trim();
207
- if (/^\d+$/.test(match)) return '#'.repeat(parseInt(match, 10)) + ' ';
248
+ let style_name = style_info ? style_info.name : style_id; // FALLBACK TO ID
249
+ if (style_name && typeof style_name === "string" && style_name.toLowerCase().startsWith("heading")) {
250
+ style_name = style_name.replace(/^heading/i, "Heading");
251
+ }
252
+ if (style_name?.startsWith("Heading")) {
253
+ const match = style_name.replace("Heading", "").trim();
254
+ if (/^\d+$/.test(match)) return "#".repeat(parseInt(match, 10)) + " ";
208
255
  }
209
256
 
210
- if (style_name === 'Title') return '# ';
257
+ if (style_name === "Title") return "# ";
211
258
 
212
259
  if (pPr) {
213
260
  const numPr = findChild(pPr, QN_W_NUMPR);
214
261
  if (numPr) {
215
262
  const numId = findChild(numPr, QN_W_NUMID);
216
- if (numId && numId.getAttribute(QN_W_VAL) !== '0') {
263
+ if (numId && numId.getAttribute(QN_W_VAL) !== "0") {
217
264
  let level = 0;
218
265
  const ilvl = findChild(numPr, QN_W_ILVL);
219
266
  if (ilvl) {
220
267
  const valAttr = ilvl.getAttribute(QN_W_VAL);
221
268
  if (valAttr) level = parseInt(valAttr, 10) || 0;
222
269
  }
223
- return ' '.repeat(level) + '* ';
270
+ return " ".repeat(level) + "* ";
224
271
  }
225
272
  }
226
273
  }
227
274
 
228
- if (style_name && style_name !== 'Normal') {
275
+ if (style_name && style_name !== "Normal") {
229
276
  const custom_level = _detect_heading_level_from_name(style_name);
230
- if (custom_level !== null) return '#'.repeat(custom_level) + ' ';
277
+ if (custom_level !== null) return "#".repeat(custom_level) + " ";
231
278
  }
232
279
 
233
- if (!style_name || style_name === 'Normal') {
280
+ if (!style_name || style_name === "Normal") {
234
281
  const text = paragraph.text.trim();
235
282
  if (text && text.length < 100 && text === text.toUpperCase()) {
236
283
  let is_bold = false;
@@ -240,12 +287,16 @@ export function get_paragraph_prefix(paragraph: Paragraph, style_cache?: Record<
240
287
  const runs = findAllDescendants(paragraph._element, QN_W_R);
241
288
  for (const r of runs) {
242
289
  const tList = findAllDescendants(r, QN_W_T);
243
- const tText = tList.map(t => t.textContent || '').join('');
290
+ const tText = tList.map((t) => t.textContent || "").join("");
244
291
  if (tText.trim()) {
245
292
  const rPr_run = findChild(r, QN_W_RPR);
246
293
  if (rPr_run) {
247
294
  const b = findChild(rPr_run, QN_W_B);
248
- if (b && b.getAttribute(QN_W_VAL) !== '0' && b.getAttribute(QN_W_VAL) !== 'false') {
295
+ if (
296
+ b &&
297
+ b.getAttribute(QN_W_VAL) !== "0" &&
298
+ b.getAttribute(QN_W_VAL) !== "false"
299
+ ) {
249
300
  is_bold = true;
250
301
  }
251
302
  }
@@ -253,23 +304,30 @@ export function get_paragraph_prefix(paragraph: Paragraph, style_cache?: Record<
253
304
  }
254
305
  }
255
306
  }
256
- if (is_bold) return '## ';
307
+ if (is_bold) return "## ";
257
308
  }
258
309
  }
259
310
 
260
- return '';
311
+ return "";
261
312
  }
262
313
 
263
- export function is_heading_paragraph(paragraph: Paragraph, style_cache?: Record<string, any>, default_pstyle?: string | null): boolean {
314
+ export function is_heading_paragraph(
315
+ paragraph: Paragraph,
316
+ style_cache?: Record<string, any>,
317
+ default_pstyle?: string | null,
318
+ ): boolean {
264
319
  const prefix = get_paragraph_prefix(paragraph, style_cache, default_pstyle);
265
320
  if (!prefix) return false;
266
321
  const stripped = prefix.trimEnd();
267
- return stripped.length > 0 && stripped === '#'.repeat(stripped.length);
322
+ return stripped.length > 0 && stripped === "#".repeat(stripped.length);
268
323
  }
269
324
 
270
- export function get_run_style_markers(run: Run, is_heading: boolean | null = null): [string, string] {
271
- let prefix = '';
272
- let suffix = '';
325
+ export function get_run_style_markers(
326
+ run: Run,
327
+ is_heading: boolean | null = null,
328
+ ): [string, string] {
329
+ let prefix = "";
330
+ let suffix = "";
273
331
 
274
332
  const rPr = findChild(run._element, QN_W_RPR);
275
333
  let is_bold = false;
@@ -277,65 +335,86 @@ export function get_run_style_markers(run: Run, is_heading: boolean | null = nul
277
335
 
278
336
  if (rPr) {
279
337
  const b = findChild(rPr, QN_W_B);
280
- if (b && b.getAttribute(QN_W_VAL) !== '0' && b.getAttribute(QN_W_VAL) !== 'false') is_bold = true;
338
+ if (
339
+ b &&
340
+ b.getAttribute(QN_W_VAL) !== "0" &&
341
+ b.getAttribute(QN_W_VAL) !== "false"
342
+ )
343
+ is_bold = true;
281
344
 
282
345
  const i = findChild(rPr, QN_W_I);
283
- if (i && i.getAttribute(QN_W_VAL) !== '0' && i.getAttribute(QN_W_VAL) !== 'false') is_italic = true;
346
+ if (
347
+ i &&
348
+ i.getAttribute(QN_W_VAL) !== "0" &&
349
+ i.getAttribute(QN_W_VAL) !== "false"
350
+ )
351
+ is_italic = true;
284
352
  }
285
353
 
286
354
  if (is_heading === null) {
287
355
  const parent = run._parent;
288
- is_heading = parent instanceof Paragraph ? is_native_heading(parent) : false;
356
+ is_heading =
357
+ parent instanceof Paragraph ? is_native_heading(parent) : false;
289
358
  }
290
359
 
291
360
  if (is_bold && !is_heading) {
292
- prefix += '**';
293
- suffix = '**' + suffix;
361
+ prefix += "**";
362
+ suffix = "**" + suffix;
294
363
  }
295
364
 
296
365
  if (is_italic) {
297
- prefix += '_';
298
- suffix = '_' + suffix;
366
+ prefix += "_";
367
+ suffix = "_" + suffix;
299
368
  }
300
369
 
301
370
  return [prefix, suffix];
302
371
  }
303
372
 
304
- export function apply_formatting_to_segments(text: string, prefix: string, suffix: string): string {
373
+ export function apply_formatting_to_segments(
374
+ text: string,
375
+ prefix: string,
376
+ suffix: string,
377
+ ): string {
305
378
  if (!prefix && !suffix) return text;
306
- if (!text) return '';
307
- if (!text.includes('\n')) return `${prefix}${text}${suffix}`;
379
+ if (!text) return "";
380
+ if (!text.includes("\n")) return `${prefix}${text}${suffix}`;
308
381
 
309
- const parts = text.split('\n');
310
- return parts.map(p => p ? `${prefix}${p}${suffix}` : '').join('\n');
382
+ const parts = text.split("\n");
383
+ return parts.map((p) => (p ? `${prefix}${p}${suffix}` : "")).join("\n");
311
384
  }
312
385
 
313
386
  export function get_run_text(run: Run): string {
314
- let text = '';
387
+ let text = "";
315
388
  for (let i = 0; i < run._element.childNodes.length; i++) {
316
389
  const child = run._element.childNodes[i] as Element;
317
390
  if (child.nodeType !== 1) continue;
318
-
391
+
319
392
  if (child.tagName === QN_W_T || child.tagName === QN_W_DELTEXT) {
320
- const raw = child.textContent || '';
321
- text += raw.replace(/\t/g, ' ');
393
+ const raw = child.textContent || "";
394
+ text += raw.replace(/\t/g, " ");
322
395
  } else if (child.tagName === QN_W_TAB) {
323
- text += ' ';
396
+ text += " ";
324
397
  } else if (child.tagName === QN_W_BR || child.tagName === QN_W_CR) {
325
- text += '\n';
398
+ text += "\n";
326
399
  }
327
400
  }
328
401
  return text;
329
402
  }
330
403
 
331
- export function* iter_block_items(parent: any): Generator<Paragraph | Table | FootnoteItem> {
404
+ export function* iter_block_items(
405
+ parent: any,
406
+ ): Generator<Paragraph | Table | FootnoteItem> {
332
407
  const parent_elm = parent._element || parent.element || parent;
333
408
 
334
- if (parent.constructor.name === 'NotesPart') {
335
- const tag = parent.note_type === 'fn' ? 'w:footnote' : 'w:endnote';
409
+ if (parent.constructor.name === "NotesPart") {
410
+ const tag = parent.note_type === "fn" ? "w:footnote" : "w:endnote";
336
411
  const notes = findAllDescendants(parent_elm, tag);
337
412
  for (const child of notes) {
338
- if (child.getAttribute('w:type') === 'separator' || child.getAttribute('w:type') === 'continuationSeparator') continue;
413
+ if (
414
+ child.getAttribute("w:type") === "separator" ||
415
+ child.getAttribute("w:type") === "continuationSeparator"
416
+ )
417
+ continue;
339
418
  yield new FootnoteItem(child, parent, parent.note_type);
340
419
  }
341
420
  return;
@@ -347,42 +426,77 @@ export function* iter_block_items(parent: any): Generator<Paragraph | Table | Fo
347
426
 
348
427
  if (child.tagName === QN_W_P) {
349
428
  yield new Paragraph(child, parent);
350
- } else if (child.tagName === 'w:tbl') {
429
+ } else if (child.tagName === "w:tbl") {
351
430
  yield new Table(child, parent);
352
431
  }
353
432
  }
354
433
  }
355
434
 
356
435
  export function* iter_document_parts(doc: any): Generator<any> {
357
- // Simplified for TS port - just yield main document and notes for ingestion
436
+ // 1. Headers
437
+ const headers = doc.pkg.parts.filter(
438
+ (p: any) =>
439
+ p.contentType ===
440
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml",
441
+ );
442
+ for (const h of headers) yield h;
443
+
444
+ // 2. Main Document Body
358
445
  yield doc;
359
446
 
360
- const fnPart = doc.pkg.getPartByPath('word/footnotes.xml');
361
- const enPart = doc.pkg.getPartByPath('word/endnotes.xml');
447
+ // 3. Footers
448
+ const footers = doc.pkg.parts.filter(
449
+ (p: any) =>
450
+ p.contentType ===
451
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml",
452
+ );
453
+ for (const f of footers) yield f;
454
+
455
+ // 4. Notes
456
+ const fnPart = doc.pkg.getPartByPath("word/footnotes.xml");
457
+ const enPart = doc.pkg.getPartByPath("word/endnotes.xml");
362
458
 
363
- if (fnPart) yield new NotesPart(fnPart, 'fn');
364
- if (enPart) yield new NotesPart(enPart, 'en');
459
+ if (fnPart) yield new NotesPart(fnPart, "fn");
460
+ if (enPart) yield new NotesPart(enPart, "en");
365
461
  }
366
462
 
367
463
  function _is_page_instr(instr: string): boolean {
368
464
  if (!instr) return false;
369
465
  const parts = instr.toUpperCase().trim().split(/\s+/);
370
- return parts.length > 0 && (parts[0] === 'PAGE' || parts[0] === 'NUMPAGES');
466
+ return parts.length > 0 && (parts[0] === "PAGE" || parts[0] === "NUMPAGES");
371
467
  }
372
468
 
373
- export function* iter_paragraph_content(paragraph: Paragraph): Generator<Run | DocxEvent> {
469
+ export function _get_part(parent: any): any {
470
+ if (!parent) return null;
471
+ if (parent.part) return parent.part;
472
+ if (parent.pkg && parent.pkg.mainDocumentPart)
473
+ return parent.pkg.mainDocumentPart;
474
+ if (parent._parent) return _get_part(parent._parent);
475
+ return null;
476
+ }
477
+
478
+ export function* iter_paragraph_content(
479
+ paragraph: Paragraph,
480
+ ): Generator<Run | DocxEvent> {
374
481
  let in_complex_field = false;
375
- let current_instr = '';
482
+ let current_instr = "";
376
483
  let hide_result = false;
377
484
 
378
- function* process_run_element(r_element: Element): Generator<Run | DocxEvent> {
485
+ function* process_run_element(
486
+ r_element: Element,
487
+ ): Generator<Run | DocxEvent> {
379
488
  let c_id: string | null = null;
380
489
  const rPr = findChild(r_element, QN_W_RPR);
381
490
  if (rPr) {
382
491
  const rPrChange = findChild(rPr, QN_W_RPRCHANGE);
383
492
  if (rPrChange) {
384
493
  c_id = rPrChange.getAttribute(QN_W_ID);
385
- yield { type: 'fmt_start', id: c_id!, author: rPrChange.getAttribute(QN_W_AUTHOR) || undefined, date: rPrChange.getAttribute(QN_W_DATE) || undefined };
494
+ yield {
495
+ type: "fmt_start",
496
+ id: c_id!,
497
+ author: rPrChange.getAttribute(QN_W_AUTHOR) || undefined,
498
+ date: rPrChange.getAttribute(QN_W_DATE) || undefined,
499
+ };
386
500
  }
387
501
  }
388
502
 
@@ -393,40 +507,42 @@ export function* iter_paragraph_content(paragraph: Paragraph): Generator<Run | D
393
507
  const tag = child.tagName;
394
508
  if (tag === QN_W_COMMENTREFERENCE) {
395
509
  const ref_id = child.getAttribute(QN_W_ID);
396
- if (ref_id) yield { type: 'ref', id: ref_id };
510
+ if (ref_id) yield { type: "ref", id: ref_id };
397
511
  } else if (tag === QN_W_FOOTNOTEREFERENCE) {
398
512
  const f_id = child.getAttribute(QN_W_ID);
399
- if (f_id) yield { type: 'footnote', id: f_id };
513
+ if (f_id) yield { type: "footnote", id: f_id };
400
514
  } else if (tag === QN_W_ENDNOTEREFERENCE) {
401
515
  const e_id = child.getAttribute(QN_W_ID);
402
- if (e_id) yield { type: 'endnote', id: e_id };
516
+ if (e_id) yield { type: "endnote", id: e_id };
403
517
  } else if (tag === QN_W_FLDCHAR) {
404
518
  const fld_type = child.getAttribute(QN_W_FLDCHARTYPE);
405
- if (fld_type === 'begin') {
519
+ if (fld_type === "begin") {
406
520
  in_complex_field = true;
407
- current_instr = '';
408
- } else if (fld_type === 'separate') {
521
+ current_instr = "";
522
+ } else if (fld_type === "separate") {
409
523
  if (_is_page_instr(current_instr)) hide_result = true;
410
524
  else {
411
525
  const parts = current_instr.trim().split(/\s+/);
412
- if (parts.length > 1 && parts[0] === 'REF') yield { type: 'xref_start', id: parts[1] };
526
+ if (parts.length > 1 && parts[0] === "REF")
527
+ yield { type: "xref_start", id: parts[1] };
413
528
  }
414
- } else if (fld_type === 'end') {
529
+ } else if (fld_type === "end") {
415
530
  if (!hide_result) {
416
531
  const parts = current_instr.trim().split(/\s+/);
417
- if (parts.length > 1 && parts[0] === 'REF') yield { type: 'xref_end', id: parts[1] };
532
+ if (parts.length > 1 && parts[0] === "REF")
533
+ yield { type: "xref_end", id: parts[1] };
418
534
  }
419
535
  in_complex_field = false;
420
- current_instr = '';
536
+ current_instr = "";
421
537
  hide_result = false;
422
538
  }
423
539
  } else if (tag === QN_W_INSTRTEXT && in_complex_field && !hide_result) {
424
- current_instr += child.textContent || '';
540
+ current_instr += child.textContent || "";
425
541
  }
426
542
  }
427
543
 
428
544
  if (!hide_result) yield new Run(r_element, paragraph);
429
- if (c_id !== null) yield { type: 'fmt_end', id: c_id };
545
+ if (c_id !== null) yield { type: "fmt_end", id: c_id };
430
546
  }
431
547
 
432
548
  function* traverse_node(node: Element): Generator<Run | DocxEvent> {
@@ -438,41 +554,59 @@ export function* iter_paragraph_content(paragraph: Paragraph): Generator<Run | D
438
554
  if (tag === QN_W_R) yield* process_run_element(child);
439
555
  else if (tag === QN_W_INS) {
440
556
  const i_id = child.getAttribute(QN_W_ID)!;
441
- yield { type: 'ins_start', id: i_id, author: child.getAttribute(QN_W_AUTHOR) || undefined, date: child.getAttribute(QN_W_DATE) || undefined };
557
+ yield {
558
+ type: "ins_start",
559
+ id: i_id,
560
+ author: child.getAttribute(QN_W_AUTHOR) || undefined,
561
+ date: child.getAttribute(QN_W_DATE) || undefined,
562
+ };
442
563
  yield* traverse_node(child);
443
- yield { type: 'ins_end', id: i_id };
564
+ yield { type: "ins_end", id: i_id };
444
565
  } else if (tag === QN_W_DEL) {
445
566
  const d_id = child.getAttribute(QN_W_ID)!;
446
- yield { type: 'del_start', id: d_id, author: child.getAttribute(QN_W_AUTHOR) || undefined, date: child.getAttribute(QN_W_DATE) || undefined };
567
+ yield {
568
+ type: "del_start",
569
+ id: d_id,
570
+ author: child.getAttribute(QN_W_AUTHOR) || undefined,
571
+ date: child.getAttribute(QN_W_DATE) || undefined,
572
+ };
447
573
  yield* traverse_node(child);
448
- yield { type: 'del_end', id: d_id };
449
- } else if (tag === QN_W_COMMENTRANGESTART) yield { type: 'start', id: child.getAttribute(QN_W_ID)! };
450
- else if (tag === QN_W_COMMENTRANGEEND) yield { type: 'end', id: child.getAttribute(QN_W_ID)! };
574
+ yield { type: "del_end", id: d_id };
575
+ } else if (tag === QN_W_COMMENTRANGESTART)
576
+ yield { type: "start", id: child.getAttribute(QN_W_ID)! };
577
+ else if (tag === QN_W_COMMENTRANGEEND)
578
+ yield { type: "end", id: child.getAttribute(QN_W_ID)! };
451
579
  else if (tag === QN_W_HYPERLINK) {
452
- const rId = child.getAttribute(QN_R_ID);
453
- let url = '';
454
- if (rId && paragraph._parent.part) {
455
- const rel = paragraph._parent.part.rels.get(rId);
580
+ const rId = child.getAttribute(QN_R_ID) || child.getAttribute("id");
581
+ let url = "";
582
+ const part = _get_part(paragraph._parent);
583
+ if (rId && part) {
584
+ const rel = part.rels.get(rId);
456
585
  if (rel && rel.isExternal) url = rel.target;
457
586
  }
458
- if (url) yield { type: 'hyperlink_start', id: rId!, date: url };
587
+ if (url) yield { type: "hyperlink_start", id: rId!, date: url };
459
588
  yield* traverse_node(child);
460
- if (url) yield { type: 'hyperlink_end', id: rId!, date: url };
589
+ if (url) yield { type: "hyperlink_end", id: rId!, date: url };
461
590
  } else if (tag === QN_W_FLDSIMPLE) {
462
- const instr = child.getAttribute(QN_W_INSTR) || '';
591
+ const instr = child.getAttribute(QN_W_INSTR) || "";
463
592
  const parts = instr.trim().split(/\s+/);
464
- const target = (parts.length > 1 && parts[0] === 'REF') ? parts[1] : '';
465
- if (target) yield { type: 'xref_start', id: target };
593
+ const target = parts.length > 1 && parts[0] === "REF" ? parts[1] : "";
594
+ if (target) yield { type: "xref_start", id: target };
466
595
  yield* traverse_node(child);
467
- if (target) yield { type: 'xref_end', id: target };
596
+ if (target) yield { type: "xref_end", id: target };
468
597
  } else if (tag === QN_W_BOOKMARKSTART) {
469
598
  const b_name = child.getAttribute(QN_W_NAME);
470
- if (b_name && (!b_name.startsWith('_') || b_name.startsWith('_Ref'))) yield { type: 'bookmark', id: b_name };
471
- } else if (tag === QN_W_SDT || tag === QN_W_SMARTTAG || tag === QN_W_SDTCONTENT) {
599
+ if (b_name && (!b_name.startsWith("_") || b_name.startsWith("_Ref")))
600
+ yield { type: "bookmark", id: b_name };
601
+ } else if (
602
+ tag === QN_W_SDT ||
603
+ tag === QN_W_SMARTTAG ||
604
+ tag === QN_W_SDTCONTENT
605
+ ) {
472
606
  yield* traverse_node(child);
473
607
  }
474
608
  }
475
609
  }
476
610
 
477
611
  yield* traverse_node(paragraph._element);
478
- }
612
+ }