@adeu/core 1.6.7 → 1.6.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -12,6 +12,16 @@ function findChild(element, tagName) {
12
12
  }
13
13
  return null;
14
14
  }
15
+ function findChildren(element, tagName) {
16
+ const result = [];
17
+ for (let i = 0; i < element.childNodes.length; i++) {
18
+ const child = element.childNodes[i];
19
+ if (child.nodeType === 1 && child.tagName === tagName) {
20
+ result.push(child);
21
+ }
22
+ }
23
+ return result;
24
+ }
15
25
  function findAllDescendants(element, tagName) {
16
26
  return Array.from(element.getElementsByTagName(tagName));
17
27
  }
@@ -49,10 +59,10 @@ var Part = class {
49
59
  _element;
50
60
  addRelationship(id, type, target, isExternal = false) {
51
61
  this.rels.set(id, new Relationship(id, type, target, isExternal));
52
- if (this._element.tagName === "Relationships") {
62
+ if (this.partname.endsWith(".rels")) {
53
63
  const doc = this._element.ownerDocument;
54
64
  if (doc) {
55
- const relEl = doc.createElement("Relationship");
65
+ const relEl = doc.createElementNS("http://schemas.openxmlformats.org/package/2006/relationships", "Relationship");
56
66
  relEl.setAttribute("Id", id);
57
67
  relEl.setAttribute("Type", type);
58
68
  relEl.setAttribute("Target", target);
@@ -174,6 +184,15 @@ var DocumentObject = class _DocumentObject {
174
184
  const relsPart = this.pkg.getOrCreateRelsPart(this.part.partname);
175
185
  relsPart.addRelationship(id, relType, target, false);
176
186
  }
187
+ relateToExternal(target, relType) {
188
+ let rId = 1;
189
+ while (this.part.rels.has(`rId${rId}`)) rId++;
190
+ const id = `rId${rId}`;
191
+ this.part.rels.set(id, new Relationship(id, relType, target, true));
192
+ const relsPart = this.pkg.getOrCreateRelsPart(this.part.partname);
193
+ relsPart.addRelationship(id, relType, target, true);
194
+ return id;
195
+ }
177
196
  async save() {
178
197
  for (const part of this.pkg.parts) {
179
198
  let xmlStr = serializeXml(part._element.ownerDocument || part._element);
@@ -270,573 +289,157 @@ var FootnoteItem = class {
270
289
  part;
271
290
  };
272
291
 
273
- // src/utils/docx.ts
274
- var QN_W_P = "w:p";
275
- var QN_W_R = "w:r";
276
- var QN_W_T = "w:t";
277
- var QN_W_DELTEXT = "w:delText";
278
- var QN_W_TAB = "w:tab";
279
- var QN_W_BR = "w:br";
280
- var QN_W_CR = "w:cr";
281
- var QN_W_RPR = "w:rPr";
282
- var QN_W_RPRCHANGE = "w:rPrChange";
283
- var QN_W_COMMENTREFERENCE = "w:commentReference";
284
- var QN_W_FOOTNOTEREFERENCE = "w:footnoteReference";
285
- var QN_W_ENDNOTEREFERENCE = "w:endnoteReference";
286
- var QN_W_FLDCHAR = "w:fldChar";
287
- var QN_W_FLDCHARTYPE = "w:fldCharType";
288
- var QN_W_INSTRTEXT = "w:instrText";
289
- var QN_W_INS = "w:ins";
290
- var QN_W_DEL = "w:del";
291
- var QN_W_ID = "w:id";
292
- var QN_W_AUTHOR = "w:author";
293
- var QN_W_DATE = "w:date";
294
- var QN_W_COMMENTRANGESTART = "w:commentRangeStart";
295
- var QN_W_COMMENTRANGEEND = "w:commentRangeEnd";
296
- var QN_W_HYPERLINK = "w:hyperlink";
297
- var QN_R_ID = "r:id";
298
- var QN_W_FLDSIMPLE = "w:fldSimple";
299
- var QN_W_INSTR = "w:instr";
300
- var QN_W_BOOKMARKSTART = "w:bookmarkStart";
301
- var QN_W_NAME = "w:name";
302
- var QN_W_SDT = "w:sdt";
303
- var QN_W_SMARTTAG = "w:smartTag";
304
- var QN_W_SDTCONTENT = "w:sdtContent";
305
- var QN_W_B = "w:b";
306
- var QN_W_I = "w:i";
307
- var QN_W_VAL = "w:val";
308
- var QN_W_PPR = "w:pPr";
309
- var QN_W_PSTYLE = "w:pStyle";
310
- var QN_W_OUTLINELVL = "w:outlineLvl";
311
- var QN_W_NUMPR = "w:numPr";
312
- var QN_W_NUMID = "w:numId";
313
- var QN_W_ILVL = "w:ilvl";
314
- var _CUSTOM_HEADING_NAME_RE = /Heading[ ]?([1-6])(?![0-9])/;
315
- function _get_style_cache(part) {
316
- const pkg = part.package || part.pkg || (part.part ? part.part.pkg : null);
317
- if (pkg && pkg._adeu_style_cache) {
318
- return pkg._adeu_style_cache;
319
- }
320
- const cache = {};
321
- let default_pstyle = null;
322
- const raw_styles = {};
323
- const stylesPart = pkg?.getPartByPath("word/styles.xml");
324
- if (!stylesPart) {
325
- const result2 = [cache, null];
326
- if (pkg) pkg._adeu_style_cache = result2;
327
- return result2;
292
+ // src/comments.ts
293
+ var NS = {
294
+ w: "http://schemas.openxmlformats.org/wordprocessingml/2006/main",
295
+ w14: "http://schemas.microsoft.com/office/word/2010/wordml",
296
+ w15: "http://schemas.microsoft.com/office/word/2012/wordml",
297
+ w16cid: "http://schemas.microsoft.com/office/word/2016/wordml/cid",
298
+ w16cex: "http://schemas.microsoft.com/office/word/2018/wordml/cex",
299
+ mc: "http://schemas.openxmlformats.org/markup-compatibility/2006"
300
+ };
301
+ var CT = {
302
+ COMMENTS: "application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml",
303
+ EXTENDED: "application/vnd.openxmlformats-officedocument.wordprocessingml.commentsExtended+xml",
304
+ IDS: "application/vnd.openxmlformats-officedocument.wordprocessingml.commentsIds+xml",
305
+ EXTENSIBLE: "application/vnd.openxmlformats-officedocument.wordprocessingml.commentsExtensible+xml"
306
+ };
307
+ var RT = {
308
+ COMMENTS: "http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments",
309
+ EXTENDED: "http://schemas.microsoft.com/office/2011/relationships/commentsExtended",
310
+ IDS: "http://schemas.microsoft.com/office/2016/09/relationships/commentsIds",
311
+ EXTENSIBLE: "http://schemas.microsoft.com/office/2018/08/relationships/commentsExtensible"
312
+ };
313
+ var CommentsManager = class {
314
+ constructor(doc) {
315
+ this.doc = doc;
328
316
  }
329
- const styles = findAllDescendants(stylesPart._element, "w:style");
330
- for (const s of styles) {
331
- const s_id = s.getAttribute("w:styleId");
332
- if (!s_id) continue;
333
- const s_type = s.getAttribute("w:type");
334
- const is_default = s.getAttribute("w:default") === "1" || s.getAttribute("w:default") === "true";
335
- if (s_type === "paragraph" && is_default) default_pstyle = s_id;
336
- const name_el = findChild(s, "w:name");
337
- const name = name_el ? name_el.getAttribute("w:val") : s_id;
338
- const based_on_el = findChild(s, "w:basedOn");
339
- const based_on = based_on_el ? based_on_el.getAttribute("w:val") : null;
340
- let outline_lvl = null;
341
- const pPr = findChild(s, "w:pPr");
342
- if (pPr) {
343
- const oLvl = findChild(pPr, "w:outlineLvl");
344
- if (oLvl) {
345
- const val = oLvl.getAttribute("w:val");
346
- if (val && /^\d+$/.test(val)) outline_lvl = parseInt(val, 10);
347
- }
348
- }
349
- let bold = null;
350
- const rPr = findChild(s, "w:rPr");
351
- if (rPr) {
352
- const b = findChild(rPr, "w:b");
353
- if (b) {
354
- const val = b.getAttribute("w:val");
355
- bold = val !== "0" && val !== "false" && val !== "off";
356
- }
317
+ doc;
318
+ _commentsPart = null;
319
+ _extendedPart = null;
320
+ _idsPart = null;
321
+ _extensiblePart = null;
322
+ _nextId = null;
323
+ get commentsPart() {
324
+ if (!this._commentsPart) {
325
+ this._commentsPart = this._getOrCreateCommentsPart();
326
+ this._ensureNamespaces();
357
327
  }
358
- raw_styles[s_id] = { name, based_on, outline_level: outline_lvl, bold };
328
+ return this._commentsPart;
359
329
  }
360
- const resolve_style = (s_id, visited) => {
361
- if (cache[s_id]) return cache[s_id];
362
- if (visited.has(s_id) || !raw_styles[s_id]) return { name: s_id, outline_level: null, bold: false };
363
- visited.add(s_id);
364
- const raw = raw_styles[s_id];
365
- const based_on_id = raw.based_on;
366
- let o_lvl = raw.outline_level;
367
- let bold_val = raw.bold !== null ? raw.bold : false;
368
- if (based_on_id) {
369
- const parent = resolve_style(based_on_id, visited);
370
- if (o_lvl === null) o_lvl = parent.outline_level;
371
- if (raw.bold === null) bold_val = parent.bold;
372
- }
373
- const resolved = { name: raw.name, outline_level: o_lvl, bold: bold_val };
374
- cache[s_id] = resolved;
375
- return resolved;
376
- };
377
- for (const s_id in raw_styles) resolve_style(s_id, /* @__PURE__ */ new Set());
378
- const result = [cache, default_pstyle];
379
- if (pkg) pkg._adeu_style_cache = result;
380
- return result;
381
- }
382
- function _detect_heading_level_from_name(name) {
383
- if (!name) return null;
384
- const match = name.match(_CUSTOM_HEADING_NAME_RE);
385
- return match ? parseInt(match[1], 10) : null;
386
- }
387
- function is_native_heading(paragraph, style_cache, default_pstyle) {
388
- if (!style_cache) {
389
- [style_cache, default_pstyle] = _get_style_cache(paragraph._parent.part || paragraph._parent);
330
+ get extendedPart() {
331
+ if (!this._extendedPart) this._extendedPart = this._getOrCreateExtendedPart();
332
+ return this._extendedPart;
390
333
  }
391
- const pPr = findChild(paragraph._element, QN_W_PPR);
392
- if (pPr) {
393
- const oLvl = findChild(pPr, QN_W_OUTLINELVL);
394
- if (oLvl) {
395
- const val = oLvl.getAttribute(QN_W_VAL);
396
- if (val && /^\d+$/.test(val)) {
397
- const lvl = parseInt(val, 10);
398
- if (lvl >= 0 && lvl <= 8) return true;
399
- }
400
- }
334
+ get idsPart() {
335
+ if (!this._idsPart) this._idsPart = this._getOrCreateIdsPart();
336
+ return this._idsPart;
401
337
  }
402
- let style_id = default_pstyle;
403
- if (pPr) {
404
- const pStyle = findChild(pPr, QN_W_PSTYLE);
405
- if (pStyle) style_id = pStyle.getAttribute(QN_W_VAL) || default_pstyle;
338
+ get extensiblePart() {
339
+ if (!this._extensiblePart) this._extensiblePart = this._getOrCreateExtensiblePart();
340
+ return this._extensiblePart;
406
341
  }
407
- const style_info = style_id && style_cache ? style_cache[style_id] : null;
408
- if (style_info && style_info.outline_level !== null && style_info.outline_level >= 0 && style_info.outline_level <= 8) {
409
- return true;
342
+ get nextId() {
343
+ if (this._nextId === null) this._nextId = this._getNextCommentId();
344
+ return this._nextId;
410
345
  }
411
- const style_name = style_info ? style_info.name : null;
412
- if (style_name?.startsWith("Heading")) return true;
413
- if (style_name === "Title") return true;
414
- if (style_name && style_name !== "Normal") {
415
- if (_detect_heading_level_from_name(style_name) !== null) return true;
346
+ set nextId(value) {
347
+ this._nextId = value;
416
348
  }
417
- return false;
418
- }
419
- function get_paragraph_prefix(paragraph, style_cache, default_pstyle) {
420
- if (!style_cache) {
421
- [style_cache, default_pstyle] = _get_style_cache(paragraph._parent.part || paragraph._parent);
349
+ _getExistingPartByType(contentType) {
350
+ return this.doc.pkg.parts.find((p) => p.contentType === contentType) || null;
422
351
  }
423
- const pPr = findChild(paragraph._element, QN_W_PPR);
424
- if (pPr) {
425
- const oLvl = findChild(pPr, QN_W_OUTLINELVL);
426
- if (oLvl) {
427
- const val = oLvl.getAttribute(QN_W_VAL);
428
- if (val && /^\d+$/.test(val)) {
429
- const lvl = parseInt(val, 10);
430
- if (lvl >= 0 && lvl <= 8) return "#".repeat(lvl + 1) + " ";
352
+ _linkPart(part, relType) {
353
+ for (const rel of this.doc.part.rels.values()) {
354
+ if (!rel.isExternal && rel.target === part.partname.split("/").pop()) {
355
+ return part;
431
356
  }
432
357
  }
358
+ this.doc.relateTo(part, relType);
359
+ return part;
433
360
  }
434
- let style_id = default_pstyle;
435
- if (pPr) {
436
- const pStyle = findChild(pPr, QN_W_PSTYLE);
437
- if (pStyle) style_id = pStyle.getAttribute(QN_W_VAL) || default_pstyle;
361
+ _getOrCreateCommentsPart() {
362
+ let part = this._getExistingPartByType(CT.COMMENTS);
363
+ if (part) return this._linkPart(part, RT.COMMENTS);
364
+ const partname = this.doc.pkg.nextPartname("/word/comments%d.xml");
365
+ const xml = `<w:comments xmlns:w="${NS.w}" xmlns:w14="${NS.w14}" xmlns:w15="${NS.w15}" xmlns:w16cid="${NS.w16cid}" xmlns:w16cex="${NS.w16cex}" xmlns:mc="${NS.mc}" mc:Ignorable="w14 w15 w16cid w16cex"></w:comments>`;
366
+ part = this.doc.pkg.addPart(partname, CT.COMMENTS, xml);
367
+ this.doc.relateTo(part, RT.COMMENTS);
368
+ return part;
438
369
  }
439
- const style_info = style_id && style_cache ? style_cache[style_id] : null;
440
- if (style_info && style_info.outline_level !== null && style_info.outline_level >= 0 && style_info.outline_level <= 8) {
441
- return "#".repeat(style_info.outline_level + 1) + " ";
370
+ _getOrCreateExtendedPart() {
371
+ let part = this._getExistingPartByType(CT.EXTENDED);
372
+ if (part) return this._linkPart(part, RT.EXTENDED);
373
+ const partname = this.doc.pkg.nextPartname("/word/commentsExtended%d.xml");
374
+ const xml = `<w15:commentsEx xmlns:w15="${NS.w15}"></w15:commentsEx>`;
375
+ part = this.doc.pkg.addPart(partname, CT.EXTENDED, xml);
376
+ this.doc.relateTo(part, RT.EXTENDED);
377
+ return part;
442
378
  }
443
- const style_name = style_info ? style_info.name : null;
444
- if (style_name?.startsWith("Heading")) {
445
- const match = style_name.replace("Heading", "").trim();
446
- if (/^\d+$/.test(match)) return "#".repeat(parseInt(match, 10)) + " ";
379
+ _getOrCreateIdsPart() {
380
+ let part = this._getExistingPartByType(CT.IDS);
381
+ if (part) return this._linkPart(part, RT.IDS);
382
+ const partname = this.doc.pkg.nextPartname("/word/commentsIds%d.xml");
383
+ const xml = `<w16cid:commentsIds xmlns:w16cid="${NS.w16cid}"></w16cid:commentsIds>`;
384
+ part = this.doc.pkg.addPart(partname, CT.IDS, xml);
385
+ this.doc.relateTo(part, RT.IDS);
386
+ return part;
447
387
  }
448
- if (style_name === "Title") return "# ";
449
- if (pPr) {
450
- const numPr = findChild(pPr, QN_W_NUMPR);
451
- if (numPr) {
452
- const numId = findChild(numPr, QN_W_NUMID);
453
- if (numId && numId.getAttribute(QN_W_VAL) !== "0") {
454
- let level = 0;
455
- const ilvl = findChild(numPr, QN_W_ILVL);
456
- if (ilvl) {
457
- const valAttr = ilvl.getAttribute(QN_W_VAL);
458
- if (valAttr) level = parseInt(valAttr, 10) || 0;
459
- }
460
- return " ".repeat(level) + "* ";
461
- }
462
- }
388
+ _getOrCreateExtensiblePart() {
389
+ let part = this._getExistingPartByType(CT.EXTENSIBLE);
390
+ if (part) return this._linkPart(part, RT.EXTENSIBLE);
391
+ const partname = this.doc.pkg.nextPartname("/word/commentsExtensible%d.xml");
392
+ const xml = `<w16cex:commentsExtensible xmlns:w16cex="${NS.w16cex}"></w16cex:commentsExtensible>`;
393
+ part = this.doc.pkg.addPart(partname, CT.EXTENSIBLE, xml);
394
+ this.doc.relateTo(part, RT.EXTENSIBLE);
395
+ return part;
463
396
  }
464
- if (style_name && style_name !== "Normal") {
465
- const custom_level = _detect_heading_level_from_name(style_name);
466
- if (custom_level !== null) return "#".repeat(custom_level) + " ";
397
+ _ensureNamespaces() {
467
398
  }
468
- if (!style_name || style_name === "Normal") {
469
- const text = paragraph.text.trim();
470
- if (text && text.length < 100 && text === text.toUpperCase()) {
471
- let is_bold = false;
472
- if (style_info?.bold) {
473
- is_bold = true;
474
- } else {
475
- const runs = findAllDescendants(paragraph._element, QN_W_R);
476
- for (const r of runs) {
477
- const tList = findAllDescendants(r, QN_W_T);
478
- const tText = tList.map((t) => t.textContent || "").join("");
479
- if (tText.trim()) {
480
- const rPr_run = findChild(r, QN_W_RPR);
481
- if (rPr_run) {
482
- const b = findChild(rPr_run, QN_W_B);
483
- if (b && b.getAttribute(QN_W_VAL) !== "0" && b.getAttribute(QN_W_VAL) !== "false") {
484
- is_bold = true;
485
- }
486
- }
487
- break;
488
- }
489
- }
399
+ _getNextCommentId() {
400
+ const ids = [0];
401
+ const part = this._getExistingPartByType(CT.COMMENTS);
402
+ if (part) {
403
+ const comments = findAllDescendants(part._element, "w:comment");
404
+ for (const c of comments) {
405
+ const idStr = c.getAttribute("w:id");
406
+ if (idStr) ids.push(parseInt(idStr, 10) || 0);
490
407
  }
491
- if (is_bold) return "## ";
492
- }
493
- }
494
- return "";
495
- }
496
- function is_heading_paragraph(paragraph, style_cache, default_pstyle) {
497
- const prefix = get_paragraph_prefix(paragraph, style_cache, default_pstyle);
498
- if (!prefix) return false;
499
- const stripped = prefix.trimEnd();
500
- return stripped.length > 0 && stripped === "#".repeat(stripped.length);
501
- }
502
- function get_run_style_markers(run, is_heading = null) {
503
- let prefix = "";
504
- let suffix = "";
505
- const rPr = findChild(run._element, QN_W_RPR);
506
- let is_bold = false;
507
- let is_italic = false;
508
- if (rPr) {
509
- const b = findChild(rPr, QN_W_B);
510
- if (b && b.getAttribute(QN_W_VAL) !== "0" && b.getAttribute(QN_W_VAL) !== "false") is_bold = true;
511
- const i = findChild(rPr, QN_W_I);
512
- if (i && i.getAttribute(QN_W_VAL) !== "0" && i.getAttribute(QN_W_VAL) !== "false") is_italic = true;
513
- }
514
- if (is_heading === null) {
515
- const parent = run._parent;
516
- is_heading = parent instanceof Paragraph ? is_native_heading(parent) : false;
517
- }
518
- if (is_bold && !is_heading) {
519
- prefix += "**";
520
- suffix = "**" + suffix;
521
- }
522
- if (is_italic) {
523
- prefix += "_";
524
- suffix = "_" + suffix;
525
- }
526
- return [prefix, suffix];
527
- }
528
- function apply_formatting_to_segments(text, prefix, suffix) {
529
- if (!prefix && !suffix) return text;
530
- if (!text) return "";
531
- if (!text.includes("\n")) return `${prefix}${text}${suffix}`;
532
- const parts = text.split("\n");
533
- return parts.map((p) => p ? `${prefix}${p}${suffix}` : "").join("\n");
534
- }
535
- function get_run_text(run) {
536
- let text = "";
537
- for (let i = 0; i < run._element.childNodes.length; i++) {
538
- const child = run._element.childNodes[i];
539
- if (child.nodeType !== 1) continue;
540
- if (child.tagName === QN_W_T || child.tagName === QN_W_DELTEXT) {
541
- const raw = child.textContent || "";
542
- text += raw.replace(/\t/g, " ");
543
- } else if (child.tagName === QN_W_TAB) {
544
- text += " ";
545
- } else if (child.tagName === QN_W_BR || child.tagName === QN_W_CR) {
546
- text += "\n";
547
408
  }
409
+ return Math.max(...ids) + 1;
548
410
  }
549
- return text;
550
- }
551
- function* iter_block_items(parent) {
552
- const parent_elm = parent._element || parent.element || parent;
553
- if (parent.constructor.name === "NotesPart") {
554
- const tag = parent.note_type === "fn" ? "w:footnote" : "w:endnote";
555
- const notes = findAllDescendants(parent_elm, tag);
556
- for (const child of notes) {
557
- if (child.getAttribute("w:type") === "separator" || child.getAttribute("w:type") === "continuationSeparator") continue;
558
- yield new FootnoteItem(child, parent, parent.note_type);
559
- }
560
- return;
411
+ _generateHexId() {
412
+ return Math.floor(Math.random() * 4294967295).toString(16).toUpperCase().padStart(8, "0");
561
413
  }
562
- for (let i = 0; i < parent_elm.childNodes.length; i++) {
563
- const child = parent_elm.childNodes[i];
564
- if (child.nodeType !== 1) continue;
565
- if (child.tagName === QN_W_P) {
566
- yield new Paragraph(child, parent);
567
- } else if (child.tagName === "w:tbl") {
568
- yield new Table(child, parent);
569
- }
414
+ _getInitials(author) {
415
+ if (!author) return "";
416
+ return author.split(" ").filter(Boolean).map((p) => p[0]).join("").toUpperCase();
570
417
  }
571
- }
572
- function* iter_document_parts(doc) {
573
- yield doc;
574
- const fnPart = doc.pkg.getPartByPath("word/footnotes.xml");
575
- const enPart = doc.pkg.getPartByPath("word/endnotes.xml");
576
- if (fnPart) yield new NotesPart(fnPart, "fn");
577
- if (enPart) yield new NotesPart(enPart, "en");
578
- }
579
- function _is_page_instr(instr) {
580
- if (!instr) return false;
581
- const parts = instr.toUpperCase().trim().split(/\s+/);
582
- return parts.length > 0 && (parts[0] === "PAGE" || parts[0] === "NUMPAGES");
583
- }
584
- function* iter_paragraph_content(paragraph) {
585
- let in_complex_field = false;
586
- let current_instr = "";
587
- let hide_result = false;
588
- function* process_run_element(r_element) {
589
- let c_id = null;
590
- const rPr = findChild(r_element, QN_W_RPR);
591
- if (rPr) {
592
- const rPrChange = findChild(rPr, QN_W_RPRCHANGE);
593
- if (rPrChange) {
594
- c_id = rPrChange.getAttribute(QN_W_ID);
595
- yield { type: "fmt_start", id: c_id, author: rPrChange.getAttribute(QN_W_AUTHOR) || void 0, date: rPrChange.getAttribute(QN_W_DATE) || void 0 };
596
- }
597
- }
598
- for (let i = 0; i < r_element.childNodes.length; i++) {
599
- const child = r_element.childNodes[i];
600
- if (child.nodeType !== 1) continue;
601
- const tag = child.tagName;
602
- if (tag === QN_W_COMMENTREFERENCE) {
603
- const ref_id = child.getAttribute(QN_W_ID);
604
- if (ref_id) yield { type: "ref", id: ref_id };
605
- } else if (tag === QN_W_FOOTNOTEREFERENCE) {
606
- const f_id = child.getAttribute(QN_W_ID);
607
- if (f_id) yield { type: "footnote", id: f_id };
608
- } else if (tag === QN_W_ENDNOTEREFERENCE) {
609
- const e_id = child.getAttribute(QN_W_ID);
610
- if (e_id) yield { type: "endnote", id: e_id };
611
- } else if (tag === QN_W_FLDCHAR) {
612
- const fld_type = child.getAttribute(QN_W_FLDCHARTYPE);
613
- if (fld_type === "begin") {
614
- in_complex_field = true;
615
- current_instr = "";
616
- } else if (fld_type === "separate") {
617
- if (_is_page_instr(current_instr)) hide_result = true;
618
- else {
619
- const parts = current_instr.trim().split(/\s+/);
620
- if (parts.length > 1 && parts[0] === "REF") yield { type: "xref_start", id: parts[1] };
621
- }
622
- } else if (fld_type === "end") {
623
- if (!hide_result) {
624
- const parts = current_instr.trim().split(/\s+/);
625
- if (parts.length > 1 && parts[0] === "REF") yield { type: "xref_end", id: parts[1] };
626
- }
627
- in_complex_field = false;
628
- current_instr = "";
629
- hide_result = false;
418
+ _findParaIdForComment(commentId) {
419
+ if (!this._commentsPart) return null;
420
+ for (const c of findAllDescendants(this._commentsPart._element, "w:comment")) {
421
+ if (c.getAttribute("w:id") === commentId) {
422
+ for (const p of findAllDescendants(c, "w:p")) {
423
+ const pid = p.getAttribute("w14:paraId");
424
+ if (pid) return pid;
630
425
  }
631
- } else if (tag === QN_W_INSTRTEXT && in_complex_field && !hide_result) {
632
- current_instr += child.textContent || "";
633
426
  }
634
427
  }
635
- if (!hide_result) yield new Run(r_element, paragraph);
636
- if (c_id !== null) yield { type: "fmt_end", id: c_id };
428
+ return null;
637
429
  }
638
- function* traverse_node(node) {
639
- for (let i = 0; i < node.childNodes.length; i++) {
640
- const child = node.childNodes[i];
430
+ _findThreadRootParaId(commentId) {
431
+ const directParaId = this._findParaIdForComment(commentId);
432
+ const extPart = this._getExistingPartByType(CT.EXTENDED);
433
+ if (!directParaId || !extPart) return directParaId;
434
+ for (let i = 0; i < extPart._element.childNodes.length; i++) {
435
+ const child = extPart._element.childNodes[i];
641
436
  if (child.nodeType !== 1) continue;
642
- const tag = child.tagName;
643
- if (tag === QN_W_R) yield* process_run_element(child);
644
- else if (tag === QN_W_INS) {
645
- const i_id = child.getAttribute(QN_W_ID);
646
- yield { type: "ins_start", id: i_id, author: child.getAttribute(QN_W_AUTHOR) || void 0, date: child.getAttribute(QN_W_DATE) || void 0 };
647
- yield* traverse_node(child);
648
- yield { type: "ins_end", id: i_id };
649
- } else if (tag === QN_W_DEL) {
650
- const d_id = child.getAttribute(QN_W_ID);
651
- yield { type: "del_start", id: d_id, author: child.getAttribute(QN_W_AUTHOR) || void 0, date: child.getAttribute(QN_W_DATE) || void 0 };
652
- yield* traverse_node(child);
653
- yield { type: "del_end", id: d_id };
654
- } else if (tag === QN_W_COMMENTRANGESTART) yield { type: "start", id: child.getAttribute(QN_W_ID) };
655
- else if (tag === QN_W_COMMENTRANGEEND) yield { type: "end", id: child.getAttribute(QN_W_ID) };
656
- else if (tag === QN_W_HYPERLINK) {
657
- const rId = child.getAttribute(QN_R_ID);
658
- let url = "";
659
- if (rId && paragraph._parent.part) {
660
- const rel = paragraph._parent.part.rels.get(rId);
661
- if (rel && rel.isExternal) url = rel.target;
662
- }
663
- if (url) yield { type: "hyperlink_start", id: rId, date: url };
664
- yield* traverse_node(child);
665
- if (url) yield { type: "hyperlink_end", id: rId, date: url };
666
- } else if (tag === QN_W_FLDSIMPLE) {
667
- const instr = child.getAttribute(QN_W_INSTR) || "";
668
- const parts = instr.trim().split(/\s+/);
669
- const target = parts.length > 1 && parts[0] === "REF" ? parts[1] : "";
670
- if (target) yield { type: "xref_start", id: target };
671
- yield* traverse_node(child);
672
- if (target) yield { type: "xref_end", id: target };
673
- } else if (tag === QN_W_BOOKMARKSTART) {
674
- const b_name = child.getAttribute(QN_W_NAME);
675
- if (b_name && (!b_name.startsWith("_") || b_name.startsWith("_Ref"))) yield { type: "bookmark", id: b_name };
676
- } else if (tag === QN_W_SDT || tag === QN_W_SMARTTAG || tag === QN_W_SDTCONTENT) {
677
- yield* traverse_node(child);
437
+ if (child.getAttribute("w15:paraId") === directParaId) {
438
+ const parent = child.getAttribute("w15:paraIdParent");
439
+ if (parent) return parent;
678
440
  }
679
441
  }
680
- }
681
- yield* traverse_node(paragraph._element);
682
- }
683
-
684
- // src/domain.ts
685
- function build_structural_appendix(doc, base_text) {
686
- return "";
687
- }
688
-
689
- // src/comments.ts
690
- var NS = {
691
- w: "http://schemas.openxmlformats.org/wordprocessingml/2006/main",
692
- w14: "http://schemas.microsoft.com/office/word/2010/wordml",
693
- w15: "http://schemas.microsoft.com/office/word/2012/wordml",
694
- w16cid: "http://schemas.microsoft.com/office/word/2016/wordml/cid",
695
- w16cex: "http://schemas.microsoft.com/office/word/2018/wordml/cex",
696
- mc: "http://schemas.openxmlformats.org/markup-compatibility/2006"
697
- };
698
- var CT = {
699
- COMMENTS: "application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml",
700
- EXTENDED: "application/vnd.openxmlformats-officedocument.wordprocessingml.commentsExtended+xml",
701
- IDS: "application/vnd.openxmlformats-officedocument.wordprocessingml.commentsIds+xml",
702
- EXTENSIBLE: "application/vnd.openxmlformats-officedocument.wordprocessingml.commentsExtensible+xml"
703
- };
704
- var RT = {
705
- COMMENTS: "http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments",
706
- EXTENDED: "http://schemas.microsoft.com/office/2011/relationships/commentsExtended",
707
- IDS: "http://schemas.microsoft.com/office/2016/09/relationships/commentsIds",
708
- EXTENSIBLE: "http://schemas.microsoft.com/office/2018/08/relationships/commentsExtensible"
709
- };
710
- var CommentsManager = class {
711
- constructor(doc) {
712
- this.doc = doc;
713
- }
714
- doc;
715
- _commentsPart = null;
716
- _extendedPart = null;
717
- _idsPart = null;
718
- _extensiblePart = null;
719
- _nextId = null;
720
- get commentsPart() {
721
- if (!this._commentsPart) {
722
- this._commentsPart = this._getOrCreateCommentsPart();
723
- this._ensureNamespaces();
724
- }
725
- return this._commentsPart;
726
- }
727
- get extendedPart() {
728
- if (!this._extendedPart) this._extendedPart = this._getOrCreateExtendedPart();
729
- return this._extendedPart;
730
- }
731
- get idsPart() {
732
- if (!this._idsPart) this._idsPart = this._getOrCreateIdsPart();
733
- return this._idsPart;
734
- }
735
- get extensiblePart() {
736
- if (!this._extensiblePart) this._extensiblePart = this._getOrCreateExtensiblePart();
737
- return this._extensiblePart;
738
- }
739
- get nextId() {
740
- if (this._nextId === null) this._nextId = this._getNextCommentId();
741
- return this._nextId;
742
- }
743
- set nextId(value) {
744
- this._nextId = value;
745
- }
746
- _getExistingPartByType(contentType) {
747
- return this.doc.pkg.parts.find((p) => p.contentType === contentType) || null;
748
- }
749
- _linkPart(part, relType) {
750
- for (const rel of this.doc.part.rels.values()) {
751
- if (!rel.isExternal && rel.target === part.partname.split("/").pop()) {
752
- return part;
753
- }
754
- }
755
- this.doc.relateTo(part, relType);
756
- return part;
757
- }
758
- _getOrCreateCommentsPart() {
759
- let part = this._getExistingPartByType(CT.COMMENTS);
760
- if (part) return this._linkPart(part, RT.COMMENTS);
761
- const partname = this.doc.pkg.nextPartname("/word/comments%d.xml");
762
- const xml = `<w:comments xmlns:w="${NS.w}" xmlns:w14="${NS.w14}" xmlns:w15="${NS.w15}" xmlns:w16cid="${NS.w16cid}" xmlns:w16cex="${NS.w16cex}" xmlns:mc="${NS.mc}" mc:Ignorable="w14 w15 w16cid w16cex"></w:comments>`;
763
- part = this.doc.pkg.addPart(partname, CT.COMMENTS, xml);
764
- this.doc.relateTo(part, RT.COMMENTS);
765
- return part;
766
- }
767
- _getOrCreateExtendedPart() {
768
- let part = this._getExistingPartByType(CT.EXTENDED);
769
- if (part) return this._linkPart(part, RT.EXTENDED);
770
- const partname = this.doc.pkg.nextPartname("/word/commentsExtended%d.xml");
771
- const xml = `<w15:commentsEx xmlns:w15="${NS.w15}"></w15:commentsEx>`;
772
- part = this.doc.pkg.addPart(partname, CT.EXTENDED, xml);
773
- this.doc.relateTo(part, RT.EXTENDED);
774
- return part;
775
- }
776
- _getOrCreateIdsPart() {
777
- let part = this._getExistingPartByType(CT.IDS);
778
- if (part) return this._linkPart(part, RT.IDS);
779
- const partname = this.doc.pkg.nextPartname("/word/commentsIds%d.xml");
780
- const xml = `<w16cid:commentsIds xmlns:w16cid="${NS.w16cid}"></w16cid:commentsIds>`;
781
- part = this.doc.pkg.addPart(partname, CT.IDS, xml);
782
- this.doc.relateTo(part, RT.IDS);
783
- return part;
784
- }
785
- _getOrCreateExtensiblePart() {
786
- let part = this._getExistingPartByType(CT.EXTENSIBLE);
787
- if (part) return this._linkPart(part, RT.EXTENSIBLE);
788
- const partname = this.doc.pkg.nextPartname("/word/commentsExtensible%d.xml");
789
- const xml = `<w16cex:commentsExtensible xmlns:w16cex="${NS.w16cex}"></w16cex:commentsExtensible>`;
790
- part = this.doc.pkg.addPart(partname, CT.EXTENSIBLE, xml);
791
- this.doc.relateTo(part, RT.EXTENSIBLE);
792
- return part;
793
- }
794
- _ensureNamespaces() {
795
- }
796
- _getNextCommentId() {
797
- const ids = [0];
798
- const part = this._getExistingPartByType(CT.COMMENTS);
799
- if (part) {
800
- const comments = findAllDescendants(part._element, "w:comment");
801
- for (const c of comments) {
802
- const idStr = c.getAttribute("w:id");
803
- if (idStr) ids.push(parseInt(idStr, 10) || 0);
804
- }
805
- }
806
- return Math.max(...ids) + 1;
807
- }
808
- _generateHexId() {
809
- return Math.floor(Math.random() * 4294967295).toString(16).toUpperCase().padStart(8, "0");
810
- }
811
- _getInitials(author) {
812
- if (!author) return "";
813
- return author.split(" ").filter(Boolean).map((p) => p[0]).join("").toUpperCase();
814
- }
815
- _findParaIdForComment(commentId) {
816
- if (!this._commentsPart) return null;
817
- for (const c of findAllDescendants(this._commentsPart._element, "w:comment")) {
818
- if (c.getAttribute("w:id") === commentId) {
819
- for (const p of findAllDescendants(c, "w:p")) {
820
- const pid = p.getAttribute("w14:paraId");
821
- if (pid) return pid;
822
- }
823
- }
824
- }
825
- return null;
826
- }
827
- _findThreadRootParaId(commentId) {
828
- const directParaId = this._findParaIdForComment(commentId);
829
- const extPart = this._getExistingPartByType(CT.EXTENDED);
830
- if (!directParaId || !extPart) return directParaId;
831
- for (let i = 0; i < extPart._element.childNodes.length; i++) {
832
- const child = extPart._element.childNodes[i];
833
- if (child.nodeType !== 1) continue;
834
- if (child.getAttribute("w15:paraId") === directParaId) {
835
- const parent = child.getAttribute("w15:paraIdParent");
836
- if (parent) return parent;
837
- }
838
- }
839
- return directParaId;
442
+ return directParaId;
840
443
  }
841
444
  addComment(author, text, parentId = null) {
842
445
  const commentId = this.nextId.toString();
@@ -1068,377 +671,458 @@ function extract_comments_data(pkg) {
1068
671
  return data;
1069
672
  }
1070
673
 
1071
- // src/ingest.ts
1072
- async function extractTextFromBuffer(buffer, cleanView = false) {
1073
- const doc = await DocumentObject.load(buffer);
1074
- return _extractTextFromDoc(doc, cleanView);
1075
- }
1076
- function _extractTextFromDoc(doc, cleanView = false, includeAppendix = true) {
1077
- const comments_map = extract_comments_data(doc.pkg);
1078
- const full_text = [];
1079
- let cursor = 0;
1080
- for (const part of iter_document_parts(doc)) {
1081
- const part_cursor = full_text.length > 0 ? cursor + 2 : cursor;
1082
- const part_text = _extract_blocks(part, comments_map, cleanView, part_cursor);
1083
- if (part_text) {
1084
- if (full_text.length > 0) cursor += 2;
1085
- full_text.push(part_text);
1086
- cursor += part_text.length;
1087
- }
674
+ // src/utils/docx.ts
675
+ var QN_W_P = "w:p";
676
+ var QN_W_R = "w:r";
677
+ var QN_W_T = "w:t";
678
+ var QN_W_DELTEXT = "w:delText";
679
+ var QN_W_TAB = "w:tab";
680
+ var QN_W_BR = "w:br";
681
+ var QN_W_CR = "w:cr";
682
+ var QN_W_RPR = "w:rPr";
683
+ var QN_W_RPRCHANGE = "w:rPrChange";
684
+ var QN_W_COMMENTREFERENCE = "w:commentReference";
685
+ var QN_W_FOOTNOTEREFERENCE = "w:footnoteReference";
686
+ var QN_W_ENDNOTEREFERENCE = "w:endnoteReference";
687
+ var QN_W_FLDCHAR = "w:fldChar";
688
+ var QN_W_FLDCHARTYPE = "w:fldCharType";
689
+ var QN_W_INSTRTEXT = "w:instrText";
690
+ var QN_W_INS = "w:ins";
691
+ var QN_W_DEL = "w:del";
692
+ var QN_W_ID = "w:id";
693
+ var QN_W_AUTHOR = "w:author";
694
+ var QN_W_DATE = "w:date";
695
+ var QN_W_COMMENTRANGESTART = "w:commentRangeStart";
696
+ var QN_W_COMMENTRANGEEND = "w:commentRangeEnd";
697
+ var QN_W_HYPERLINK = "w:hyperlink";
698
+ var QN_R_ID = "r:id";
699
+ var QN_W_FLDSIMPLE = "w:fldSimple";
700
+ var QN_W_INSTR = "w:instr";
701
+ var QN_W_BOOKMARKSTART = "w:bookmarkStart";
702
+ var QN_W_NAME = "w:name";
703
+ var QN_W_SDT = "w:sdt";
704
+ var QN_W_SMARTTAG = "w:smartTag";
705
+ var QN_W_SDTCONTENT = "w:sdtContent";
706
+ var QN_W_B = "w:b";
707
+ var QN_W_I = "w:i";
708
+ var QN_W_VAL = "w:val";
709
+ var QN_W_PPR = "w:pPr";
710
+ var QN_W_PSTYLE = "w:pStyle";
711
+ var QN_W_OUTLINELVL = "w:outlineLvl";
712
+ var QN_W_NUMPR = "w:numPr";
713
+ var QN_W_NUMID = "w:numId";
714
+ var QN_W_ILVL = "w:ilvl";
715
+ var _CUSTOM_HEADING_NAME_RE = /Heading[ ]?([1-6])(?![0-9])/;
716
+ function _get_style_cache(part) {
717
+ const pkg = part.package || part.pkg || (part.part ? part.part.pkg : null);
718
+ if (pkg && pkg._adeu_style_cache) {
719
+ return pkg._adeu_style_cache;
1088
720
  }
1089
- let base_text = full_text.join("\n\n");
1090
- if (includeAppendix) {
1091
- const appendix = build_structural_appendix(doc, base_text);
1092
- if (appendix) base_text += appendix;
721
+ const cache = {};
722
+ let default_pstyle = null;
723
+ const raw_styles = {};
724
+ const stylesPart = pkg?.getPartByPath("word/styles.xml");
725
+ if (!stylesPart) {
726
+ const result2 = [cache, null];
727
+ if (pkg) pkg._adeu_style_cache = result2;
728
+ return result2;
1093
729
  }
1094
- return base_text;
1095
- }
1096
- function _extract_blocks(container, comments_map, cleanView, cursor) {
1097
- const part = container.part || container;
1098
- const [style_cache, default_pstyle] = _get_style_cache(part);
1099
- const blocks = [];
1100
- let local_cursor = cursor;
1101
- let is_first_block = true;
1102
- let is_first_para = true;
1103
- for (const item of iter_block_items(container)) {
1104
- if (!is_first_block) local_cursor += 2;
1105
- const block_start = local_cursor;
1106
- if (item.constructor.name === "FootnoteItem") {
1107
- const fn_text = _extract_blocks(item, comments_map, cleanView, block_start);
1108
- if (fn_text) {
1109
- blocks.push(fn_text);
1110
- local_cursor = block_start + fn_text.length;
1111
- is_first_block = false;
1112
- } else if (!is_first_block) {
1113
- local_cursor -= 2;
1114
- }
1115
- } else if (item instanceof Paragraph) {
1116
- let prefix = get_paragraph_prefix(item, style_cache, default_pstyle);
1117
- if (is_first_para && container.constructor.name === "FootnoteItem") {
1118
- prefix = `[^${container.note_type}-${container.id}]: ` + prefix;
730
+ const styles = findAllDescendants(stylesPart._element, "w:style");
731
+ for (const s of styles) {
732
+ const s_id = s.getAttribute("w:styleId");
733
+ if (!s_id) continue;
734
+ const s_type = s.getAttribute("w:type");
735
+ const is_default = s.getAttribute("w:default") === "1" || s.getAttribute("w:default") === "true";
736
+ if (s_type === "paragraph" && is_default) default_pstyle = s_id;
737
+ const name_el = findChild(s, "w:name");
738
+ const name = name_el ? name_el.getAttribute("w:val") : s_id;
739
+ const based_on_el = findChild(s, "w:basedOn");
740
+ const based_on = based_on_el ? based_on_el.getAttribute("w:val") : null;
741
+ let outline_lvl = null;
742
+ const pPr = findChild(s, "w:pPr");
743
+ if (pPr) {
744
+ const oLvl = findChild(pPr, "w:outlineLvl");
745
+ if (oLvl) {
746
+ const val = oLvl.getAttribute("w:val");
747
+ if (val && /^\d+$/.test(val)) outline_lvl = parseInt(val, 10);
1119
748
  }
1120
- const p_text = build_paragraph_text(item, comments_map, cleanView, style_cache, default_pstyle);
1121
- const full_block = prefix + p_text;
1122
- blocks.push(full_block);
1123
- local_cursor = block_start + full_block.length;
1124
- is_first_para = false;
1125
- is_first_block = false;
1126
- } else if (item instanceof Table) {
1127
- const table_text = extract_table(item, comments_map, cleanView, block_start);
1128
- if (table_text) {
1129
- blocks.push(table_text);
1130
- local_cursor = block_start + table_text.length;
1131
- is_first_block = false;
1132
- } else if (!is_first_block) {
1133
- local_cursor -= 2;
749
+ }
750
+ let bold = null;
751
+ const rPr = findChild(s, "w:rPr");
752
+ if (rPr) {
753
+ const b = findChild(rPr, "w:b");
754
+ if (b) {
755
+ const val = b.getAttribute("w:val");
756
+ bold = val !== "0" && val !== "false" && val !== "off";
1134
757
  }
1135
- is_first_para = false;
1136
758
  }
759
+ raw_styles[s_id] = { name, based_on, outline_level: outline_lvl, bold };
1137
760
  }
1138
- return blocks.join("\n\n");
1139
- }
1140
- function extract_table(table, comments_map, cleanView, cursor) {
1141
- const rows_text = [];
1142
- let rows_processed = 0;
1143
- let local_cursor = cursor;
1144
- for (const row of table.rows) {
1145
- const cell_texts = [];
1146
- const seen_cells = /* @__PURE__ */ new Set();
1147
- const trPr = findChild(row._element, "w:trPr");
1148
- const ins = trPr ? findChild(trPr, "w:ins") : null;
1149
- const del_node = trPr ? findChild(trPr, "w:del") : null;
1150
- if (cleanView && del_node) continue;
1151
- const row_start = local_cursor + (rows_processed > 0 ? 1 : 0);
1152
- const wrapper_prefix_len = !cleanView && ins ? 4 : !cleanView && del_node ? 4 : 0;
1153
- let cell_cursor = row_start + wrapper_prefix_len;
1154
- let first_cell = true;
1155
- for (const cell of row.cells) {
1156
- if (seen_cells.has(cell)) continue;
1157
- seen_cells.add(cell);
1158
- if (!first_cell) cell_cursor += 3;
1159
- const cell_content = _extract_blocks(cell, comments_map, cleanView, cell_cursor);
1160
- cell_texts.push(cell_content);
1161
- cell_cursor += cell_content.length;
1162
- first_cell = false;
761
+ const resolve_style = (s_id, visited) => {
762
+ if (cache[s_id]) return cache[s_id];
763
+ if (visited.has(s_id) || !raw_styles[s_id]) return { name: s_id, outline_level: null, bold: false };
764
+ visited.add(s_id);
765
+ const raw = raw_styles[s_id];
766
+ const based_on_id = raw.based_on;
767
+ let o_lvl = raw.outline_level;
768
+ let bold_val = raw.bold !== null ? raw.bold : false;
769
+ if (based_on_id) {
770
+ const parent = resolve_style(based_on_id, visited);
771
+ if (o_lvl === null) o_lvl = parent.outline_level;
772
+ if (raw.bold === null) bold_val = parent.bold;
1163
773
  }
1164
- let row_str = cell_texts.join(" | ");
1165
- if (!cleanView) {
1166
- if (ins) row_str = `{++ ${row_str} |Chg:${ins.getAttribute("w:id")}++}`;
1167
- else if (del_node) row_str = `{-- ${row_str} |Chg:${del_node.getAttribute("w:id")}--}`;
774
+ const resolved = { name: raw.name, outline_level: o_lvl, bold: bold_val };
775
+ cache[s_id] = resolved;
776
+ return resolved;
777
+ };
778
+ for (const s_id in raw_styles) resolve_style(s_id, /* @__PURE__ */ new Set());
779
+ const result = [cache, default_pstyle];
780
+ if (pkg) pkg._adeu_style_cache = result;
781
+ return result;
782
+ }
783
+ function _detect_heading_level_from_name(name) {
784
+ if (!name) return null;
785
+ const match = name.match(_CUSTOM_HEADING_NAME_RE);
786
+ return match ? parseInt(match[1], 10) : null;
787
+ }
788
+ function is_native_heading(paragraph, style_cache, default_pstyle) {
789
+ if (!style_cache) {
790
+ [style_cache, default_pstyle] = _get_style_cache(paragraph._parent.part || paragraph._parent);
791
+ }
792
+ const pPr = findChild(paragraph._element, QN_W_PPR);
793
+ if (pPr) {
794
+ const oLvl = findChild(pPr, QN_W_OUTLINELVL);
795
+ if (oLvl) {
796
+ const val = oLvl.getAttribute(QN_W_VAL);
797
+ if (val && /^\d+$/.test(val)) {
798
+ const lvl = parseInt(val, 10);
799
+ if (lvl >= 0 && lvl <= 8) return true;
800
+ }
1168
801
  }
1169
- rows_text.push(row_str);
1170
- local_cursor = row_start + row_str.length;
1171
- rows_processed++;
1172
802
  }
1173
- return rows_text.join("\n");
803
+ let style_id = default_pstyle;
804
+ if (pPr) {
805
+ const pStyle = findChild(pPr, QN_W_PSTYLE);
806
+ if (pStyle) style_id = pStyle.getAttribute(QN_W_VAL) || default_pstyle;
807
+ }
808
+ const style_info = style_id && style_cache ? style_cache[style_id] : null;
809
+ if (style_info && style_info.outline_level !== null && style_info.outline_level >= 0 && style_info.outline_level <= 8) {
810
+ return true;
811
+ }
812
+ const style_name = style_info ? style_info.name : null;
813
+ if (style_name?.startsWith("Heading")) return true;
814
+ if (style_name === "Title") return true;
815
+ if (style_name && style_name !== "Normal") {
816
+ if (_detect_heading_level_from_name(style_name) !== null) return true;
817
+ }
818
+ return false;
1174
819
  }
1175
- function build_paragraph_text(paragraph, comments_map, cleanView, style_cache, default_pstyle) {
1176
- const parts = [];
1177
- const active_ins = {};
1178
- const active_del = {};
1179
- const active_comments = /* @__PURE__ */ new Set();
1180
- const active_fmt = {};
1181
- const deferred_meta_states = [];
1182
- let pending_text = "";
1183
- let current_wrappers = ["", ""];
1184
- let current_style = ["", ""];
1185
- const items = Array.from(iter_paragraph_content(paragraph));
1186
- const is_heading = is_heading_paragraph(paragraph, style_cache, default_pstyle);
1187
- const native_heading = is_native_heading(paragraph, style_cache, default_pstyle);
1188
- let leading_strip_active = is_heading;
1189
- for (let i = 0; i < items.length; i++) {
1190
- const item = items[i];
1191
- if (item instanceof Run) {
1192
- const [prefix, suffix] = get_run_style_markers(item, native_heading);
1193
- const text = get_run_text(item);
1194
- if (cleanView && Object.keys(active_del).length > 0) continue;
1195
- if (leading_strip_active) {
1196
- if (!text || !text.trim()) continue;
1197
- leading_strip_active = false;
820
+ function get_paragraph_prefix(paragraph, style_cache, default_pstyle) {
821
+ if (!style_cache) {
822
+ [style_cache, default_pstyle] = _get_style_cache(paragraph._parent.part || paragraph._parent);
823
+ }
824
+ const pPr = findChild(paragraph._element, QN_W_PPR);
825
+ if (pPr) {
826
+ const oLvl = findChild(pPr, QN_W_OUTLINELVL);
827
+ if (oLvl) {
828
+ const val = oLvl.getAttribute(QN_W_VAL);
829
+ if (val && /^\d+$/.test(val)) {
830
+ const lvl = parseInt(val, 10);
831
+ if (lvl >= 0 && lvl <= 8) return "#".repeat(lvl + 1) + " ";
1198
832
  }
1199
- const seg = apply_formatting_to_segments(text, prefix, suffix);
1200
- if (seg) {
1201
- const new_wrappers = cleanView ? ["", ""] : _get_wrappers(active_ins, active_del, active_comments, active_fmt);
1202
- const new_style = [prefix, suffix];
1203
- if (pending_text && new_wrappers[0] === current_wrappers[0] && new_wrappers[1] === current_wrappers[1]) {
1204
- if (new_style[0] === current_style[0] && new_style[1] === current_style[1] && current_style[0] !== "" && pending_text.endsWith(current_style[1]) && seg.startsWith(new_style[0])) {
1205
- pending_text = pending_text.slice(0, -current_style[1].length) + seg.slice(new_style[0].length);
1206
- } else {
1207
- pending_text += seg;
1208
- }
1209
- current_style = new_style;
1210
- } else {
1211
- if (pending_text) parts.push(`${current_wrappers[0]}${pending_text}${current_wrappers[1]}`);
1212
- pending_text = seg;
1213
- current_wrappers = new_wrappers;
1214
- current_style = new_style;
833
+ }
834
+ }
835
+ let style_id = default_pstyle;
836
+ if (pPr) {
837
+ const pStyle = findChild(pPr, QN_W_PSTYLE);
838
+ if (pStyle) style_id = pStyle.getAttribute(QN_W_VAL) || default_pstyle;
839
+ }
840
+ const style_info = style_id && style_cache ? style_cache[style_id] : null;
841
+ if (style_info && style_info.outline_level !== null && style_info.outline_level >= 0 && style_info.outline_level <= 8) {
842
+ return "#".repeat(style_info.outline_level + 1) + " ";
843
+ }
844
+ const style_name = style_info ? style_info.name : null;
845
+ if (style_name?.startsWith("Heading")) {
846
+ const match = style_name.replace("Heading", "").trim();
847
+ if (/^\d+$/.test(match)) return "#".repeat(parseInt(match, 10)) + " ";
848
+ }
849
+ if (style_name === "Title") return "# ";
850
+ if (pPr) {
851
+ const numPr = findChild(pPr, QN_W_NUMPR);
852
+ if (numPr) {
853
+ const numId = findChild(numPr, QN_W_NUMID);
854
+ if (numId && numId.getAttribute(QN_W_VAL) !== "0") {
855
+ let level = 0;
856
+ const ilvl = findChild(numPr, QN_W_ILVL);
857
+ if (ilvl) {
858
+ const valAttr = ilvl.getAttribute(QN_W_VAL);
859
+ if (valAttr) level = parseInt(valAttr, 10) || 0;
1215
860
  }
1216
- if (!cleanView) {
1217
- const has_meta = Object.keys(active_ins).length > 0 || Object.keys(active_del).length > 0 || active_comments.size > 0 || Object.keys(active_fmt).length > 0;
1218
- if (has_meta) {
1219
- deferred_meta_states.push([{ ...active_ins }, { ...active_del }, new Set(active_comments), { ...active_fmt }]);
861
+ return " ".repeat(level) + "* ";
862
+ }
863
+ }
864
+ }
865
+ if (style_name && style_name !== "Normal") {
866
+ const custom_level = _detect_heading_level_from_name(style_name);
867
+ if (custom_level !== null) return "#".repeat(custom_level) + " ";
868
+ }
869
+ if (!style_name || style_name === "Normal") {
870
+ const text = paragraph.text.trim();
871
+ if (text && text.length < 100 && text === text.toUpperCase()) {
872
+ let is_bold = false;
873
+ if (style_info?.bold) {
874
+ is_bold = true;
875
+ } else {
876
+ const runs = findAllDescendants(paragraph._element, QN_W_R);
877
+ for (const r of runs) {
878
+ const tList = findAllDescendants(r, QN_W_T);
879
+ const tText = tList.map((t) => t.textContent || "").join("");
880
+ if (tText.trim()) {
881
+ const rPr_run = findChild(r, QN_W_RPR);
882
+ if (rPr_run) {
883
+ const b = findChild(rPr_run, QN_W_B);
884
+ if (b && b.getAttribute(QN_W_VAL) !== "0" && b.getAttribute(QN_W_VAL) !== "false") {
885
+ is_bold = true;
886
+ }
887
+ }
888
+ break;
1220
889
  }
1221
- let should_defer = false;
1222
- const is_redline = Object.keys(active_ins).length > 0 || Object.keys(active_del).length > 0 || Object.keys(active_fmt).length > 0;
1223
- if (is_redline) {
1224
- let j = i + 1;
1225
- let next_is_redline = false;
1226
- let temp_ins = Object.keys(active_ins).length;
1227
- let temp_del = Object.keys(active_del).length;
1228
- let temp_fmt = Object.keys(active_fmt).length;
1229
- while (j < items.length) {
1230
- const next_item = items[j];
1231
- if (next_item instanceof Run) {
1232
- if (!get_run_text(next_item)) {
1233
- j++;
1234
- continue;
1235
- }
1236
- if (temp_ins > 0 || temp_del > 0 || temp_fmt > 0) next_is_redline = true;
1237
- break;
1238
- } else {
1239
- const ev = next_item;
1240
- if (ev.type === "ins_start") temp_ins++;
1241
- else if (ev.type === "ins_end") temp_ins = Math.max(0, temp_ins - 1);
1242
- else if (ev.type === "del_start") temp_del++;
1243
- else if (ev.type === "del_end") temp_del = Math.max(0, temp_del - 1);
1244
- else if (ev.type === "fmt_start") temp_fmt++;
1245
- else if (ev.type === "fmt_end") temp_fmt = Math.max(0, temp_fmt - 1);
1246
- }
1247
- j++;
1248
- }
1249
- if (next_is_redline) should_defer = true;
1250
- }
1251
- if (!should_defer && deferred_meta_states.length > 0) {
1252
- const meta_block = _build_merged_meta_block(deferred_meta_states, comments_map);
1253
- if (meta_block) {
1254
- if (pending_text) {
1255
- parts.push(`${current_wrappers[0]}${pending_text}${current_wrappers[1]}`);
1256
- pending_text = "";
1257
- current_wrappers = ["", ""];
1258
- current_style = ["", ""];
1259
- }
1260
- parts.push(`{>>${meta_block}<<}`);
1261
- }
1262
- deferred_meta_states.length = 0;
1263
- }
1264
- }
1265
- }
1266
- } else {
1267
- const ev = item;
1268
- leading_strip_active = false;
1269
- if (!["ins_start", "ins_end", "del_start", "del_end", "fmt_start", "fmt_end"].includes(ev.type)) {
1270
- if (pending_text) {
1271
- parts.push(`${current_wrappers[0]}${pending_text}${current_wrappers[1]}`);
1272
- pending_text = "";
1273
- current_wrappers = ["", ""];
1274
- current_style = ["", ""];
1275
- }
1276
- }
1277
- if (ev.type === "start") active_comments.add(ev.id);
1278
- else if (ev.type === "end") active_comments.delete(ev.id);
1279
- else if (ev.type === "ins_start") active_ins[ev.id] = ev;
1280
- else if (ev.type === "ins_end") delete active_ins[ev.id];
1281
- else if (ev.type === "del_start") active_del[ev.id] = ev;
1282
- else if (ev.type === "del_end") delete active_del[ev.id];
1283
- else if (ev.type === "fmt_start") active_fmt[ev.id] = ev;
1284
- else if (ev.type === "fmt_end") delete active_fmt[ev.id];
1285
- else if (ev.type === "footnote" || ev.type === "endnote") {
1286
- if (pending_text) {
1287
- parts.push(`${current_wrappers[0]}${pending_text}${current_wrappers[1]}`);
1288
- pending_text = "";
1289
- current_wrappers = ["", ""];
1290
- current_style = ["", ""];
1291
- }
1292
- parts.push(`[^${ev.type === "footnote" ? "fn" : "en"}-${ev.id}]`);
1293
- } else if (ev.type === "hyperlink_start") {
1294
- if (pending_text) {
1295
- parts.push(`${current_wrappers[0]}${pending_text}${current_wrappers[1]}`);
1296
- pending_text = "";
1297
- current_wrappers = ["", ""];
1298
- current_style = ["", ""];
1299
- }
1300
- parts.push("[");
1301
- } else if (ev.type === "hyperlink_end") {
1302
- if (pending_text) {
1303
- parts.push(`${current_wrappers[0]}${pending_text}${current_wrappers[1]}`);
1304
- pending_text = "";
1305
- current_wrappers = ["", ""];
1306
- current_style = ["", ""];
1307
- }
1308
- parts.push(`](${ev.date})`);
1309
- } else if (ev.type === "xref_start") {
1310
- if (pending_text) {
1311
- parts.push(`${current_wrappers[0]}${pending_text}${current_wrappers[1]}`);
1312
- pending_text = "";
1313
- current_wrappers = ["", ""];
1314
- current_style = ["", ""];
1315
- }
1316
- parts.push("[~");
1317
- } else if (ev.type === "xref_end") {
1318
- if (pending_text) {
1319
- parts.push(`${current_wrappers[0]}${pending_text}${current_wrappers[1]}`);
1320
- pending_text = "";
1321
- current_wrappers = ["", ""];
1322
- current_style = ["", ""];
1323
- }
1324
- parts.push(`~](#${ev.id})`);
1325
- } else if (ev.type === "bookmark") {
1326
- if (pending_text) {
1327
- parts.push(`${current_wrappers[0]}${pending_text}${current_wrappers[1]}`);
1328
- pending_text = "";
1329
- current_wrappers = ["", ""];
1330
- current_style = ["", ""];
1331
890
  }
1332
- parts.push(`{#${ev.id}}`);
1333
891
  }
892
+ if (is_bold) return "## ";
1334
893
  }
1335
894
  }
1336
- if (pending_text) parts.push(`${current_wrappers[0]}${pending_text}${current_wrappers[1]}`);
1337
- if (deferred_meta_states.length > 0) {
1338
- const meta_block = _build_merged_meta_block(deferred_meta_states, comments_map);
1339
- if (meta_block) parts.push(`{>>${meta_block}<<}`);
895
+ return "";
896
+ }
897
+ function is_heading_paragraph(paragraph, style_cache, default_pstyle) {
898
+ const prefix = get_paragraph_prefix(paragraph, style_cache, default_pstyle);
899
+ if (!prefix) return false;
900
+ const stripped = prefix.trimEnd();
901
+ return stripped.length > 0 && stripped === "#".repeat(stripped.length);
902
+ }
903
+ function get_run_style_markers(run, is_heading = null) {
904
+ let prefix = "";
905
+ let suffix = "";
906
+ const rPr = findChild(run._element, QN_W_RPR);
907
+ let is_bold = false;
908
+ let is_italic = false;
909
+ if (rPr) {
910
+ const b = findChild(rPr, QN_W_B);
911
+ if (b && b.getAttribute(QN_W_VAL) !== "0" && b.getAttribute(QN_W_VAL) !== "false") is_bold = true;
912
+ const i = findChild(rPr, QN_W_I);
913
+ if (i && i.getAttribute(QN_W_VAL) !== "0" && i.getAttribute(QN_W_VAL) !== "false") is_italic = true;
1340
914
  }
1341
- return parts.join("");
915
+ if (is_heading === null) {
916
+ const parent = run._parent;
917
+ is_heading = parent instanceof Paragraph ? is_native_heading(parent) : false;
918
+ }
919
+ if (is_bold && !is_heading) {
920
+ prefix += "**";
921
+ suffix = "**" + suffix;
922
+ }
923
+ if (is_italic) {
924
+ prefix += "_";
925
+ suffix = "_" + suffix;
926
+ }
927
+ return [prefix, suffix];
1342
928
  }
1343
- function _get_wrappers(ins, del, comments, fmt) {
1344
- if (Object.keys(del).length > 0) return ["{--", "--}"];
1345
- if (Object.keys(ins).length > 0) return ["{++", "++}"];
1346
- if (comments.size > 0 || Object.keys(fmt).length > 0) return ["{==", "==}"];
1347
- return ["", ""];
929
+ function apply_formatting_to_segments(text, prefix, suffix) {
930
+ if (!prefix && !suffix) return text;
931
+ if (!text) return "";
932
+ if (!text.includes("\n")) return `${prefix}${text}${suffix}`;
933
+ const parts = text.split("\n");
934
+ return parts.map((p) => p ? `${prefix}${p}${suffix}` : "").join("\n");
1348
935
  }
1349
- function _build_merged_meta_block(states_list, comments_map) {
1350
- const change_lines = [];
1351
- const comment_lines = [];
1352
- const seen_sigs = /* @__PURE__ */ new Set();
1353
- for (const [ins_map, del_map, comments_set, fmt_map] of states_list) {
1354
- let render_comment2 = function(cid) {
1355
- if (!comments_map[cid]) return;
1356
- const sig = `Com:${cid}`;
1357
- if (seen_sigs.has(sig)) return;
1358
- const data = comments_map[cid];
1359
- let header = `[${sig}] ${data.author}`;
1360
- if (data.date) header += ` @ ${data.date}`;
1361
- if (data.resolved) header += `(RESOLVED)`;
1362
- comment_lines.push(`${header}: ${data.text}`);
1363
- seen_sigs.add(sig);
1364
- if (children_map[cid]) {
1365
- const children = children_map[cid].sort((a, b) => (comments_map[a]?.date || "").localeCompare(comments_map[b]?.date || ""));
1366
- for (const child_id of children) {
1367
- render_comment2(child_id);
1368
- }
1369
- }
1370
- };
1371
- var render_comment = render_comment2;
1372
- for (const [uid, meta] of Object.entries(ins_map)) {
1373
- const sig = `Chg:${uid}`;
1374
- if (!seen_sigs.has(sig)) {
1375
- change_lines.push(`[${sig} insert] ${meta.author || "Unknown"}`);
1376
- seen_sigs.add(sig);
1377
- }
1378
- }
1379
- for (const [uid, meta] of Object.entries(del_map)) {
1380
- const sig = `Chg:${uid}`;
1381
- if (!seen_sigs.has(sig)) {
1382
- change_lines.push(`[${sig} delete] ${meta.author || "Unknown"}`);
1383
- seen_sigs.add(sig);
1384
- }
1385
- }
1386
- for (const [uid, meta] of Object.entries(fmt_map)) {
1387
- const sig = `Chg:${uid}`;
1388
- if (!seen_sigs.has(sig)) {
1389
- change_lines.push(`[${sig} format] ${meta.author || "Unknown"}`);
1390
- seen_sigs.add(sig);
1391
- }
1392
- }
1393
- const children_map = {};
1394
- for (const [c_id, data] of Object.entries(comments_map)) {
1395
- const p_id = data.parent_id;
1396
- if (p_id) {
1397
- if (!children_map[p_id]) children_map[p_id] = [];
1398
- children_map[p_id].push(c_id);
1399
- }
1400
- }
1401
- const sorted_ids = Array.from(comments_set).sort();
1402
- for (const c_id of sorted_ids) {
1403
- render_comment2(c_id);
936
+ function get_run_text(run) {
937
+ let text = "";
938
+ for (let i = 0; i < run._element.childNodes.length; i++) {
939
+ const child = run._element.childNodes[i];
940
+ if (child.nodeType !== 1) continue;
941
+ if (child.tagName === QN_W_T || child.tagName === QN_W_DELTEXT) {
942
+ const raw = child.textContent || "";
943
+ text += raw.replace(/\t/g, " ");
944
+ } else if (child.tagName === QN_W_TAB) {
945
+ text += " ";
946
+ } else if (child.tagName === QN_W_BR || child.tagName === QN_W_CR) {
947
+ text += "\n";
1404
948
  }
1405
949
  }
1406
- return [...change_lines, ...comment_lines].join("\n");
950
+ return text;
1407
951
  }
1408
-
1409
- // src/mapper.ts
1410
- var DocumentMapper = class {
1411
- doc;
1412
- clean_view;
1413
- comments_map;
1414
- full_text = "";
1415
- spans = [];
1416
- appendix_start_index = -1;
1417
- _text_chunks = [];
1418
- constructor(doc, clean_view = false) {
1419
- this.doc = doc;
1420
- this.clean_view = clean_view;
1421
- this.comments_map = extract_comments_data(doc.pkg);
1422
- this._build_map();
1423
- }
1424
- _build_map() {
1425
- let current_offset = 0;
1426
- this.spans = [];
1427
- this._text_chunks = [];
1428
- this.full_text = "";
1429
- for (const part of iter_document_parts(this.doc)) {
1430
- current_offset = this._map_blocks(part, current_offset);
1431
- if (this.spans.length > 0 && this.spans[this.spans.length - 1].text !== "\n\n") {
1432
- this._add_virtual_text("\n\n", current_offset, null);
1433
- current_offset += 2;
1434
- }
1435
- }
1436
- while (this.spans.length > 0 && this.spans[this.spans.length - 1].text === "\n\n") {
1437
- this.spans.pop();
1438
- this._text_chunks.pop();
952
+ function* iter_block_items(parent) {
953
+ const parent_elm = parent._element || parent.element || parent;
954
+ if (parent.constructor.name === "NotesPart") {
955
+ const tag = parent.note_type === "fn" ? "w:footnote" : "w:endnote";
956
+ const notes = findAllDescendants(parent_elm, tag);
957
+ for (const child of notes) {
958
+ if (child.getAttribute("w:type") === "separator" || child.getAttribute("w:type") === "continuationSeparator") continue;
959
+ yield new FootnoteItem(child, parent, parent.note_type);
1439
960
  }
1440
- this.full_text = this._text_chunks.join("");
1441
- this.appendix_start_index = -1;
961
+ return;
962
+ }
963
+ for (let i = 0; i < parent_elm.childNodes.length; i++) {
964
+ const child = parent_elm.childNodes[i];
965
+ if (child.nodeType !== 1) continue;
966
+ if (child.tagName === QN_W_P) {
967
+ yield new Paragraph(child, parent);
968
+ } else if (child.tagName === "w:tbl") {
969
+ yield new Table(child, parent);
970
+ }
971
+ }
972
+ }
973
+ function* iter_document_parts(doc) {
974
+ yield doc;
975
+ const fnPart = doc.pkg.getPartByPath("word/footnotes.xml");
976
+ const enPart = doc.pkg.getPartByPath("word/endnotes.xml");
977
+ if (fnPart) yield new NotesPart(fnPart, "fn");
978
+ if (enPart) yield new NotesPart(enPart, "en");
979
+ }
980
+ function _is_page_instr(instr) {
981
+ if (!instr) return false;
982
+ const parts = instr.toUpperCase().trim().split(/\s+/);
983
+ return parts.length > 0 && (parts[0] === "PAGE" || parts[0] === "NUMPAGES");
984
+ }
985
+ function _get_part(parent) {
986
+ if (!parent) return null;
987
+ if (parent.part) return parent.part;
988
+ if (parent.pkg && parent.pkg.mainDocumentPart) return parent.pkg.mainDocumentPart;
989
+ if (parent._parent) return _get_part(parent._parent);
990
+ return null;
991
+ }
992
+ function* iter_paragraph_content(paragraph) {
993
+ let in_complex_field = false;
994
+ let current_instr = "";
995
+ let hide_result = false;
996
+ function* process_run_element(r_element) {
997
+ let c_id = null;
998
+ const rPr = findChild(r_element, QN_W_RPR);
999
+ if (rPr) {
1000
+ const rPrChange = findChild(rPr, QN_W_RPRCHANGE);
1001
+ if (rPrChange) {
1002
+ c_id = rPrChange.getAttribute(QN_W_ID);
1003
+ yield { type: "fmt_start", id: c_id, author: rPrChange.getAttribute(QN_W_AUTHOR) || void 0, date: rPrChange.getAttribute(QN_W_DATE) || void 0 };
1004
+ }
1005
+ }
1006
+ for (let i = 0; i < r_element.childNodes.length; i++) {
1007
+ const child = r_element.childNodes[i];
1008
+ if (child.nodeType !== 1) continue;
1009
+ const tag = child.tagName;
1010
+ if (tag === QN_W_COMMENTREFERENCE) {
1011
+ const ref_id = child.getAttribute(QN_W_ID);
1012
+ if (ref_id) yield { type: "ref", id: ref_id };
1013
+ } else if (tag === QN_W_FOOTNOTEREFERENCE) {
1014
+ const f_id = child.getAttribute(QN_W_ID);
1015
+ if (f_id) yield { type: "footnote", id: f_id };
1016
+ } else if (tag === QN_W_ENDNOTEREFERENCE) {
1017
+ const e_id = child.getAttribute(QN_W_ID);
1018
+ if (e_id) yield { type: "endnote", id: e_id };
1019
+ } else if (tag === QN_W_FLDCHAR) {
1020
+ const fld_type = child.getAttribute(QN_W_FLDCHARTYPE);
1021
+ if (fld_type === "begin") {
1022
+ in_complex_field = true;
1023
+ current_instr = "";
1024
+ } else if (fld_type === "separate") {
1025
+ if (_is_page_instr(current_instr)) hide_result = true;
1026
+ else {
1027
+ const parts = current_instr.trim().split(/\s+/);
1028
+ if (parts.length > 1 && parts[0] === "REF") yield { type: "xref_start", id: parts[1] };
1029
+ }
1030
+ } else if (fld_type === "end") {
1031
+ if (!hide_result) {
1032
+ const parts = current_instr.trim().split(/\s+/);
1033
+ if (parts.length > 1 && parts[0] === "REF") yield { type: "xref_end", id: parts[1] };
1034
+ }
1035
+ in_complex_field = false;
1036
+ current_instr = "";
1037
+ hide_result = false;
1038
+ }
1039
+ } else if (tag === QN_W_INSTRTEXT && in_complex_field && !hide_result) {
1040
+ current_instr += child.textContent || "";
1041
+ }
1042
+ }
1043
+ if (!hide_result) yield new Run(r_element, paragraph);
1044
+ if (c_id !== null) yield { type: "fmt_end", id: c_id };
1045
+ }
1046
+ function* traverse_node(node) {
1047
+ for (let i = 0; i < node.childNodes.length; i++) {
1048
+ const child = node.childNodes[i];
1049
+ if (child.nodeType !== 1) continue;
1050
+ const tag = child.tagName;
1051
+ if (tag === QN_W_R) yield* process_run_element(child);
1052
+ else if (tag === QN_W_INS) {
1053
+ const i_id = child.getAttribute(QN_W_ID);
1054
+ yield { type: "ins_start", id: i_id, author: child.getAttribute(QN_W_AUTHOR) || void 0, date: child.getAttribute(QN_W_DATE) || void 0 };
1055
+ yield* traverse_node(child);
1056
+ yield { type: "ins_end", id: i_id };
1057
+ } else if (tag === QN_W_DEL) {
1058
+ const d_id = child.getAttribute(QN_W_ID);
1059
+ yield { type: "del_start", id: d_id, author: child.getAttribute(QN_W_AUTHOR) || void 0, date: child.getAttribute(QN_W_DATE) || void 0 };
1060
+ yield* traverse_node(child);
1061
+ yield { type: "del_end", id: d_id };
1062
+ } else if (tag === QN_W_COMMENTRANGESTART) yield { type: "start", id: child.getAttribute(QN_W_ID) };
1063
+ else if (tag === QN_W_COMMENTRANGEEND) yield { type: "end", id: child.getAttribute(QN_W_ID) };
1064
+ else if (tag === QN_W_HYPERLINK) {
1065
+ const rId = child.getAttribute(QN_R_ID) || child.getAttribute("id");
1066
+ let url = "";
1067
+ const part = _get_part(paragraph._parent);
1068
+ if (rId && part) {
1069
+ const rel = part.rels.get(rId);
1070
+ if (rel && rel.isExternal) url = rel.target;
1071
+ }
1072
+ if (url) yield { type: "hyperlink_start", id: rId, date: url };
1073
+ yield* traverse_node(child);
1074
+ if (url) yield { type: "hyperlink_end", id: rId, date: url };
1075
+ } else if (tag === QN_W_FLDSIMPLE) {
1076
+ const instr = child.getAttribute(QN_W_INSTR) || "";
1077
+ const parts = instr.trim().split(/\s+/);
1078
+ const target = parts.length > 1 && parts[0] === "REF" ? parts[1] : "";
1079
+ if (target) yield { type: "xref_start", id: target };
1080
+ yield* traverse_node(child);
1081
+ if (target) yield { type: "xref_end", id: target };
1082
+ } else if (tag === QN_W_BOOKMARKSTART) {
1083
+ const b_name = child.getAttribute(QN_W_NAME);
1084
+ if (b_name && (!b_name.startsWith("_") || b_name.startsWith("_Ref"))) yield { type: "bookmark", id: b_name };
1085
+ } else if (tag === QN_W_SDT || tag === QN_W_SMARTTAG || tag === QN_W_SDTCONTENT) {
1086
+ yield* traverse_node(child);
1087
+ }
1088
+ }
1089
+ }
1090
+ yield* traverse_node(paragraph._element);
1091
+ }
1092
+
1093
+ // src/mapper.ts
1094
+ var DocumentMapper = class {
1095
+ doc;
1096
+ clean_view;
1097
+ comments_map;
1098
+ full_text = "";
1099
+ spans = [];
1100
+ appendix_start_index = -1;
1101
+ _text_chunks = [];
1102
+ constructor(doc, clean_view = false) {
1103
+ this.doc = doc;
1104
+ this.clean_view = clean_view;
1105
+ this.comments_map = extract_comments_data(doc.pkg);
1106
+ this._build_map();
1107
+ }
1108
+ _build_map() {
1109
+ let current_offset = 0;
1110
+ this.spans = [];
1111
+ this._text_chunks = [];
1112
+ this.full_text = "";
1113
+ for (const part of iter_document_parts(this.doc)) {
1114
+ current_offset = this._map_blocks(part, current_offset);
1115
+ if (this.spans.length > 0 && this.spans[this.spans.length - 1].text !== "\n\n") {
1116
+ this._add_virtual_text("\n\n", current_offset, null);
1117
+ current_offset += 2;
1118
+ }
1119
+ }
1120
+ while (this.spans.length > 0 && this.spans[this.spans.length - 1].text === "\n\n") {
1121
+ this.spans.pop();
1122
+ this._text_chunks.pop();
1123
+ }
1124
+ this.full_text = this._text_chunks.join("");
1125
+ this.appendix_start_index = -1;
1442
1126
  }
1443
1127
  _map_blocks(container, offset) {
1444
1128
  let current = offset;
@@ -2248,6 +1932,50 @@ function generate_edits_from_text(original_text, modified_text) {
2248
1932
  }
2249
1933
  return edits;
2250
1934
  }
1935
/**
 * Build a simplified, line-based unified diff between two texts.
 *
 * The result starts with "--- Original" / "+++ Modified" headers, followed by
 * one "@@ ... @@" hunk per group of changes. Unchanged runs longer than
 * `context_lines * 2` lines separate hunks; shorter unchanged runs are kept
 * inline inside the hunk.
 *
 * @param {string} original_text - Text before the change.
 * @param {string} modified_text - Text after the change.
 * @param {number} [context_lines=3] - Unchanged lines shown around each hunk.
 * @returns {string} The diff text, or "" when no hunk was produced.
 */
function create_unified_diff(original_text, modified_text, context_lines = 3) {
  const EQUAL = 0;
  const DELETE = -1;
  // Drop a single trailing newline so the split does not yield an empty last line.
  const to_lines = (text) => text.replace(/\n$/, "").split("\n");
  const with_prefix = (prefix, lines) => lines.map((line) => `${prefix}${line}`);

  // Diff at line granularity: encode each line as one char, diff, decode.
  const engine = new diff_match_patch.diff_match_patch();
  const encoded = engine.diff_linesToChars_(original_text, modified_text);
  const diffs = engine.diff_main(encoded.chars1, encoded.chars2, false);
  engine.diff_charsToLines_(diffs, encoded.lineArray);

  const output = ["--- Original", "+++ Modified"];
  let index = 0;
  for (;;) {
    // Advance to the next changed segment.
    while (index < diffs.length && diffs[index][0] === EQUAL) index++;
    if (index >= diffs.length) break;

    const hunk = [];
    // Leading context: the tail of the unchanged segment just before the change.
    if (index > 0 && diffs[index - 1][0] === EQUAL) {
      hunk.push(...with_prefix(" ", to_lines(diffs[index - 1][1]).slice(-context_lines)));
    }

    for (; index < diffs.length; index++) {
      const [op, text] = diffs[index];
      const lines = to_lines(text);
      if (op === EQUAL) {
        // A long unchanged run ends this hunk (index is left pointing at it).
        if (lines.length > context_lines * 2) break;
        hunk.push(...with_prefix(" ", lines));
      } else {
        hunk.push(...with_prefix(op === DELETE ? "-" : "+", lines));
      }
    }

    // Trailing context: the head of the unchanged segment that ended the hunk.
    if (index < diffs.length && diffs[index][0] === EQUAL) {
      hunk.push(...with_prefix(" ", to_lines(diffs[index][1]).slice(0, context_lines)));
    }

    output.push("@@ ... @@", ...hunk);
  }

  // Only the two header lines means nothing changed.
  return output.length === 2 ? "" : output.join("\n");
}
2251
1979
 
2252
1980
  // src/engine.ts
2253
1981
  function insertAfter(newNode, refNode) {
@@ -3195,85 +2923,664 @@ function _split_on_safe_paragraph_breaks(text) {
3195
2923
  continue;
3196
2924
  }
3197
2925
  }
3198
- i++;
2926
+ i++;
2927
+ }
2928
+ if (block_start < n) {
2929
+ const block_text = text.substring(block_start, n);
2930
+ if (block_text) blocks.push([block_text, block_start]);
2931
+ }
2932
+ return blocks;
2933
+ }
2934
/**
 * Fold a "## Footnotes" / "## Endnotes" header block and the note-definition
 * blocks that immediately follow it into a single block.
 *
 * @param {Array<[string, number]>} blocks - [text, offset] records in order.
 * @returns {Array<[string, number]>} Records where each header block has
 *   absorbed the following "[^fn-" / "[^en-" blocks, joined by a blank line;
 *   the merged record keeps the header's offset. An empty input is returned
 *   as-is.
 */
function _merge_footnote_sections(blocks) {
  if (!blocks.length) return blocks;

  const starts_with_any = (text, prefixes) => {
    const trimmed = text.trimStart();
    return prefixes.some((prefix) => trimmed.startsWith(prefix));
  };
  const is_section_header = (text) => starts_with_any(text, ["## Footnotes", "## Endnotes"]);
  const is_note_definition = (text) => starts_with_any(text, ["[^fn-", "[^en-"]);

  const merged = [];
  let index = 0;
  while (index < blocks.length) {
    const [text, offset] = blocks[index];
    index += 1;
    if (!is_section_header(text)) {
      merged.push([text, offset]);
      continue;
    }
    // Absorb the run of note definitions directly after the header.
    const pieces = [text];
    while (index < blocks.length && is_note_definition(blocks[index][0])) {
      pieces.push(blocks[index][0]);
      index += 1;
    }
    merged.push([pieces.join("\n\n"), offset]);
  }
  return merged;
}
2966
/**
 * Pack [text, offset] block records into pages of roughly PAGE_TARGET_CHARS
 * characters.
 *
 * Blocks on one page are joined with a blank line. A block that alone exceeds
 * the target becomes its own page. Each page's start is the offset of its
 * first block.
 *
 * @param {Array<[string, number]>} block_records - Blocks in document order.
 * @returns {[string[], number[]]} Page texts and their start offsets; a single
 *   empty page starting at 0 when there is nothing to place.
 */
function _assemble_pages(block_records) {
  const empty_result = () => [[""], [0]];
  if (!block_records.length) return empty_result();

  const pages = [];
  const page_starts = [];
  let pending = [];
  let pending_size = 0;
  let pending_start = -1;

  // Emit the page under construction (if any) and reset the accumulator.
  function flush_pending() {
    if (pending.length > 0) {
      pages.push(pending.join("\n\n"));
      page_starts.push(pending_start);
    }
    pending = [];
    pending_size = 0;
    pending_start = -1;
  }

  for (const [block_text, block_offset] of block_records) {
    const block_size = block_text.length;
    // Joining onto an existing page costs two extra characters ("\n\n").
    const added_size = pending.length > 0 ? block_size + 2 : block_size;

    if (pending.length > 0 && pending_size + added_size > PAGE_TARGET_CHARS) {
      flush_pending();
    }

    const page_is_empty = pending.length === 0;
    if (page_is_empty && block_size > PAGE_TARGET_CHARS) {
      // Oversized block: it is a page on its own.
      pages.push(block_text);
      page_starts.push(block_offset);
      continue;
    }
    if (page_is_empty) pending_start = block_offset;

    pending.push(block_text);
    pending_size += pending_size > 0 ? added_size : block_size;
  }
  flush_pending();

  return pages.length ? [pages, page_starts] : empty_result();
}
3001
/**
 * Count the distinct change ids that _CHG_ID_PATTERN finds in a page.
 *
 * @param {string} page_content - Rendered page text.
 * @returns {number} Number of unique first-capture-group values.
 */
function _count_tracked_changes(page_content) {
  const unique_ids = new Set();
  for (const match of page_content.matchAll(_CHG_ID_PATTERN)) {
    unique_ids.add(match[1]);
  }
  return unique_ids.size;
}
3006
+
3007
+ // src/domain.ts
3008
/**
 * Levenshtein edit distance with an upper bound.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @param {number} [maxDist=2] - Largest distance the caller cares about.
 * @returns {number} The exact distance when it is at most `maxDist`,
 *   otherwise `maxDist + 1`.
 */
function boundedLevenshtein(a, b, maxDist = 2) {
  const exceeded = maxDist + 1;
  const clamp = (distance) => (distance <= maxDist ? distance : exceeded);

  if (a === b) return 0;
  // The length difference is a lower bound on the distance.
  if (Math.abs(a.length - b.length) > maxDist) return exceeded;
  if (a.length === 0) return clamp(b.length);
  if (b.length === 0) return clamp(a.length);

  // Keep the DP row as short as possible: columns follow the shorter string.
  const [shorter, longer] = a.length > b.length ? [b, a] : [a, b];

  let previous = [];
  for (let col = 0; col <= shorter.length; col++) previous.push(col);

  for (let rowIndex = 1; rowIndex <= longer.length; rowIndex++) {
    const longerChar = longer[rowIndex - 1];
    const current = new Array(shorter.length + 1);
    current[0] = rowIndex;
    let best = rowIndex;

    for (let col = 1; col <= shorter.length; col++) {
      const substitution = previous[col - 1] + (shorter[col - 1] === longerChar ? 0 : 1);
      const value = Math.min(previous[col] + 1, current[col - 1] + 1, substitution);
      current[col] = value;
      if (value < best) best = value;
    }

    // Every cell in this row is over budget, so the final result will be too.
    if (best > maxDist) return exceeded;
    previous = current;
  }

  return clamp(previous[shorter.length]);
}
3037
/**
 * Concatenate the text of every `w:r` run found anywhere under a paragraph.
 *
 * @param {object} p - Paragraph wrapper exposing `_element`.
 * @returns {string} Run texts appended in descendant order.
 */
function _get_paragraph_text(p) {
  return findAllDescendants(p._element, "w:r").reduce(
    (text, run_element) => text + get_run_text(new Run(run_element, p)),
    "",
  );
}
3045
/**
 * Collect defined terms, diagnostics and named anchors from a document.
 *
 * For every paragraph yielded by `iter_block_items(doc)` this:
 *  - extracts quoted, capitalised terms (leading `"Term"` or parenthesised
 *    `(... "Term" ...)`) as definitions and notes terms defined more than once;
 *  - records bookmarks (names not starting with "_" unless "_Ref...") as
 *    anchors, and `REF <name>` fields (w:fldSimple / w:instrText) as
 *    references to them.
 * Definitions are then counted against `base_text`; unused ones are dropped.
 * Capitalised phrases in `base_text` that are within a small edit distance of
 * a defined term are reported as possible typos.
 *
 * @param {object} doc - Document passed to `iter_block_items`.
 * @param {string} base_text - Rendered document text used for usage counts.
 * @returns {[Object<string, {count: number}>, string[],
 *   Object<string, {anchored_to: string, referenced_from: string[]}>]}
 *   `[definitions, diagnostics, anchors]`; diagnostics are sorted by severity
 *   ([Error], [Warning], other) and then alphabetically.
 */
function extract_all_domain_metadata(doc, base_text) {
  // Bookmark names and REF targets come from the document. A plain `obj[key]`
  // lookup would resolve names like "constructor" or "toString" to members
  // inherited from Object.prototype, so always test for OWN properties.
  const has_own = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);

  const definitions = {};
  const duplicates = new Set();
  const raw_anchors = {};
  const raw_references = [];
  const leading_re = /^(?:[\d.\-()a-zA-Z]+\s*)?["“]([A-Z][A-Za-z0-9\s\-&'’]{1,60})["”]/;
  const inline_re = /\([^)]*?["“]([A-Z][A-Za-z0-9\s\-&'’]{1,60})["”][^)]*?\)/g;

  for (const item of iter_block_items(doc)) {
    if (!(item instanceof Paragraph)) continue;
    const text = _get_paragraph_text(item).trim();
    if (!text) continue;

    // --- Defined terms -----------------------------------------------------
    const extracted_terms = [];
    const leading_match = text.match(leading_re);
    if (leading_match) extracted_terms.push(leading_match[1].trim());
    for (const m of text.matchAll(inline_re)) {
      extracted_terms.push(m[1].trim());
    }
    for (const term of extracted_terms) {
      if (has_own(definitions, term)) duplicates.add(term);
      else definitions[term] = { count: 0 };
    }

    // --- Anchors and cross-references --------------------------------------
    const short_text = text.length > 60 ? text.substring(0, 60) + "..." : text;
    const nodes = findAllDescendants(item._element, "*");
    for (const node of nodes) {
      if (node.tagName === "w:bookmarkStart") {
        const b_name = node.getAttribute("w:name");
        if (b_name && (!b_name.startsWith("_") || b_name.startsWith("_Ref"))) {
          // First occurrence wins.
          if (!has_own(raw_anchors, b_name)) {
            raw_anchors[b_name] = { anchored_to: short_text, referenced_from: [] };
          }
        }
      }
      let target = null;
      if (node.tagName === "w:fldSimple") {
        const instr = node.getAttribute("w:instr") || "";
        const parts = instr.trim().split(/\s+/);
        if (parts.length > 1 && parts[0] === "REF") target = parts[1];
      } else if (node.tagName === "w:instrText") {
        const instr = node.textContent || "";
        const parts = instr.trim().split(/\s+/);
        if (parts.length > 1 && parts[0] === "REF") target = parts[1];
      }
      if (target) raw_references.push([target, short_text]);
    }
  }

  // Attach references only after the full scan, so a REF may precede its bookmark.
  for (const [target, ref_text] of raw_references) {
    if (has_own(raw_anchors, target)) {
      raw_anchors[target].referenced_from.push(ref_text);
    }
  }

  const diagnostics = [];
  const def_keys = Object.keys(definitions);
  if (def_keys.length > 0) {
    // Longest first so the alternation prefers the most specific term.
    const sorted_terms = def_keys.sort((a, b) => b.length - a.length);
    const escapeRegExp = (str) => str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    const alt = sorted_terms.map(escapeRegExp).join("|");
    // Skip occurrences wrapped in quotes: those are the definitions themselves.
    const usage_pattern = new RegExp(`(?<!["\u201C])\\b(${alt})\\b(?![\u201D"])`, "g");
    for (const m of base_text.matchAll(usage_pattern)) {
      const matched_term = m[1];
      if (has_own(definitions, matched_term)) definitions[matched_term].count++;
    }
    // A "definition" that is never used is treated as a false positive.
    for (const term of def_keys) {
      if (definitions[term].count === 0) {
        delete definitions[term];
        duplicates.delete(term);
      }
    }
  }
  for (const term of duplicates) {
    diagnostics.push(`[Error] Duplicate Definition: '${term}' is defined multiple times.`);
  }

  // --- Possible typos of defined terms --------------------------------------
  const stop_words = new Set([
    "The",
    "This",
    "That",
    "Such",
    "A",
    "An",
    "Any",
    "All",
    "Some",
    "No",
    "Every",
    "Each",
    "As",
    "In",
    "Of",
    "For",
    "To",
    "On",
    "By",
    "With"
  ]);
  const all_cap_pattern = /\b[A-Z][a-zA-Z]*(?:\s+[A-Z][a-zA-Z]*)*\b/g;
  const all_caps = new Set(base_text.match(all_cap_pattern) || []);
  const valid_terms = new Set(Object.keys(definitions));
  const terms_by_first_letter = {};
  for (const term of valid_terms) {
    const fl = term[0].toLowerCase();
    if (!terms_by_first_letter[fl]) terms_by_first_letter[fl] = [];
    terms_by_first_letter[fl].push(term);
  }
  const candidates_by_term = {};
  for (const raw_candidate of all_caps) {
    let candidate = raw_candidate.trim();
    const words = candidate.split(/\s+/);
    // Strip leading articles/prepositions ("The Agreement" -> "Agreement").
    while (words.length > 0) {
      const first = words[0];
      const title = first.charAt(0).toUpperCase() + first.slice(1).toLowerCase();
      if (stop_words.has(title)) words.shift();
      else break;
    }
    candidate = words.join(" ");
    if (candidate.length < 4) continue;
    if (valid_terms.has(candidate)) continue;
    const first_letter = candidate[0].toLowerCase();
    let candidate_terms = terms_by_first_letter[first_letter] || [];
    // Longer candidates are also compared against terms with another initial.
    if (candidate.length > 5) {
      for (const [k, v] of Object.entries(terms_by_first_letter)) {
        if (k !== first_letter) candidate_terms = candidate_terms.concat(v);
      }
    }
    for (const term of candidate_terms) {
      if (Math.abs(candidate.length - term.length) > 2) continue;
      // Simple plurals are not typos.
      if (candidate === term + "s" || candidate === term + "es") continue;
      if (term === candidate + "s" || term === candidate + "es") continue;
      const dist = boundedLevenshtein(candidate, term, 2);
      if (dist === 0 || dist > 2) continue;
      // Short terms need a stricter match to avoid noise.
      if (term.length <= 5) {
        if (dist > 1) continue;
        if (candidate[0].toLowerCase() !== term[0].toLowerCase()) continue;
      }
      if (!candidates_by_term[term]) candidates_by_term[term] = [];
      if (!candidates_by_term[term].includes(candidate)) candidates_by_term[term].push(candidate);
    }
  }
  for (const [term, candidates] of Object.entries(candidates_by_term)) {
    candidates.sort();
    const c_str = candidates.map((c) => `'${c}'`).join(", ");
    diagnostics.push(`[Info] Possible Typos for '${term}': Found ${c_str}`);
  }

  function diag_sort_key(msg) {
    if (msg.startsWith("[Error]")) return 0;
    if (msg.startsWith("[Warning]")) return 1;
    return 2;
  }
  diagnostics.sort((a, b) => {
    const keyA = diag_sort_key(a);
    const keyB = diag_sort_key(b);
    if (keyA !== keyB) return keyA - keyB;
    return a.localeCompare(b);
  });
  return [definitions, diagnostics, raw_anchors];
}
3200
/**
 * Render the read-only "Document Structure" appendix for a document.
 *
 * Sections are emitted for defined terms, semantic diagnostics and named
 * anchors, each only when it has entries.
 *
 * @param {object} doc - Document passed on to `extract_all_domain_metadata`.
 * @param {string} base_text - Rendered document text.
 * @returns {string} Appendix text (starting with a blank-line separator), or
 *   "" when there is nothing to report.
 */
function build_structural_appendix(doc, base_text) {
  const [defs, diagnostics, anchors] = extract_all_domain_metadata(doc, base_text);
  const term_entries = Object.entries(defs);
  const anchor_entries = Object.entries(anchors);

  const nothing_to_report =
    term_entries.length === 0 && diagnostics.length === 0 && anchor_entries.length === 0;
  if (nothing_to_report) return "";

  const lines = [
    "\n\n---",
    "",
    "<!-- READONLY_BOUNDARY_START -->",
    "# Document Structure (Read-Only)",
    "The content below is metadata describing the document's reference structure. Do not include this section in any tracked changes or edits \u2014 it is for your context only and will be discarded on write."
  ];

  if (term_entries.length > 0) {
    lines.push(
      "\n## Defined Terms",
      ...term_entries.map(([term, data]) => `- "${term}" \u2014 used ${data.count} times.`),
    );
  }

  if (diagnostics.length > 0) {
    lines.push("\n## Semantic Diagnostics", ...diagnostics.map((diag) => `- ${diag}`));
  }

  if (anchor_entries.length > 0) {
    lines.push("\n## Named Anchors");
    anchor_entries.forEach(([b_name, data]) => {
      lines.push(`- ${b_name} \u2192 Anchored to: "${data.anchored_to}"`);
      data.referenced_from.forEach((ref) => {
        lines.push(` - Referenced from: "${ref}"`);
      });
    });
  }

  return lines.join("\n");
}
3239
+
3240
+ // src/ingest.ts
3241
/**
 * Load a document from a buffer and render it to text.
 *
 * @param {*} buffer - Input handed to `DocumentObject.load`.
 * @param {boolean} [cleanView=false] - Forwarded to `_extractTextFromDoc`.
 * @returns {Promise<string>} The text produced by `_extractTextFromDoc`.
 */
async function extractTextFromBuffer(buffer, cleanView = false) {
  const loaded_document = await DocumentObject.load(buffer);
  const rendered_text = _extractTextFromDoc(loaded_document, cleanView);
  return rendered_text;
}
3245
/**
 * Render every part of a document to text, optionally followed by the
 * structural appendix.
 *
 * Non-empty part texts are joined with a blank line. Each part is rendered
 * with the character offset at which its text would start in the joined
 * result.
 *
 * @param {object} doc - Loaded document (`doc.pkg` supplies comment data).
 * @param {boolean} [cleanView=false] - Forwarded to `_extract_blocks`.
 * @param {boolean} [includeAppendix=true] - Append the structural appendix.
 * @returns {string} The rendered text.
 */
function _extractTextFromDoc(doc, cleanView = false, includeAppendix = true) {
  const comments_map = extract_comments_data(doc.pkg);
  const sections = [];
  let offset = 0;

  for (const part of iter_document_parts(doc)) {
    // Any section after the first is preceded by a two-character separator.
    const section_start = sections.length > 0 ? offset + 2 : offset;
    const section_text = _extract_blocks(part, comments_map, cleanView, section_start);
    if (!section_text) continue;
    sections.push(section_text);
    offset = section_start + section_text.length;
  }

  const body = sections.join("\n\n");
  if (!includeAppendix) return body;

  const appendix = build_structural_appendix(doc, body);
  return appendix ? body + appendix : body;
}
3265
/**
 * Render the block items of a container (body part, notes part, footnote
 * item or table cell) to text, joining blocks with a blank line.
 *
 * A container whose constructor is named "NotesPart" is introduced by a
 * "---" rule plus a "## Footnotes" / "## Endnotes" heading. Inside a
 * "FootnoteItem" container the first paragraph is prefixed with its
 * `[^<type>-<id>]: ` label. While rendering, a running character position is
 * maintained and handed to nested renderers as their start offset.
 *
 * @param {object} container - Object accepted by `iter_block_items`.
 * @param {object} comments_map - Comment data forwarded to nested renderers.
 * @param {boolean} cleanView - Forwarded to nested renderers.
 * @param {number} cursor - Offset at which this container's text starts.
 * @returns {string} Rendered blocks joined by "\n\n".
 */
function _extract_blocks(container, comments_map, cleanView, cursor) {
  const owning_part = container.part || container;
  const [style_cache, default_pstyle] = _get_style_cache(owning_part);
  const blocks = [];
  let position = cursor;
  let awaiting_first_paragraph = true;

  if (container.constructor && container.constructor.name === "NotesPart") {
    const heading = container.note_type === "fn" ? "## Footnotes" : "## Endnotes";
    const section_intro = `---\n${heading}`;
    blocks.push(section_intro);
    position += section_intro.length;
  }

  for (const item of iter_block_items(container)) {
    // Every block after the first sits behind a two-character separator.
    const follows_block = blocks.length > 0;
    if (follows_block) position += 2;
    const block_start = position;

    const keep = (rendered) => {
      blocks.push(rendered);
      position = block_start + rendered.length;
    };
    // Nothing was emitted: give back the separator reserved above.
    const discard = () => {
      if (follows_block) position -= 2;
    };

    if (item.constructor.name === "FootnoteItem") {
      const note_text = _extract_blocks(item, comments_map, cleanView, block_start);
      if (note_text) keep(note_text);
      else discard();
    } else if (item instanceof Paragraph) {
      let prefix = get_paragraph_prefix(item, style_cache, default_pstyle);
      if (awaiting_first_paragraph && container.constructor.name === "FootnoteItem") {
        prefix = `[^${container.note_type}-${container.id}]: ` + prefix;
      }
      const body = build_paragraph_text(item, comments_map, cleanView, style_cache, default_pstyle);
      keep(prefix + body);
      awaiting_first_paragraph = false;
    } else if (item instanceof Table) {
      const table_text = extract_table(item, comments_map, cleanView, block_start);
      if (table_text) keep(table_text);
      else discard();
      awaiting_first_paragraph = false;
    }
  }

  return blocks.join("\n\n");
}
3317
/**
 * Render a table as text: one line per row, cells separated by " | ".
 *
 * Rows carrying a tracked insertion/deletion marker in `w:trPr` are wrapped
 * as `{++ ... |Chg:<id>++}` / `{-- ... |Chg:<id>--}` unless `cleanView` is
 * set; in clean view deleted rows are omitted. A cell object that appears
 * more than once in a row is rendered only once.
 *
 * @param {object} table - Table wrapper exposing `rows`.
 * @param {object} comments_map - Comment data forwarded to `_extract_blocks`.
 * @param {boolean} cleanView - Hide change markup and deleted rows.
 * @param {number} cursor - Offset at which the table text starts.
 * @returns {string} Row lines joined by "\n".
 */
function extract_table(table, comments_map, cleanView, cursor) {
  const row_lines = [];
  let position = cursor;

  for (const row of table.rows) {
    const visited_cells = new Set();
    const row_props = findChild(row._element, "w:trPr");
    const inserted = row_props ? findChild(row_props, "w:ins") : null;
    const deleted = row_props ? findChild(row_props, "w:del") : null;
    if (cleanView && deleted) continue;

    // Rows after the first are preceded by a newline.
    const row_start = row_lines.length > 0 ? position + 1 : position;
    // "{++ " / "{-- " opens a marked-up row.
    const opener_length = !cleanView && (inserted || deleted) ? 4 : 0;

    const cell_texts = [];
    let cell_position = row_start + opener_length;
    for (const cell of row.cells) {
      if (visited_cells.has(cell)) continue;
      visited_cells.add(cell);
      // " | " between cells.
      if (cell_texts.length > 0) cell_position += 3;
      const cell_text = _extract_blocks(cell, comments_map, cleanView, cell_position);
      cell_texts.push(cell_text);
      cell_position += cell_text.length;
    }

    let row_line = cell_texts.join(" | ");
    if (!cleanView && inserted) {
      row_line = `{++ ${row_line} |Chg:${inserted.getAttribute("w:id")}++}`;
    } else if (!cleanView && deleted) {
      row_line = `{-- ${row_line} |Chg:${deleted.getAttribute("w:id")}--}`;
    }

    row_lines.push(row_line);
    position = row_start + row_line.length;
  }

  return row_lines.join("\n");
}
3352
+ function build_paragraph_text(paragraph, comments_map, cleanView, style_cache, default_pstyle) {
3353
+ const parts = [];
3354
+ const active_ins = {};
3355
+ const active_del = {};
3356
+ const active_comments = /* @__PURE__ */ new Set();
3357
+ const active_fmt = {};
3358
+ const deferred_meta_states = [];
3359
+ let pending_text = "";
3360
+ let current_wrappers = ["", ""];
3361
+ let current_style = ["", ""];
3362
+ const items = Array.from(iter_paragraph_content(paragraph));
3363
+ const is_heading = is_heading_paragraph(paragraph, style_cache, default_pstyle);
3364
+ const native_heading = is_native_heading(paragraph, style_cache, default_pstyle);
3365
+ let leading_strip_active = is_heading;
3366
+ for (let i = 0; i < items.length; i++) {
3367
+ const item = items[i];
3368
+ if (item instanceof Run) {
3369
+ const [prefix, suffix] = get_run_style_markers(item, native_heading);
3370
+ const text = get_run_text(item);
3371
+ if (cleanView && Object.keys(active_del).length > 0) continue;
3372
+ if (leading_strip_active) {
3373
+ if (!text || !text.trim()) continue;
3374
+ leading_strip_active = false;
3375
+ }
3376
+ const seg = apply_formatting_to_segments(text, prefix, suffix);
3377
+ if (seg) {
3378
+ const new_wrappers = cleanView ? ["", ""] : _get_wrappers(active_ins, active_del, active_comments, active_fmt);
3379
+ const new_style = [prefix, suffix];
3380
+ if (pending_text && new_wrappers[0] === current_wrappers[0] && new_wrappers[1] === current_wrappers[1]) {
3381
+ if (new_style[0] === current_style[0] && new_style[1] === current_style[1] && current_style[0] !== "" && pending_text.endsWith(current_style[1]) && seg.startsWith(new_style[0])) {
3382
+ pending_text = pending_text.slice(0, -current_style[1].length) + seg.slice(new_style[0].length);
3383
+ } else {
3384
+ pending_text += seg;
3385
+ }
3386
+ current_style = new_style;
3387
+ } else {
3388
+ if (pending_text) parts.push(`${current_wrappers[0]}${pending_text}${current_wrappers[1]}`);
3389
+ pending_text = seg;
3390
+ current_wrappers = new_wrappers;
3391
+ current_style = new_style;
3392
+ }
3393
+ if (!cleanView) {
3394
+ const has_meta = Object.keys(active_ins).length > 0 || Object.keys(active_del).length > 0 || active_comments.size > 0 || Object.keys(active_fmt).length > 0;
3395
+ if (has_meta) {
3396
+ deferred_meta_states.push([{ ...active_ins }, { ...active_del }, new Set(active_comments), { ...active_fmt }]);
3397
+ }
3398
+ let should_defer = false;
3399
+ const is_redline = Object.keys(active_ins).length > 0 || Object.keys(active_del).length > 0 || Object.keys(active_fmt).length > 0;
3400
+ if (is_redline) {
3401
+ let j = i + 1;
3402
+ let next_is_redline = false;
3403
+ let temp_ins = Object.keys(active_ins).length;
3404
+ let temp_del = Object.keys(active_del).length;
3405
+ let temp_fmt = Object.keys(active_fmt).length;
3406
+ while (j < items.length) {
3407
+ const next_item = items[j];
3408
+ if (next_item instanceof Run) {
3409
+ if (!get_run_text(next_item)) {
3410
+ j++;
3411
+ continue;
3412
+ }
3413
+ if (temp_ins > 0 || temp_del > 0 || temp_fmt > 0) next_is_redline = true;
3414
+ break;
3415
+ } else {
3416
+ const ev = next_item;
3417
+ if (ev.type === "ins_start") temp_ins++;
3418
+ else if (ev.type === "ins_end") temp_ins = Math.max(0, temp_ins - 1);
3419
+ else if (ev.type === "del_start") temp_del++;
3420
+ else if (ev.type === "del_end") temp_del = Math.max(0, temp_del - 1);
3421
+ else if (ev.type === "fmt_start") temp_fmt++;
3422
+ else if (ev.type === "fmt_end") temp_fmt = Math.max(0, temp_fmt - 1);
3423
+ }
3424
+ j++;
3425
+ }
3426
+ if (next_is_redline) should_defer = true;
3427
+ }
3428
+ if (!should_defer && deferred_meta_states.length > 0) {
3429
+ const meta_block = _build_merged_meta_block(deferred_meta_states, comments_map);
3430
+ if (meta_block) {
3431
+ if (pending_text) {
3432
+ parts.push(`${current_wrappers[0]}${pending_text}${current_wrappers[1]}`);
3433
+ pending_text = "";
3434
+ current_wrappers = ["", ""];
3435
+ current_style = ["", ""];
3436
+ }
3437
+ parts.push(`{>>${meta_block}<<}`);
3438
+ }
3439
+ deferred_meta_states.length = 0;
3440
+ }
3441
+ }
3442
+ }
3443
+ } else {
3444
+ const ev = item;
3445
+ leading_strip_active = false;
3446
+ if (!["ins_start", "ins_end", "del_start", "del_end", "fmt_start", "fmt_end"].includes(ev.type)) {
3447
+ if (pending_text) {
3448
+ parts.push(`${current_wrappers[0]}${pending_text}${current_wrappers[1]}`);
3449
+ pending_text = "";
3450
+ current_wrappers = ["", ""];
3451
+ current_style = ["", ""];
3452
+ }
3453
+ }
3454
+ if (ev.type === "start") active_comments.add(ev.id);
3455
+ else if (ev.type === "end") active_comments.delete(ev.id);
3456
+ else if (ev.type === "ins_start") active_ins[ev.id] = ev;
3457
+ else if (ev.type === "ins_end") delete active_ins[ev.id];
3458
+ else if (ev.type === "del_start") active_del[ev.id] = ev;
3459
+ else if (ev.type === "del_end") delete active_del[ev.id];
3460
+ else if (ev.type === "fmt_start") active_fmt[ev.id] = ev;
3461
+ else if (ev.type === "fmt_end") delete active_fmt[ev.id];
3462
+ else if (ev.type === "footnote" || ev.type === "endnote") {
3463
+ if (pending_text) {
3464
+ parts.push(`${current_wrappers[0]}${pending_text}${current_wrappers[1]}`);
3465
+ pending_text = "";
3466
+ current_wrappers = ["", ""];
3467
+ current_style = ["", ""];
3468
+ }
3469
+ parts.push(`[^${ev.type === "footnote" ? "fn" : "en"}-${ev.id}]`);
3470
+ } else if (ev.type === "hyperlink_start") {
3471
+ if (pending_text) {
3472
+ parts.push(`${current_wrappers[0]}${pending_text}${current_wrappers[1]}`);
3473
+ pending_text = "";
3474
+ current_wrappers = ["", ""];
3475
+ current_style = ["", ""];
3476
+ }
3477
+ parts.push("[");
3478
+ } else if (ev.type === "hyperlink_end") {
3479
+ if (pending_text) {
3480
+ parts.push(`${current_wrappers[0]}${pending_text}${current_wrappers[1]}`);
3481
+ pending_text = "";
3482
+ current_wrappers = ["", ""];
3483
+ current_style = ["", ""];
3484
+ }
3485
+ parts.push(`](${ev.date})`);
3486
+ } else if (ev.type === "xref_start") {
3487
+ if (pending_text) {
3488
+ parts.push(`${current_wrappers[0]}${pending_text}${current_wrappers[1]}`);
3489
+ pending_text = "";
3490
+ current_wrappers = ["", ""];
3491
+ current_style = ["", ""];
3492
+ }
3493
+ parts.push("[~");
3494
+ } else if (ev.type === "xref_end") {
3495
+ if (pending_text) {
3496
+ parts.push(`${current_wrappers[0]}${pending_text}${current_wrappers[1]}`);
3497
+ pending_text = "";
3498
+ current_wrappers = ["", ""];
3499
+ current_style = ["", ""];
3500
+ }
3501
+ parts.push(`~](#${ev.id})`);
3502
+ } else if (ev.type === "bookmark") {
3503
+ if (pending_text) {
3504
+ parts.push(`${current_wrappers[0]}${pending_text}${current_wrappers[1]}`);
3505
+ pending_text = "";
3506
+ current_wrappers = ["", ""];
3507
+ current_style = ["", ""];
3508
+ }
3509
+ parts.push(`{#${ev.id}}`);
3510
+ }
3511
+ }
3199
3512
  }
3200
- if (block_start < n) {
3201
- const block_text = text.substring(block_start, n);
3202
- if (block_text) blocks.push([block_text, block_start]);
3513
+ if (pending_text) parts.push(`${current_wrappers[0]}${pending_text}${current_wrappers[1]}`);
3514
+ if (deferred_meta_states.length > 0) {
3515
+ const meta_block = _build_merged_meta_block(deferred_meta_states, comments_map);
3516
+ if (meta_block) parts.push(`{>>${meta_block}<<}`);
3203
3517
  }
3204
- return blocks;
3518
+ return parts.join("");
3205
3519
  }
3206
- function _merge_footnote_sections(blocks) {
3207
- if (!blocks.length) return blocks;
3208
- const merged = [];
3209
- let i = 0;
3210
- while (i < blocks.length) {
3211
- const [block_text, block_offset] = blocks[i];
3212
- const stripped = block_text.trimStart();
3213
- const is_section_header = stripped.startsWith("## Footnotes") || stripped.startsWith("## Endnotes");
3214
- if (!is_section_header) {
3215
- merged.push([block_text, block_offset]);
3216
- i++;
3217
- continue;
3520
/**
 * Choose the markup delimiters for text given the active annotations.
 *
 * Precedence: deletion, then insertion, then highlight (any active comment or
 * formatting change), otherwise no wrapper.
 *
 * @param {object} ins - Active insertions keyed by id.
 * @param {object} del - Active deletions keyed by id.
 * @param {Set} comments - Active comment ids.
 * @param {object} fmt - Active formatting changes keyed by id.
 * @returns {[string, string]} Opening and closing delimiter.
 */
function _get_wrappers(ins, del, comments, fmt) {
  const is_populated = (map) => Object.keys(map).length > 0;

  if (is_populated(del)) {
    return ["{--", "--}"];
  }
  if (is_populated(ins)) {
    return ["{++", "++}"];
  }
  const highlighted = comments.size > 0 || is_populated(fmt);
  return highlighted ? ["{==", "==}"] : ["", ""];
}
3526
+ function _build_merged_meta_block(states_list, comments_map) {
3527
+ const change_lines = [];
3528
+ const comment_lines = [];
3529
+ const seen_sigs = /* @__PURE__ */ new Set();
3530
+ for (const [ins_map, del_map, comments_set, fmt_map] of states_list) {
3531
+ let render_comment2 = function(cid) {
3532
+ if (!comments_map[cid]) return;
3533
+ const sig = `Com:${cid}`;
3534
+ if (seen_sigs.has(sig)) return;
3535
+ const data = comments_map[cid];
3536
+ let header = `[${sig}] ${data.author}`;
3537
+ if (data.date) header += ` @ ${data.date}`;
3538
+ if (data.resolved) header += `(RESOLVED)`;
3539
+ comment_lines.push(`${header}: ${data.text}`);
3540
+ seen_sigs.add(sig);
3541
+ if (children_map[cid]) {
3542
+ const children = children_map[cid].sort((a, b) => (comments_map[a]?.date || "").localeCompare(comments_map[b]?.date || ""));
3543
+ for (const child_id of children) {
3544
+ render_comment2(child_id);
3545
+ }
3546
+ }
3547
+ };
3548
+ var render_comment = render_comment2;
3549
+ for (const [uid, meta] of Object.entries(ins_map)) {
3550
+ const sig = `Chg:${uid}`;
3551
+ if (!seen_sigs.has(sig)) {
3552
+ change_lines.push(`[${sig} insert] ${meta.author || "Unknown"}`);
3553
+ seen_sigs.add(sig);
3554
+ }
3218
3555
  }
3219
- let accumulated_text = block_text;
3220
- let j = i + 1;
3221
- while (j < blocks.length) {
3222
- const [next_text] = blocks[j];
3223
- const next_stripped = next_text.trimStart();
3224
- if (next_stripped.startsWith("[^fn-") || next_stripped.startsWith("[^en-")) {
3225
- accumulated_text = `${accumulated_text}
3226
-
3227
- ${next_text}`;
3228
- j++;
3229
- } else {
3230
- break;
3556
+ for (const [uid, meta] of Object.entries(del_map)) {
3557
+ const sig = `Chg:${uid}`;
3558
+ if (!seen_sigs.has(sig)) {
3559
+ change_lines.push(`[${sig} delete] ${meta.author || "Unknown"}`);
3560
+ seen_sigs.add(sig);
3231
3561
  }
3232
3562
  }
3233
- merged.push([accumulated_text, block_offset]);
3234
- i = j;
3235
- }
3236
- return merged;
3237
- }
3238
- function _assemble_pages(block_records) {
3239
- if (!block_records.length) return [[""], [0]];
3240
- const pages = [];
3241
- const page_starts = [];
3242
- let current_blocks = [];
3243
- let current_size = 0;
3244
- let current_start = -1;
3245
- const flush_current = () => {
3246
- if (current_blocks.length > 0) {
3247
- pages.push(current_blocks.join("\n\n"));
3248
- page_starts.push(current_start);
3563
+ for (const [uid, meta] of Object.entries(fmt_map)) {
3564
+ const sig = `Chg:${uid}`;
3565
+ if (!seen_sigs.has(sig)) {
3566
+ change_lines.push(`[${sig} format] ${meta.author || "Unknown"}`);
3567
+ seen_sigs.add(sig);
3568
+ }
3249
3569
  }
3250
- current_blocks = [];
3251
- current_size = 0;
3252
- current_start = -1;
3253
- };
3254
- for (const [block_text, block_offset] of block_records) {
3255
- const block_size = block_text.length;
3256
- const added_size = block_size + (current_blocks.length > 0 ? 2 : 0);
3257
- if (current_blocks.length > 0 && current_size + added_size > PAGE_TARGET_CHARS) {
3258
- flush_current();
3570
+ const children_map = {};
3571
+ for (const [c_id, data] of Object.entries(comments_map)) {
3572
+ const p_id = data.parent_id;
3573
+ if (p_id) {
3574
+ if (!children_map[p_id]) children_map[p_id] = [];
3575
+ children_map[p_id].push(c_id);
3576
+ }
3259
3577
  }
3260
- if (current_blocks.length === 0 && block_size > PAGE_TARGET_CHARS) {
3261
- pages.push(block_text);
3262
- page_starts.push(block_offset);
3263
- continue;
3578
+ const sorted_ids = Array.from(comments_set).sort();
3579
+ for (const c_id of sorted_ids) {
3580
+ render_comment2(c_id);
3264
3581
  }
3265
- if (current_blocks.length === 0) current_start = block_offset;
3266
- current_blocks.push(block_text);
3267
- current_size += current_size > 0 ? added_size : block_size;
3268
3582
  }
3269
- flush_current();
3270
- if (!pages.length) return [[""], [0]];
3271
- return [pages, page_starts];
3272
- }
3273
- function _count_tracked_changes(page_content) {
3274
- const matches = [...page_content.matchAll(_CHG_ID_PATTERN)];
3275
- const distinct = new Set(matches.map((m) => m[1]));
3276
- return distinct.size;
3583
+ return [...change_lines, ...comment_lines].join("\n");
3277
3584
  }
3278
3585
 
3279
3586
  // src/outline.ts
@@ -3560,16 +3867,524 @@ function _offset_to_page(offset, body_page_offsets) {
3560
3867
  return page;
3561
3868
  }
3562
3869
 
3870
+ // src/sanitize/report.ts
3871
/**
 * Accumulates the outcome of one document finalization run and renders it as
 * a plain-text report.
 *
 * Transform functions hand back free-form text lines; `add_transform_lines`
 * sorts them into report sections and `render` prints the sections.
 */
var SanitizeReport = class {
  filename;
  mode;
  author;
  tracked_changes_found = 0;
  tracked_changes_accepted = 0;
  tracked_changes_kept = 0;
  change_lines = [];
  comments_removed = 0;
  comments_kept = 0;
  removed_comment_lines = [];
  kept_comment_lines = [];
  metadata_lines = [];
  structural_lines = [];
  warnings = [];
  status = "clean";
  blocked_reason = null;

  /**
   * @param {string} filename - Name shown in the report header.
   * @param {string} [mode="full"] - Sanitize mode; "keep-markup" enables the
   *   "VISIBLE TO COUNTERPARTY" section.
   * @param {string|null} [author=null] - Replacement author, echoed in the header.
   */
  constructor(filename, mode = "full", author = null) {
    this.filename = filename;
    this.mode = mode;
    this.author = author;
  }

  /**
   * Sorts transform output lines into the report sections.
   *
   * Routing is keyword based, so lines that embed user content are matched on
   * their structure first:
   *  - a line starting with "[Open]" / "[Resolved]" is always a comment line,
   *    even when the quoted comment text contains words such as "insertion";
   *  - an "... authors replaced: ..." line is metadata, not a stripped comment.
   *
   * @param {string[]} lines - Lines returned by a transform function.
   */
  add_transform_lines(lines) {
    const commentStatusPrefix = /^\s*\[(?:open|resolved)\]/;
    const fileCommentLine = (line, lower) => {
      if (lower.includes("kept") || lower.includes("visible")) {
        this.kept_comment_lines.push(line);
      } else {
        this.removed_comment_lines.push(line);
      }
    };
    for (const line of lines) {
      const lower = line.toLowerCase();
      if (commentStatusPrefix.test(lower)) {
        // Per-comment detail line: the quoted text is user content and must
        // not be allowed to steer the line into another section.
        fileCommentLine(line, lower);
      } else if (lower.includes("authors replaced")) {
        this.metadata_lines.push(line);
      } else if (lower.includes("tracked change") || lower.includes("insertion") || lower.includes("deletion") || lower.includes("accepted")) {
        this.change_lines.push(line);
      } else if (lower.includes("comment") || lower.includes("[open]") || lower.includes("[resolved]")) {
        fileCommentLine(line, lower);
      } else if (lower.includes("author") || lower.includes("template") || lower.includes("company") || lower.includes("manager") || lower.includes("metadata") || lower.includes("timestamp") || lower.includes("custom xml") || lower.includes("last modified by") || lower.includes("revision count") || lower.includes("last printed") || lower.includes("editing time")) {
        this.metadata_lines.push(line);
      } else if (lower.includes("hyperlink") || lower.includes("warning")) {
        this.warnings.push(line);
      } else {
        this.structural_lines.push(line);
      }
    }
  }

  /**
   * Renders the report as a single newline-joined string.
   *
   * A report whose `status` is "blocked" contains only the header and the
   * blocked reason; otherwise every non-empty section is printed followed by
   * the result line.
   *
   * @returns {string} The formatted report.
   */
  render() {
    const sep = "\u2550".repeat(50);
    const lines = [sep, `Finalization Report: ${this.filename}`];
    const flags = [];
    if (this.mode === "keep-markup") flags.push("--keep-markup");
    if (this.author) flags.push(`--author "${this.author}"`);
    if (this.tracked_changes_accepted > 0) flags.push("--accept-all");
    if (flags.length > 0) lines.push(flags.join(" "));
    lines.push(sep);
    if (this.status === "blocked") {
      lines.push("");
      lines.push(`BLOCKED: ${this.blocked_reason}`);
      lines.push(sep);
      return lines.join("\n");
    }
    if (this.mode === "keep-markup" && (this.tracked_changes_kept > 0 || this.comments_kept > 0)) {
      lines.push("");
      lines.push("VISIBLE TO COUNTERPARTY");
      if (this.tracked_changes_kept > 0) lines.push(`  Tracked changes: ${this.tracked_changes_kept}`);
      if (this.comments_kept > 0) {
        lines.push(`  Open comments: ${this.comments_kept}`);
        for (const cl of this.kept_comment_lines) lines.push(`    ${cl}`);
      }
      if (this.author) lines.push(`  Author on all markup: "${this.author}"`);
    }
    if (this.change_lines.length > 0) {
      lines.push("");
      lines.push("TRACKED CHANGES");
      for (const cl of this.change_lines) lines.push(`  ${cl}`);
    }
    if (this.removed_comment_lines.length > 0) {
      lines.push("");
      lines.push("COMMENTS (stripped)");
      for (const cl of this.removed_comment_lines) lines.push(`  ${cl}`);
    }
    if (this.metadata_lines.length > 0) {
      lines.push("");
      lines.push("METADATA");
      for (const ml of this.metadata_lines) lines.push(`  ${ml}`);
    }
    if (this.structural_lines.length > 0) {
      lines.push("");
      lines.push("STRUCTURAL & PROTECTION");
      for (const sl of this.structural_lines) lines.push(`  ${sl}`);
    }
    if (this.warnings.length > 0) {
      lines.push("");
      lines.push("WARNINGS");
      for (const w of this.warnings) lines.push(`  \u26A0 ${w}`);
    }
    lines.push("");
    lines.push(sep);
    if (this.warnings.length > 0) {
      lines.push(`Result: CLEAN WITH WARNINGS (${this.warnings.length} warning${this.warnings.length > 1 ? "s" : ""})`);
    } else {
      lines.push("Result: SECURE & READY TO SEND");
    }
    lines.push(sep);
    return lines.join("\n");
  }
};
3974
+
3975
+ // src/sanitize/transforms.ts
3976
/**
 * Collects every descendant whose tag name is `localName`, with or without a
 * namespace prefix (e.g. both "creator" and "dc:creator").
 *
 * @param {Element} element - Root of the subtree to search.
 * @param {string} localName - Unprefixed tag name to match.
 * @returns {Element[]} Matching descendants, in the order the DOM returns them.
 */
function findDescendantsByLocalName(element, localName) {
  const prefixedSuffix = ":" + localName;
  return Array.from(element.getElementsByTagName("*")).filter(
    (node) => node.tagName === localName || node.tagName.endsWith(prefixedSuffix)
  );
}
3987
/**
 * Removes the listed `w:rsid*` attributes from every element under
 * `doc.element` and deletes all `w:rsids` elements.
 *
 * @param {object} doc - Document wrapper exposing the root `element`.
 * @returns {string[]} One summary line, or an empty array when nothing was removed.
 *   The count covers removed attributes plus removed `w:rsids` elements.
 */
function strip_rsid(doc) {
  const RSID_ATTRIBUTES = ["w:rsidR", "w:rsidRPr", "w:rsidRDefault", "w:rsidP", "w:rsidDel", "w:rsidSect", "w:rsidTr"];
  let removed = 0;

  const nodes = doc.element.getElementsByTagName("*");
  for (let idx = 0; idx < nodes.length; idx++) {
    const node = nodes[idx];
    for (const name of RSID_ATTRIBUTES) {
      if (!node.hasAttribute(name)) continue;
      node.removeAttribute(name);
      removed++;
    }
  }

  for (const rsids of findAllDescendants(doc.element, "w:rsids")) {
    if (!rsids.parentNode) continue;
    rsids.parentNode.removeChild(rsids);
    removed++;
  }

  if (!removed) return [];
  return [`rsid attributes: ${removed} removed`];
}
4008
/**
 * Removes `w14:paraId` and `w14:textId` attributes from every element under
 * `doc.element`.
 *
 * @param {object} doc - Document wrapper exposing the root `element`.
 * @returns {string[]} One summary line, or an empty array when nothing was removed.
 */
function strip_para_ids(doc) {
  const ID_ATTRIBUTES = ["w14:paraId", "w14:textId"];
  let removed = 0;
  const nodes = doc.element.getElementsByTagName("*");
  for (let idx = 0; idx < nodes.length; idx++) {
    const node = nodes[idx];
    for (const name of ID_ATTRIBUTES) {
      if (!node.hasAttribute(name)) continue;
      node.removeAttribute(name);
      removed++;
    }
  }
  if (!removed) return [];
  return [`Paragraph/text IDs: ${removed} removed`];
}
4022
/**
 * Deletes every `w:proofErr` element under `doc.element`.
 *
 * @param {object} doc - Document wrapper exposing the root `element`.
 * @returns {string[]} One summary line, or an empty array when none were found.
 */
function strip_proof_errors(doc) {
  const markers = findAllDescendants(doc.element, "w:proofErr");
  for (const marker of markers) {
    const parent = marker.parentNode;
    if (parent != null) parent.removeChild(marker);
  }
  if (markers.length === 0) return [];
  return [`Spell check markers: ${markers.length} removed`];
}
4027
/**
 * Removes `w:rPr` and `w:pPr` elements that have no children, or whose only
 * child is a whitespace-only text node.
 *
 * `w:rPr` is processed before `w:pPr`, so a `w:pPr` emptied by the first pass
 * is picked up by the second.
 *
 * @param {object} doc - Document wrapper exposing the root `element`.
 * @returns {string[]} One summary line, or an empty array when nothing was removed.
 */
function strip_empty_properties(doc) {
  const TEXT_NODE = 3;
  const isEffectivelyEmpty = (node) => {
    const kids = node.childNodes;
    if (kids.length === 0) return true;
    if (kids.length !== 1) return false;
    const only = kids[0];
    return only.nodeType === TEXT_NODE && !only.textContent?.trim();
  };

  let removed = 0;
  for (const tag of ["w:rPr", "w:pPr"]) {
    for (const props of findAllDescendants(doc.element, tag)) {
      if (!isEffectivelyEmpty(props)) continue;
      props.parentNode?.removeChild(props);
      removed++;
    }
  }
  if (!removed) return [];
  return [`Empty property elements: ${removed} removed`];
}
4040
/**
 * Reports whether the on/off property `tagName` is switched on in `rPr`.
 *
 * Only direct element children are considered. A property element without a
 * `w:val` attribute counts as on; `w:val` of "0", "false" or "off" counts as off.
 */
function _is_toggle_on(rPr, tagName) {
  const OFF_VALUES = ["0", "false", "off"];
  for (let i = 0; i < rPr.childNodes.length; i++) {
    const child = rPr.childNodes[i];
    if (child.nodeType !== 1 || child.tagName !== tagName) continue;
    // Some DOM implementations return "" instead of null for a missing attribute.
    const val = child.getAttribute("w:val");
    return !val || !OFF_VALUES.includes(val);
  }
  return false;
}

/**
 * Removes every `w:r` run whose run properties mark it as hidden
 * (`w:vanish` or `w:webHidden` switched on).
 *
 * A property explicitly switched off (e.g. `<w:vanish w:val="0"/>`) leaves the
 * run in place: that text is visible and must not be deleted. `w:rPr`
 * elements whose parent is not a `w:r` are ignored.
 *
 * @param {object} doc - Document wrapper exposing the root `element`.
 * @returns {string[]} One summary line, or an empty array when nothing was removed.
 */
function strip_hidden_text(doc) {
  let count = 0;
  // Snapshot the collection so removals cannot disturb the iteration.
  const runProps = Array.from(doc.element.getElementsByTagName("w:rPr"));
  for (const rPr of runProps) {
    if (!_is_toggle_on(rPr, "w:vanish") && !_is_toggle_on(rPr, "w:webHidden")) continue;
    const run = rPr.parentNode;
    if (run && run.tagName === "w:r" && run.parentNode) {
      run.parentNode.removeChild(run);
      count++;
    }
  }
  return count ? [`Hidden text runs: ${count} removed`] : [];
}
4054
/**
 * Counts tracked-change elements under `doc.element`.
 *
 * @param {object} doc - Document wrapper exposing the root `element`.
 * @returns {number[]} `[insertions, deletions, formatting]`, where formatting
 *   is the combined number of `w:rPrChange`, `w:pPrChange` and
 *   `w:sectPrChange` elements.
 */
function count_tracked_changes(doc) {
  const tally = (tag) => findAllDescendants(doc.element, tag).length;
  const insertions = tally("w:ins");
  const deletions = tally("w:del");
  const formatting = ["w:rPrChange", "w:pPrChange", "w:sectPrChange"].reduce(
    (sum, tag) => sum + tally(tag),
    0
  );
  return [insertions, deletions, formatting];
}
4060
/**
 * Gathers the distinct non-empty `w:author` values found on tracked-change
 * elements (`w:ins`, `w:del`, `w:rPrChange`, `w:pPrChange`, `w:sectPrChange`).
 *
 * @param {object} doc - Document wrapper exposing the root `element`.
 * @returns {Set<string>} Author names in first-seen order.
 */
function get_track_change_authors(doc) {
  const revisionTags = ["w:ins", "w:del", "w:rPrChange", "w:pPrChange", "w:sectPrChange"];
  const names = revisionTags
    .flatMap((tag) => findAllDescendants(doc.element, tag))
    .map((node) => node.getAttribute("w:author"))
    .filter((name) => Boolean(name));
  return new Set(names);
}
4070
/**
 * Concatenates the text of all `w:t` descendants followed by the text of all
 * `w:delText` descendants of `el`.
 *
 * @param {Element} el - Element to read text from.
 * @returns {string} The joined text (empty string when there is none).
 */
function _getElementText(el) {
  const textsOf = (tag) =>
    findAllDescendants(el, tag)
      .map((node) => node.textContent)
      .filter(Boolean);
  return [...textsOf("w:t"), ...textsOf("w:delText")].join("");
}
4078
/**
 * Flattens newlines to spaces, trims, and shortens the result to at most
 * `maxLen` characters by replacing the tail with "...".
 *
 * @param {string} text - Text to shorten.
 * @param {number} [maxLen=60] - Maximum length of the returned string.
 * @returns {string} The flattened, possibly shortened text.
 */
function _truncate(text, maxLen = 60) {
  const flattened = text.split("\n").join(" ").trim();
  return flattened.length > maxLen
    ? `${flattened.substring(0, maxLen - 3)}...`
    : flattened;
}
4083
/**
 * Accepts every tracked revision in the document and describes what was
 * accepted.
 *
 * The insertion/deletion text is captured before `RedlineEngine` rewrites the
 * tree; afterwards any `w:rPrChange`, `w:pPrChange` and `w:sectPrChange`
 * elements still present are removed. The reported total counts insertions
 * and deletions only.
 *
 * @param {object} doc - Document wrapper exposing the root `element`.
 * @returns {string[]} A summary line followed by one detail line per
 *   non-empty insertion/deletion, or an empty array when the document had no
 *   insertions or deletions.
 */
function accept_all_tracked_changes(doc) {
  const insertions = findAllDescendants(doc.element, "w:ins");
  const deletions = findAllDescendants(doc.element, "w:del");

  const details = [];
  insertions.forEach((node) => {
    const text = _getElementText(node).trim();
    if (text) details.push(`  Accepted insertion: "${_truncate(text, 60)}"`);
  });
  deletions.forEach((node) => {
    const text = _getElementText(node).trim();
    if (text) details.push(`  Accepted deletion of: "${_truncate(text, 60)}"`);
  });

  new RedlineEngine(doc).accept_all_revisions();

  ["w:rPrChange", "w:pPrChange", "w:sectPrChange"].forEach((tag) => {
    findAllDescendants(doc.element, tag).forEach((node) => {
      node.parentNode?.removeChild(node);
    });
  });

  const accepted = insertions.length + deletions.length;
  if (!accepted) return [];
  return [`Tracked changes auto-accepted: ${accepted}`].concat(details);
}
4108
/**
 * Summarizes the comments returned by `extract_comments_data(doc.pkg)`.
 *
 * @param {object} doc - Document wrapper exposing `pkg`.
 * @returns {{total: number, open: number, resolved: number, comments: object[]}}
 *   Counts plus one record per comment (`id` merged with the comment's data).
 */
function get_comments_summary(doc) {
  const comments = Object.entries(extract_comments_data(doc.pkg)).map(
    ([id, info]) => ({ id, ...info })
  );
  const resolved = comments.filter((entry) => entry.resolved).length;
  return {
    total: comments.length,
    open: comments.length - resolved,
    resolved,
    comments,
  };
}
4120
/**
 * Deletes every comment through `CommentsManager` and then removes any
 * remaining `w:commentRangeStart`, `w:commentRangeEnd` and
 * `w:commentReference` elements under `doc.element`.
 *
 * @param {object} doc - Document wrapper exposing `pkg` and `element`.
 * @returns {string[]} A summary line followed by one line per removed
 *   comment, or an empty array when the document has no comments.
 */
function remove_all_comments(doc) {
  const data = extract_comments_data(doc.pkg);
  const entries = Object.entries(data);
  if (entries.length === 0) return [];

  const manager = new CommentsManager(doc);
  const details = [];
  let resolved = 0;
  for (const [commentId, info] of entries) {
    const label = info.resolved ? "[Resolved]" : "[Open]";
    if (info.resolved) resolved++;
    details.push(`  ${label} "${_truncate(info.text || "", 60)}" (${info.author || "Unknown"})`);
    manager.deleteComment(commentId);
  }

  ["w:commentRangeStart", "w:commentRangeEnd", "w:commentReference"].forEach((tag) => {
    findAllDescendants(doc.element, tag).forEach((node) => {
      node.parentNode?.removeChild(node);
    });
  });

  const open = entries.length - resolved;
  return [`Comments removed: ${entries.length} (${resolved} resolved, ${open} open)`].concat(details);
}
4140
/**
 * Rewrites the `w:author` of every `w:comment` that has one to `newAuthor`,
 * and rewrites any existing `w:initials` to the upper-cased first letters of
 * the space-separated words in `newAuthor`.
 *
 * @param {object} doc - Document wrapper passed to `CommentsManager`.
 * @param {string} newAuthor - Replacement author name.
 * @returns {string[]} One summary line listing the replaced authors (sorted),
 *   or an empty array when there is no comments part or no author was replaced.
 */
function replace_comment_authors(doc, newAuthor) {
  const manager = new CommentsManager(doc);
  if (!manager.commentsPart) return [];

  const replaced = new Set();
  for (const comment of findAllDescendants(manager.commentsPart._element, "w:comment")) {
    const previous = comment.getAttribute("w:author");
    if (previous) {
      replaced.add(previous);
      comment.setAttribute("w:author", newAuthor);
    }
    if (comment.hasAttribute("w:initials")) {
      const words = newAuthor.split(" ").filter(Boolean);
      const initials = words.map((word) => word[0]).join("").toUpperCase();
      comment.setAttribute("w:initials", initials);
    }
  }

  if (replaced.size === 0) return [];
  const names = Array.from(replaced).sort().join(", ");
  return [`Comment authors replaced: ${names} \u2192 "${newAuthor}"`];
}
4158
/**
 * Rewrites the `w:author` attribute on tracked-change elements to `newAuthor`.
 *
 * Covers the same element set that `count_tracked_changes` and
 * `get_track_change_authors` treat as tracked changes, including
 * `w:sectPrChange`, so no original author name survives on a
 * section-property change.
 *
 * @param {object} doc - Document wrapper exposing the root `element`.
 * @param {string} newAuthor - Replacement author name.
 * @returns {string[]} One summary line listing the replaced authors (sorted),
 *   or an empty array when no element carried an author.
 */
function replace_change_authors(doc, newAuthor) {
  const trackedTags = ["w:ins", "w:del", "w:rPrChange", "w:pPrChange", "w:sectPrChange"];
  const original = /* @__PURE__ */ new Set();
  for (const tag of trackedTags) {
    for (const el of Array.from(doc.element.getElementsByTagName(tag))) {
      const author = el.getAttribute("w:author");
      if (author) {
        original.add(author);
        el.setAttribute("w:author", newAuthor);
      }
    }
  }
  return original.size ? [`Track change authors replaced: ${Array.from(original).sort().join(", ")} \u2192 "${newAuthor}"`] : [];
}
4171
/**
 * Overwrites the `w:date` attribute on tracked-change elements with a fixed
 * timestamp so the real editing times are not disclosed.
 *
 * Covers the same element set that `count_tracked_changes` treats as tracked
 * changes, including `w:sectPrChange`. Elements without a `w:date` attribute
 * are left untouched.
 *
 * @param {object} doc - Document wrapper exposing the root `element`.
 * @returns {string[]} One summary line, or an empty array when no date was rewritten.
 */
function normalize_change_dates(doc) {
  const trackedTags = ["w:ins", "w:del", "w:rPrChange", "w:pPrChange", "w:sectPrChange"];
  const fixed = "2025-01-01T00:00:00Z";
  let count = 0;
  for (const tag of trackedTags) {
    for (const el of Array.from(doc.element.getElementsByTagName(tag))) {
      if (el.hasAttribute("w:date")) {
        el.setAttribute("w:date", fixed);
        count++;
      }
    }
  }
  return count ? [`Track change timestamps: ${count} normalized`] : [];
}
4184
/**
 * Clears identifying values from `docProps/core.xml` and `docProps/app.xml`.
 *
 * Core part: blanks `creator` and `lastModifiedBy`, resets `revision` to "1"
 * when it parses to more than 1. App part: zeroes the numeric statistics
 * fields and blanks `Template`, `Manager` and `Company`. Parts that are
 * missing are skipped.
 *
 * @param {object} doc - Document wrapper exposing `pkg.getPartByPath`.
 * @returns {string[]} "Metadata scrubbed:" followed by one indented line per
 *   reported value, or an empty array when nothing was reported.
 */
function scrub_doc_properties(doc) {
  const findings = [];

  const core = doc.pkg.getPartByPath("docProps/core.xml");
  if (core) {
    for (const node of findDescendantsByLocalName(core._element, "creator")) {
      if (!node.textContent) continue;
      findings.push(`Author: ${node.textContent}`);
      node.textContent = "";
    }
    for (const node of findDescendantsByLocalName(core._element, "lastModifiedBy")) {
      if (!node.textContent) continue;
      findings.push(`Last modified by: ${node.textContent}`);
      node.textContent = "";
    }
    for (const node of findDescendantsByLocalName(core._element, "revision")) {
      if (!node.textContent || !(parseInt(node.textContent) > 1)) continue;
      findings.push(`Revision count: ${node.textContent} \u2192 1`);
      node.textContent = "1";
    }
  }

  const app = doc.pkg.getPartByPath("docProps/app.xml");
  if (app) {
    const root = app._element;
    const numericFields = ["TotalTime", "Words", "Characters", "Paragraphs", "Lines", "CharactersWithSpaces"];
    for (const field of numericFields) {
      for (const node of findDescendantsByLocalName(root, field)) {
        if (!node.textContent || node.textContent === "0") continue;
        // Only the editing time is reported; the other statistics are zeroed silently.
        if (field === "TotalTime") findings.push(`Total editing time: ${node.textContent} minutes`);
        node.textContent = "0";
      }
    }
    const textFields = ["Template", "Manager", "Company"];
    for (const field of textFields) {
      for (const node of findDescendantsByLocalName(root, field)) {
        if (!node.textContent) continue;
        findings.push(`${field}: ${node.textContent}`);
        node.textContent = "";
      }
    }
  }

  if (findings.length === 0) return [];
  return ["Metadata scrubbed:", ...findings.map((entry) => `  ${entry}`)];
}
4234
/**
 * Sets the `created`, `modified` and `lastPrinted` values in
 * `docProps/core.xml` to "1970-01-01T00:00:00Z".
 *
 * @param {object} doc - Document wrapper exposing `pkg.getPartByPath`.
 * @returns {string[]} One summary line when at least one value changed,
 *   otherwise an empty array (also when the core part is missing).
 */
function scrub_timestamps(doc) {
  const EPOCH = "1970-01-01T00:00:00Z";
  const core = doc.pkg.getPartByPath("docProps/core.xml");
  if (!core) return [];

  let touched = false;
  for (const localName of ["created", "modified", "lastPrinted"]) {
    for (const node of findDescendantsByLocalName(core._element, localName)) {
      if (!node.textContent || node.textContent === EPOCH) continue;
      node.textContent = EPOCH;
      touched = true;
    }
  }
  return touched ? ["Timestamps normalized to epoch"] : [];
}
4250
/**
 * Drops every package part whose name contains "/customXml", removes
 * relationships targeting "customXml" from the root and document rels parts,
 * and deletes `w:dataBinding` children of `w:sdtPr` elements.
 *
 * @param {object} doc - Document wrapper exposing `pkg`, `part` and `element`.
 * @returns {string[]} One summary line, or an empty array when the package
 *   has no custom XML parts (in which case nothing is modified).
 */
function strip_custom_xml(doc) {
  const doomed = doc.pkg.parts.filter((part) => part.partname.includes("/customXml"));
  if (doomed.length === 0) return [];

  const doomedNames = new Set(doomed.map((part) => part.partname));
  doc.pkg.parts = doc.pkg.parts.filter((part) => !doomedNames.has(part.partname));

  const dropCustomXmlRelationships = (relsPart) => {
    findAllDescendants(relsPart._element, "Relationship")
      .filter((rel) => {
        const target = rel.getAttribute("Target");
        return target && target.includes("customXml");
      })
      .forEach((rel) => rel.parentNode?.removeChild(rel));
  };

  const rootRels = doc.pkg.getPartByPath("_rels/.rels");
  if (rootRels) dropCustomXmlRelationships(rootRels);
  const docRels = doc.pkg.getOrCreateRelsPart(doc.part.partname);
  if (docRels) dropCustomXmlRelationships(docRels);

  for (const sdtPr of findAllDescendants(doc.element, "w:sdtPr")) {
    for (const binding of findChildren(sdtPr, "w:dataBinding")) {
      sdtPr.removeChild(binding);
    }
  }
  return [`Custom XML parts: ${doomed.length} removed`];
}
4272
/**
 * Removes the `descr` attribute from `docPr` elements when the description is
 * shorter than 10 characters, or contains a "." and is shorter than 60
 * characters.
 *
 * @param {object} doc - Document wrapper exposing the root `element`.
 * @returns {string[]} One summary line, or an empty array when nothing was removed.
 */
function strip_image_alt_text(doc) {
  const looksAutoGenerated = (text) =>
    text.length < 10 || (text.includes(".") && text.length < 60);

  let removed = 0;
  for (const docPr of findDescendantsByLocalName(doc.element, "docPr")) {
    const description = docPr.getAttribute("descr");
    if (!description || !looksAutoGenerated(description)) continue;
    docPr.removeAttribute("descr");
    removed++;
  }
  if (!removed) return [];
  return [`Image alt text: ${removed} auto-generated descriptions removed`];
}
4287
/**
 * Lists external relationship targets of the document part that look like
 * internal-only URLs.
 *
 * A target is flagged when it contains one of the internal host-name markers
 * (case-insensitive substring match) or a complete dotted-quad IPv4 address
 * in a private range (10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16). Requiring a
 * full address that is not part of a longer number avoids flagging targets
 * such as ".../report-2010.pdf" and covers 172.17-172.31, not only 172.16.
 *
 * @param {object} doc - Document wrapper exposing `pkg` and `part`.
 * @returns {string[]} One warning line per flagged target (at most one per
 *   relationship); empty when nothing is flagged.
 */
function audit_hyperlinks(doc) {
  const internalNames = ["sharepoint.com", "onedrive.com", ".internal", "intranet", "localhost"];
  // Private IPv4 address, not preceded by a digit/dot and not followed by a digit.
  const privateIpv4 = /(?:^|[^0-9.])(?:10(?:\.\d{1,3}){3}|192\.168(?:\.\d{1,3}){2}|172\.(?:1[6-9]|2\d|3[01])(?:\.\d{1,3}){2})(?![0-9])/;
  const warnings = [];
  const docRels = doc.pkg.getOrCreateRelsPart(doc.part.partname);
  for (const rel of findAllDescendants(docRels._element, "Relationship")) {
    if (rel.getAttribute("TargetMode") !== "External") continue;
    const url = rel.getAttribute("Target") || "";
    const lowered = url.toLowerCase();
    const looksInternal = internalNames.some((name) => lowered.includes(name)) || privateIpv4.test(url);
    if (looksInternal) {
      warnings.push(`Hyperlink targets internal URL: ${_truncate(url, 80)}`);
    }
  }
  return warnings;
}
4304
+
4305
+ // src/sanitize/core.ts
4306
/**
 * Sanitizes a document for sending and produces a finalization report.
 *
 * Modes (`options.sanitize_mode`, defaulting to "full" when omitted):
 *  - "full": refuses to proceed (status "blocked", no output buffer) when
 *    tracked changes exist and `options.accept_all` is not set; otherwise
 *    accepts all tracked changes and removes all comments.
 *  - "keep-markup": leaves tracked changes in place and, when
 *    `options.author` is given, rewrites comment and change authors.
 *
 * Both modes then run the metadata/structure scrubbers, audit hyperlinks,
 * normalize change dates, optionally inject read-only protection into
 * `word/settings.xml`, and save the document.
 *
 * @param {object} doc - Document wrapper (exposes `pkg`, `part`, `element`, `save`).
 * @param {object} options
 * @param {string} options.filename - Name shown in the report.
 * @param {string} [options.sanitize_mode="full"] - "full" or "keep-markup".
 * @param {string} [options.author] - Replacement author for kept markup.
 * @param {boolean} [options.accept_all] - Allow auto-accepting tracked changes in "full" mode.
 * @param {string} [options.protection_mode] - "read_only" or "encrypt" (the latter falls back to read-only with a warning).
 * @param {boolean} [options.export_pdf] - Requested PDF export; only produces a warning here.
 * @returns {Promise<{reportText: string, outBuffer?: *}>} The rendered report
 *   and, unless blocked, the value returned by `doc.save()`.
 */
async function finalize_document(doc, options) {
  // Resolve the default once so the report and the processing branch agree;
  // an omitted mode must run the "full" pipeline, not skip it.
  const mode = options.sanitize_mode || "full";
  const report = new SanitizeReport(options.filename, mode, options.author || null);
  if (mode === "full") {
    const counts = count_tracked_changes(doc);
    const total = counts[0] + counts[1] + counts[2];
    report.tracked_changes_found = total;
    if (total > 0 && !options.accept_all) {
      report.status = "blocked";
      report.blocked_reason = `Document contains ${total} unresolved tracked changes (${counts[0]} insertions, ${counts[1]} deletions, ${counts[2]} formatting). Review in Word first, or set accept_all=true.`;
      return { reportText: report.render() };
    }
    if (total > 0) {
      // Authors must be read before acceptance removes the change elements.
      const authors = get_track_change_authors(doc);
      if (authors.size > 1) {
        report.warnings.push(`Multiple authors detected in tracked changes: ${Array.from(authors).sort().join(", ")}. Review per-change list before sending.`);
      }
      report.add_transform_lines(accept_all_tracked_changes(doc));
      report.tracked_changes_accepted = total;
    }
    const commentsSummary = get_comments_summary(doc);
    report.comments_removed = commentsSummary.total;
    report.add_transform_lines(remove_all_comments(doc));
  } else if (mode === "keep-markup") {
    const counts = count_tracked_changes(doc);
    report.tracked_changes_found = counts[0] + counts[1] + counts[2];
    report.tracked_changes_kept = report.tracked_changes_found;
    if (options.author) {
      report.add_transform_lines(replace_comment_authors(doc, options.author));
      report.add_transform_lines(replace_change_authors(doc, options.author));
    }
  }
  report.add_transform_lines(strip_rsid(doc));
  report.add_transform_lines(strip_para_ids(doc));
  report.add_transform_lines(strip_proof_errors(doc));
  report.add_transform_lines(strip_empty_properties(doc));
  report.add_transform_lines(strip_hidden_text(doc));
  report.add_transform_lines(scrub_doc_properties(doc));
  report.add_transform_lines(scrub_timestamps(doc));
  report.add_transform_lines(strip_custom_xml(doc));
  report.add_transform_lines(strip_image_alt_text(doc));
  const warnings = audit_hyperlinks(doc);
  for (const w of warnings) report.warnings.push(w);
  report.add_transform_lines(normalize_change_dates(doc));
  if (options.protection_mode === "read_only" || options.protection_mode === "encrypt") {
    if (options.protection_mode === "encrypt") {
      report.warnings.push("Encryption mode (AES compound wrappers) is strictly unsupported in the zero-dependency Node engine. Falling back to native Word Read-Only lock.");
    }
    const settingsPart = doc.pkg.getPartByPath("word/settings.xml");
    if (settingsPart) {
      const docEl = settingsPart._element.ownerDocument;
      // Reuse an existing protection element when there is one.
      let prot = findDescendantsByLocalName(settingsPart._element, "documentProtection")[0];
      if (!prot) {
        prot = docEl.createElement("w:documentProtection");
        settingsPart._element.appendChild(prot);
      }
      prot.setAttribute("w:edit", "readOnly");
      prot.setAttribute("w:enforcement", "1");
      report.structural_lines.push("Document locked (Read-Only enforcement injected into settings.xml)");
    }
  }
  if (options.export_pdf) {
    report.warnings.push("PDF export requires the Python/Word COM environment and is skipped in this zero-dependency Node agent.");
  }
  if (report.warnings.length > 0) report.status = "clean_with_warnings";
  const outBuffer = await doc.save();
  return { reportText: report.render(), outBuffer };
}
4373
+
3563
4374
  // src/index.ts
3564
- var identifyEngine = () => "adeu-core-node";
4375
/**
 * Returns the identifier string of this engine implementation.
 *
 * @returns {string} Always "adeu-core-node".
 */
function identifyEngine() {
  const engineName = "adeu-core-node";
  return engineName;
}
3565
4378
  export {
3566
4379
  BatchValidationError,
3567
4380
  DocumentMapper,
3568
4381
  DocumentObject,
3569
4382
  RedlineEngine,
3570
4383
  apply_edits_to_markdown,
4384
+ create_unified_diff,
3571
4385
  extractTextFromBuffer,
3572
4386
  extract_outline,
4387
+ finalize_document,
3573
4388
  generate_edits_from_text,
3574
4389
  identifyEngine,
3575
4390
  paginate,