@adeu/core 1.10.0 → 1.11.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -34,6 +34,7 @@ __export(index_exports, {
34
34
  DocumentMapper: () => DocumentMapper,
35
35
  DocumentObject: () => DocumentObject,
36
36
  RedlineEngine: () => RedlineEngine,
37
+ _extractTextFromDoc: () => _extractTextFromDoc,
37
38
  apply_edits_to_markdown: () => apply_edits_to_markdown,
38
39
  create_unified_diff: () => create_unified_diff,
39
40
  create_word_patch_diff: () => create_word_patch_diff,
@@ -1266,14 +1267,16 @@ function* iter_paragraph_content(paragraph) {
1266
1267
  var DocumentMapper = class {
1267
1268
  doc;
1268
1269
  clean_view;
1270
+ original_view;
1269
1271
  comments_map;
1270
1272
  full_text = "";
1271
1273
  spans = [];
1272
1274
  appendix_start_index = -1;
1273
1275
  _text_chunks = [];
1274
- constructor(doc, clean_view = false) {
1276
+ constructor(doc, clean_view = false, original_view = false) {
1275
1277
  this.doc = doc;
1276
1278
  this.clean_view = clean_view;
1279
+ this.original_view = original_view;
1277
1280
  this.comments_map = extract_comments_data(doc.pkg);
1278
1281
  this._build_map();
1279
1282
  }
@@ -1355,14 +1358,15 @@ ${header}`;
1355
1358
  const ins = trPr ? findChild(trPr, "w:ins") : null;
1356
1359
  const del_node = trPr ? findChild(trPr, "w:del") : null;
1357
1360
  if (this.clean_view && del_node) continue;
1361
+ if (this.original_view && ins) continue;
1358
1362
  if (rows_processed > 0) {
1359
1363
  this._add_virtual_text("\n", current, null);
1360
1364
  current += 1;
1361
1365
  }
1362
- if (ins && !this.clean_view) {
1366
+ if (ins && !this.clean_view && !this.original_view) {
1363
1367
  this._add_virtual_text("{++ ", current, null);
1364
1368
  current += 4;
1365
- } else if (del_node && !this.clean_view) {
1369
+ } else if (del_node && !this.clean_view && !this.original_view) {
1366
1370
  this._add_virtual_text("{-- ", current, null);
1367
1371
  current += 4;
1368
1372
  }
@@ -1378,11 +1382,11 @@ ${header}`;
1378
1382
  current = this._map_blocks(cell, current);
1379
1383
  cells_processed += 1;
1380
1384
  }
1381
- if (ins && !this.clean_view) {
1385
+ if (ins && !this.clean_view && !this.original_view) {
1382
1386
  const suffix = ` |Chg:${ins.getAttribute("w:id")}++}`;
1383
1387
  this._add_virtual_text(suffix, current, null);
1384
1388
  current += suffix.length;
1385
- } else if (del_node && !this.clean_view) {
1389
+ } else if (del_node && !this.clean_view && !this.original_view) {
1386
1390
  const suffix = ` |Chg:${del_node.getAttribute("w:id")}--}`;
1387
1391
  this._add_virtual_text(suffix, current, null);
1388
1392
  current += suffix.length;
@@ -1476,11 +1480,13 @@ ${header}`;
1476
1480
  }
1477
1481
  if (this.clean_view && Object.keys(active_del).length > 0) {
1478
1482
  }
1483
+ if (this.original_view && Object.keys(active_ins).length > 0) {
1484
+ }
1479
1485
  const full_seg_text = run_parts.map((x) => x[1]).join("");
1480
1486
  const curr_ins_id = Object.keys(active_ins).pop() || null;
1481
1487
  const curr_del_id = Object.keys(active_del).pop() || null;
1482
- if (full_seg_text && !(this.clean_view && curr_del_id)) {
1483
- const new_wrappers = this.clean_view ? ["", ""] : this._get_wrappers(curr_ins_id, curr_del_id, active_ids, active_fmt);
1488
+ if (full_seg_text && !(this.clean_view && curr_del_id) && !(this.original_view && curr_ins_id)) {
1489
+ const new_wrappers = this.clean_view || this.original_view ? ["", ""] : this._get_wrappers(curr_ins_id, curr_del_id, active_ids, active_fmt);
1484
1490
  const new_style = [prefix, suffix];
1485
1491
  if (pending_runs.length > 0 && new_wrappers[0] === current_wrappers[0] && new_wrappers[1] === current_wrappers[1]) {
1486
1492
  let skip_leading_prefix = false;
@@ -1505,7 +1511,7 @@ ${header}`;
1505
1511
  }
1506
1512
  }
1507
1513
  }
1508
- if (!this.clean_view) {
1514
+ if (!this.clean_view && !this.original_view) {
1509
1515
  const has_meta = Object.keys(active_ins).length > 0 || Object.keys(active_del).length > 0 || active_ids.size > 0 || Object.keys(active_fmt).length > 0;
1510
1516
  if (has_meta) {
1511
1517
  deferred_meta_states.push([{ ...active_ins }, { ...active_del }, new Set(active_ids), { ...active_fmt }]);
@@ -2681,6 +2687,7 @@ var RedlineEngine = class {
2681
2687
  mapper;
2682
2688
  comments_manager;
2683
2689
  clean_mapper = null;
2690
+ original_mapper = null;
2684
2691
  skipped_details = [];
2685
2692
  constructor(doc, author = "Adeu AI (TS)") {
2686
2693
  this.doc = doc;
@@ -2720,7 +2727,10 @@ var RedlineEngine = class {
2720
2727
  if (!full_text) return [null, null];
2721
2728
  const before_start = Math.max(0, start_idx - 30);
2722
2729
  const context_before = full_text.substring(before_start, start_idx);
2723
- const context_after = full_text.substring(start_idx + length, start_idx + length + 30);
2730
+ const context_after = full_text.substring(
2731
+ start_idx + length,
2732
+ start_idx + length + 30
2733
+ );
2724
2734
  const critic_markup = `${context_before}{--${target_text}--}{++${new_text}++}${context_after}`;
2725
2735
  let clean_text = critic_markup;
2726
2736
  clean_text = clean_text.replace(/\{>>.*?<<\}/gs, "");
@@ -2892,7 +2902,9 @@ var RedlineEngine = class {
2892
2902
  overrideEl.parentNode?.removeChild(overrideEl);
2893
2903
  }
2894
2904
  }
2895
- pkg.parts = pkg.parts.filter((p) => !p.partname.toLowerCase().includes("comments"));
2905
+ pkg.parts = pkg.parts.filter(
2906
+ (p) => !p.partname.toLowerCase().includes("comments")
2907
+ );
2896
2908
  for (const key of Object.keys(pkg.unzipped)) {
2897
2909
  if (key.toLowerCase().includes("comments")) {
2898
2910
  delete pkg.unzipped[key];
@@ -3437,13 +3449,17 @@ var RedlineEngine = class {
3437
3449
  if (!edit.target_text) continue;
3438
3450
  let matches = this.mapper.find_all_match_indices(edit.target_text);
3439
3451
  let activeText = this.mapper.full_text;
3452
+ let target_mapper = this.mapper;
3440
3453
  if (matches.length === 0) {
3441
3454
  if (!this.clean_mapper)
3442
3455
  this.clean_mapper = new DocumentMapper(this.doc, true);
3443
3456
  matches = this.clean_mapper.find_all_match_indices(edit.target_text);
3444
- if (matches.length > 0) activeText = this.clean_mapper.full_text;
3457
+ if (matches.length > 0) {
3458
+ activeText = this.clean_mapper.full_text;
3459
+ target_mapper = this.clean_mapper;
3460
+ }
3445
3461
  }
3446
- if (activeText === this.mapper.full_text && matches.length > 1) {
3462
+ if (activeText === this.mapper.full_text && matches.length > 0) {
3447
3463
  const liveMatches = matches.filter(([start, length]) => {
3448
3464
  const realSpans = this.mapper.spans.filter(
3449
3465
  (s) => s.run !== null && s.end > start && s.start < start + length
@@ -3451,13 +3467,51 @@ var RedlineEngine = class {
3451
3467
  if (realSpans.length === 0) return true;
3452
3468
  return realSpans.some((s) => !s.del_id);
3453
3469
  });
3454
- if (liveMatches.length > 0) matches = liveMatches;
3470
+ matches = liveMatches;
3455
3471
  }
3472
+ let is_deleted_text = false;
3473
+ const deleted_authors = /* @__PURE__ */ new Set();
3456
3474
  if (matches.length === 0) {
3457
- errors.push(
3458
- `- Edit ${i + 1} Failed: Target text not found in document:
3475
+ if (!this.original_mapper) {
3476
+ this.original_mapper = new DocumentMapper(this.doc, false, true);
3477
+ }
3478
+ const orig_matches = this.original_mapper.find_all_match_indices(edit.target_text);
3479
+ if (orig_matches.length > 0) {
3480
+ is_deleted_text = true;
3481
+ for (const [start, length] of orig_matches) {
3482
+ const spans = this.original_mapper.spans.filter(
3483
+ (s) => s.end > start && s.start < start + length
3484
+ );
3485
+ for (const s of spans) {
3486
+ if (s.run !== null) {
3487
+ let parent = s.run._element;
3488
+ while (parent) {
3489
+ if (parent.nodeType === 1 && parent.tagName === "w:del") {
3490
+ const auth = parent.getAttribute("w:author");
3491
+ if (auth) {
3492
+ deleted_authors.add(auth);
3493
+ }
3494
+ break;
3495
+ }
3496
+ parent = parent.parentNode;
3497
+ }
3498
+ }
3499
+ }
3500
+ }
3501
+ }
3502
+ }
3503
+ if (matches.length === 0) {
3504
+ if (is_deleted_text) {
3505
+ const author_phrase = deleted_authors.size > 0 ? `by ${Array.from(deleted_authors).sort().join(", ")}` : "by an existing revision";
3506
+ errors.push(
3507
+ `- Edit ${i + 1} Failed: Target text matches text inside a tracked deletion ${author_phrase}. Reject/accept that change first or target the active replacement text instead.`
3508
+ );
3509
+ } else {
3510
+ errors.push(
3511
+ `- Edit ${i + 1} Failed: Target text not found in document:
3459
3512
  "${edit.target_text}"`
3460
- );
3513
+ );
3514
+ }
3461
3515
  } else if (matches.length > 1) {
3462
3516
  const positions = matches.map(([start, length]) => [
3463
3517
  start,
@@ -3478,7 +3532,10 @@ var RedlineEngine = class {
3478
3532
  const [pfx, sfx] = trim_common_context(matched, edit.new_text || "");
3479
3533
  const t_end = matched.length - sfx;
3480
3534
  const final_target = matched.substring(pfx, t_end);
3481
- const final_new = (edit.new_text || "").substring(pfx, (edit.new_text || "").length - sfx);
3535
+ const final_new = (edit.new_text || "").substring(
3536
+ pfx,
3537
+ (edit.new_text || "").length - sfx
3538
+ );
3482
3539
  if (final_target.includes("\n\n")) {
3483
3540
  if (final_new.includes("\n\n")) {
3484
3541
  const parts = matched.split("\n\n");
@@ -3634,7 +3691,9 @@ var RedlineEngine = class {
3634
3691
  if (dry_run_mode) {
3635
3692
  for (const edit of edits) {
3636
3693
  const single_errors = this.validate_edits([edit]);
3637
- const warning = this._check_punctuation_warning(edit.target_text || "");
3694
+ const warning = this._check_punctuation_warning(
3695
+ edit.target_text || ""
3696
+ );
3638
3697
  if (single_errors.length > 0) {
3639
3698
  skipped_edits++;
3640
3699
  edits_reports.push({
@@ -3688,7 +3747,9 @@ var RedlineEngine = class {
3688
3747
  for (const edit of cloned_edits) {
3689
3748
  const success = edit._applied_status || false;
3690
3749
  const error_msg = edit._error_msg || null;
3691
- const warning = this._check_punctuation_warning(edit.target_text || "");
3750
+ const warning = this._check_punctuation_warning(
3751
+ edit.target_text || ""
3752
+ );
3692
3753
  let critic_markup = null;
3693
3754
  let clean_text = null;
3694
3755
  if (success) {
@@ -3716,7 +3777,7 @@ var RedlineEngine = class {
3716
3777
  skipped_details: this.skipped_details,
3717
3778
  edits: edits_reports,
3718
3779
  engine: "node",
3719
- version: "1.9.0"
3780
+ version: "1.10.0"
3720
3781
  };
3721
3782
  }
3722
3783
  apply_edits(edits) {
@@ -4239,7 +4300,10 @@ var RedlineEngine = class {
4239
4300
  if (result.first_node.tagName === "w:p") {
4240
4301
  first_anchor_target = findAllDescendants(result.first_node, "w:ins")[0] || result.first_node;
4241
4302
  }
4242
- const anchor = ascend_to_paragraph_child(first_anchor_target, host_p);
4303
+ const anchor = ascend_to_paragraph_child(
4304
+ first_anchor_target,
4305
+ host_p
4306
+ );
4243
4307
  this._attach_comment(host_p, anchor, anchor, edit.comment);
4244
4308
  }
4245
4309
  }
@@ -4251,7 +4315,10 @@ var RedlineEngine = class {
4251
4315
  length,
4252
4316
  rebuild_map
4253
4317
  );
4254
- const virtual_spans = active_mapper.get_virtual_spans_in_range(start_idx, length);
4318
+ const virtual_spans = active_mapper.get_virtual_spans_in_range(
4319
+ start_idx,
4320
+ length
4321
+ );
4255
4322
  if (target_runs.length === 0 && virtual_spans.length === 0) return false;
4256
4323
  const affected_ps = /* @__PURE__ */ new Set();
4257
4324
  for (const run of target_runs) {
@@ -4334,7 +4401,10 @@ var RedlineEngine = class {
4334
4401
  let pPr = findChild(p1_element, "w:pPr");
4335
4402
  if (!pPr) {
4336
4403
  pPr = p1_element.ownerDocument.createElement("w:pPr");
4337
- p1_element.insertBefore(pPr, p1_element.firstChild);
4404
+ p1_element.insertBefore(
4405
+ pPr,
4406
+ p1_element.firstChild
4407
+ );
4338
4408
  }
4339
4409
  let rPr = findChild(pPr, "w:rPr");
4340
4410
  if (!rPr) {
@@ -4863,13 +4933,14 @@ async function extractTextFromBuffer(buffer, cleanView = false) {
4863
4933
  const doc = await DocumentObject.load(buffer);
4864
4934
  return _extractTextFromDoc(doc, cleanView);
4865
4935
  }
4866
- function _extractTextFromDoc(doc, cleanView = false, includeAppendix = true) {
4936
+ function _extractTextFromDoc(doc, cleanView = false, includeAppendix = true, return_paragraph_offsets = false) {
4867
4937
  const comments_map = extract_comments_data(doc.pkg);
4868
4938
  const full_text = [];
4939
+ const paragraph_offsets = /* @__PURE__ */ new Map();
4869
4940
  let cursor = 0;
4870
4941
  for (const part of iter_document_parts(doc)) {
4871
4942
  const part_cursor = full_text.length > 0 ? cursor + 2 : cursor;
4872
- const part_text = _extract_blocks(part, comments_map, cleanView, part_cursor);
4943
+ const part_text = _extract_blocks(part, comments_map, cleanView, part_cursor, return_paragraph_offsets ? paragraph_offsets : void 0);
4873
4944
  if (part_text) {
4874
4945
  if (full_text.length > 0) cursor += 2;
4875
4946
  full_text.push(part_text);
@@ -4881,9 +4952,12 @@ function _extractTextFromDoc(doc, cleanView = false, includeAppendix = true) {
4881
4952
  const appendix = build_structural_appendix(doc, base_text);
4882
4953
  if (appendix) base_text += appendix;
4883
4954
  }
4955
+ if (return_paragraph_offsets) {
4956
+ return { text: base_text, paragraph_offsets };
4957
+ }
4884
4958
  return base_text;
4885
4959
  }
4886
- function _extract_blocks(container, comments_map, cleanView, cursor) {
4960
+ function _extract_blocks(container, comments_map, cleanView, cursor, paragraph_offsets) {
4887
4961
  const part = container.part || container;
4888
4962
  const [style_cache, default_pstyle] = _get_style_cache(part);
4889
4963
  const blocks = [];
@@ -4902,7 +4976,7 @@ ${header}`;
4902
4976
  if (!is_first_block) local_cursor += 2;
4903
4977
  const block_start = local_cursor;
4904
4978
  if (item.constructor.name === "FootnoteItem") {
4905
- const fn_text = _extract_blocks(item, comments_map, cleanView, block_start);
4979
+ const fn_text = _extract_blocks(item, comments_map, cleanView, block_start, paragraph_offsets);
4906
4980
  if (fn_text) {
4907
4981
  blocks.push(fn_text);
4908
4982
  local_cursor = block_start + fn_text.length;
@@ -4918,11 +4992,14 @@ ${header}`;
4918
4992
  const p_text = build_paragraph_text(item, comments_map, cleanView, style_cache, default_pstyle);
4919
4993
  const full_block = prefix + p_text;
4920
4994
  blocks.push(full_block);
4995
+ if (paragraph_offsets) {
4996
+ paragraph_offsets.set(item._element, [block_start, full_block.length]);
4997
+ }
4921
4998
  local_cursor = block_start + full_block.length;
4922
4999
  is_first_para = false;
4923
5000
  is_first_block = false;
4924
5001
  } else if (item instanceof Table) {
4925
- const table_text = extract_table(item, comments_map, cleanView, block_start);
5002
+ const table_text = extract_table(item, comments_map, cleanView, block_start, paragraph_offsets);
4926
5003
  if (table_text) {
4927
5004
  blocks.push(table_text);
4928
5005
  local_cursor = block_start + table_text.length;
@@ -4935,7 +5012,7 @@ ${header}`;
4935
5012
  }
4936
5013
  return blocks.join("\n\n");
4937
5014
  }
4938
- function extract_table(table, comments_map, cleanView, cursor) {
5015
+ function extract_table(table, comments_map, cleanView, cursor, paragraph_offsets) {
4939
5016
  const rows_text = [];
4940
5017
  let rows_processed = 0;
4941
5018
  let local_cursor = cursor;
@@ -4954,7 +5031,7 @@ function extract_table(table, comments_map, cleanView, cursor) {
4954
5031
  if (seen_cells.has(cell)) continue;
4955
5032
  seen_cells.add(cell);
4956
5033
  if (!first_cell) cell_cursor += 3;
4957
- const cell_content = _extract_blocks(cell, comments_map, cleanView, cell_cursor);
5034
+ const cell_content = _extract_blocks(cell, comments_map, cleanView, cell_cursor, paragraph_offsets);
4958
5035
  cell_texts.push(cell_content);
4959
5036
  cell_cursor += cell_content.length;
4960
5037
  first_cell = false;
@@ -5211,6 +5288,9 @@ function extract_outline(doc, projected_body, body_pages, body_page_offsets, par
5211
5288
  if (body_pages.length !== body_page_offsets.length) {
5212
5289
  throw new Error("body_pages and body_page_offsets length mismatch");
5213
5290
  }
5291
+ if (paragraph_offsets) {
5292
+ return _extract_outline_fast(doc, projected_body, body_page_offsets, paragraph_offsets);
5293
+ }
5214
5294
  const comments_map = extract_comments_data(doc.pkg);
5215
5295
  const block_records = _walk_doc_body(doc, comments_map);
5216
5296
  const heading_indices = [];
@@ -5486,6 +5566,7 @@ function _determine_heading_style(paragraph) {
5486
5566
  if (pStyle) style_id = pStyle.getAttribute("w:val") || default_pstyle;
5487
5567
  }
5488
5568
  let outline_level = null;
5569
+ let outline_level_from_style = false;
5489
5570
  if (pPr) {
5490
5571
  const oLvl = findChild(pPr, "w:outlineLvl");
5491
5572
  if (oLvl && /^\d+$/.test(oLvl.getAttribute("w:val") || "")) {
@@ -5494,6 +5575,7 @@ function _determine_heading_style(paragraph) {
5494
5575
  }
5495
5576
  if (outline_level === null && style_id && style_cache && style_cache[style_id]) {
5496
5577
  outline_level = style_cache[style_id].outline_level;
5578
+ outline_level_from_style = true;
5497
5579
  }
5498
5580
  const style_name = style_id && style_cache && style_cache[style_id] ? style_cache[style_id].name : style_id;
5499
5581
  let normalized_style_name = style_name;
@@ -5504,6 +5586,12 @@ function _determine_heading_style(paragraph) {
5504
5586
  normalized_style_name = "Title";
5505
5587
  }
5506
5588
  }
5589
+ if (outline_level_from_style && outline_level !== null) {
5590
+ const is_heading_or_title = normalized_style_name && (normalized_style_name.startsWith("Heading") || normalized_style_name === "Title");
5591
+ if (!is_heading_or_title) {
5592
+ outline_level = null;
5593
+ }
5594
+ }
5507
5595
  if (outline_level !== null && outline_level >= 0 && outline_level <= 8) {
5508
5596
  if (normalized_style_name && (normalized_style_name.startsWith("Heading") || normalized_style_name === "Title")) {
5509
5597
  return normalized_style_name;
@@ -5552,6 +5640,146 @@ function _offset_to_page(offset, body_page_offsets) {
5552
5640
  }
5553
5641
  return page;
5554
5642
  }
5643
/**
 * Builds the heading outline from a precomputed paragraph-offset table
 * instead of re-projecting the document body.
 *
 * Block items are flattened in document order (descending into footnote
 * items and table cells), then every paragraph that has a recorded offset,
 * is recognised by `_is_heading`, and passes the heuristic quality filter
 * becomes one outline node.
 *
 * @param {object} doc - Root container handed to `iter_block_items`.
 * @param {string} projected_body - Projected text that the offsets index into.
 * @param {Array} body_page_offsets - Passed through to `_offset_to_page`.
 * @param {Map|object} paragraph_offsets - Maps a paragraph's `_element` to
 *   `[start, length]`.
 *   NOTE(review): for a plain object the element is coerced to a string key;
 *   presumably only the Map form is used in practice — confirm.
 * @returns {Array<{level, text, page, style, has_table, footnote_ids}>}
 *   One node per accepted heading, in document order (empty when none).
 */
function _extract_outline_fast(doc, projected_body, body_page_offsets, paragraph_offsets) {
  // Single place that knows whether the offsets live in a Map or an object.
  const offsets_are_map = paragraph_offsets instanceof Map;
  const has_offset = (element) =>
    offsets_are_map ? paragraph_offsets.has(element) : element in paragraph_offsets;
  const get_offset = (element) =>
    offsets_are_map ? paragraph_offsets.get(element) : paragraph_offsets[element];

  // Flatten the document into ["p", paragraph] / ["t", table] records.
  const paragraphs_and_tables = [];
  const seen_cells = new Set();
  function walk(container) {
    for (const item of iter_block_items(container)) {
      if (item.constructor.name === "FootnoteItem") {
        walk(item);
      } else if (item instanceof Paragraph) {
        paragraphs_and_tables.push(["p", item]);
      } else if (item instanceof Table) {
        paragraphs_and_tables.push(["t", item]);
        for (const row of item.rows) {
          for (const cell of row.cells) {
            // A cell whose element was already visited is not walked again.
            if (seen_cells.has(cell._element)) continue;
            seen_cells.add(cell._element);
            walk(cell);
          }
        }
      }
    }
  }
  walk(doc);

  // Collect accepted headings; the level is resolved exactly once per heading
  // so the "next sibling" scan below never has to call _heading_level again.
  const headings = [];
  for (let idx = 0; idx < paragraphs_and_tables.length; idx++) {
    const [kind, item] = paragraphs_and_tables[idx];
    if (kind !== "p") continue;
    if (!has_offset(item._element)) continue;
    if (!_is_heading(item)) continue;
    if (!_heading_passes_quality_filter_fast(item, projected_body, paragraph_offsets)) continue;
    headings.push({ idx, paragraph: item, level: _heading_level(item) });
  }
  if (headings.length === 0) return [];

  const nodes = [];
  for (let h_pos = 0; h_pos < headings.length; h_pos++) {
    const { idx: item_idx, paragraph, level } = headings[h_pos];
    const text = _heading_text_fast(paragraph, projected_body, paragraph_offsets);
    const style = _determine_heading_style(paragraph);

    // The heading owns everything up to the next accepted heading of the
    // same or a shallower level, or to the end of the document.
    let owned_end = paragraphs_and_tables.length;
    for (let next_h_pos = h_pos + 1; next_h_pos < headings.length; next_h_pos++) {
      if (headings[next_h_pos].level <= level) {
        owned_end = headings[next_h_pos].idx;
        break;
      }
    }
    const owned = paragraphs_and_tables.slice(item_idx + 1, owned_end);

    // has_table: a table appears before the first heading-like paragraph.
    let has_table = false;
    for (const [owned_kind, owned_item] of owned) {
      if (owned_kind === "p" && _is_heading(owned_item)) break;
      if (owned_kind === "t") {
        has_table = true;
        break;
      }
    }

    const footnote_ids = _collect_footnote_ids_fast(owned);

    // Page falls back to 1 when no offset is recorded for the heading.
    const para_offset = get_offset(paragraph._element);
    let page_num = 1;
    if (para_offset !== undefined) {
      const [start_offset] = para_offset;
      page_num = _offset_to_page(start_offset, body_page_offsets);
    }

    nodes.push({
      level,
      text,
      page: page_num,
      style,
      has_table,
      footnote_ids
    });
  }
  return nodes;
}
5739
/**
 * Decides whether a heading candidate is kept in the outline.
 *
 * Headings whose style resolves to anything other than "(heuristic)" are
 * always accepted. Heuristic headings must have non-empty text containing
 * at least `_HEURISTIC_MIN_WORDS` words.
 *
 * Words are runs of Unicode letters, marks, digits or underscore. The plain
 * `\w` class is ASCII-only in JavaScript, so it counted zero words for
 * headings written in non-Latin scripts and rejected them unconditionally.
 * For ASCII text the count is unchanged.
 * NOTE(review): if a non-fast counterpart of this filter exists, keep its
 * word pattern in sync with this one.
 *
 * @param {object} paragraph - Heading candidate.
 * @param {string} projected_body - Projected text the offsets index into.
 * @param {Map|object} paragraph_offsets - Paragraph element → [start, length].
 * @returns {boolean} true when the heading should be kept.
 */
function _heading_passes_quality_filter_fast(paragraph, projected_body, paragraph_offsets) {
  if (_determine_heading_style(paragraph) !== "(heuristic)") return true;

  const text = _heading_text_fast(paragraph, projected_body, paragraph_offsets);
  if (!text) return false;

  const words = text.match(/[\p{L}\p{M}\p{N}_]+/gu) || [];
  return words.length >= _HEURISTIC_MIN_WORDS;
}
5747
/**
 * Returns the display text of a heading by slicing it out of the projected
 * body at the paragraph's recorded `[start, length]` offset.
 *
 * The slice is passed through `_strip_critic_markup` and
 * `_strip_inline_formatting`, a leading run of `#` followed by whitespace is
 * removed, and the result is trimmed. An empty string is returned when the
 * paragraph has no recorded offset.
 *
 * @param {object} paragraph - Paragraph whose `_element` keys the offsets.
 * @param {string} projected_body - Projected text the offsets index into.
 * @param {Map|object} paragraph_offsets - Paragraph element → [start, length].
 * @returns {string} Cleaned heading text, or "" when no offset is known.
 */
function _heading_text_fast(paragraph, projected_body, paragraph_offsets) {
  const element = paragraph._element;
  const entry = paragraph_offsets instanceof Map
    ? paragraph_offsets.get(element)
    : paragraph_offsets[element];
  if (entry === undefined) return "";

  const [begin, span] = entry;
  const slice = projected_body.substring(begin, begin + span);
  const without_markup = _strip_inline_formatting(_strip_critic_markup(slice));
  return without_markup.replace(/^#+\s+/, "").trim();
}
5764
/**
 * Gathers the footnote and endnote references found in the paragraphs of a
 * heading's owned items.
 *
 * Only `["p", paragraph]` records are inspected. Each content event of type
 * "footnote" yields `fn-<id>` and each of type "endnote" yields `en-<id>`;
 * events without a `type` property or of any other type are ignored.
 *
 * @param {Array} owned_items - `[kind, item]` records owned by a heading.
 * @returns {string[]} Unique ids in order of first appearance.
 */
function _collect_footnote_ids_fast(owned_items) {
  // A Set keeps insertion order, so it covers both de-duplication and ordering.
  const collected = new Set();
  for (const [kind, paragraph] of owned_items) {
    if (kind !== "p") continue;
    for (const event of iter_paragraph_content(paragraph)) {
      if (!("type" in event)) continue;
      switch (event.type) {
        case "footnote":
          collected.add(`fn-${event.id}`);
          break;
        case "endnote":
          collected.add(`en-${event.id}`);
          break;
        default:
          break;
      }
    }
  }
  return [...collected];
}
5555
5783
 
5556
5784
  // src/sanitize/report.ts
5557
5785
  var SanitizeReport = class {
@@ -6239,6 +6467,7 @@ function identifyEngine() {
6239
6467
  DocumentMapper,
6240
6468
  DocumentObject,
6241
6469
  RedlineEngine,
6470
+ _extractTextFromDoc,
6242
6471
  apply_edits_to_markdown,
6243
6472
  create_unified_diff,
6244
6473
  create_word_patch_diff,