@adeu/core 1.10.1 → 1.11.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -34,6 +34,7 @@ __export(index_exports, {
34
34
  DocumentMapper: () => DocumentMapper,
35
35
  DocumentObject: () => DocumentObject,
36
36
  RedlineEngine: () => RedlineEngine,
37
+ _extractTextFromDoc: () => _extractTextFromDoc,
37
38
  apply_edits_to_markdown: () => apply_edits_to_markdown,
38
39
  create_unified_diff: () => create_unified_diff,
39
40
  create_word_patch_diff: () => create_word_patch_diff,
@@ -1266,14 +1267,16 @@ function* iter_paragraph_content(paragraph) {
1266
1267
  var DocumentMapper = class {
1267
1268
  doc;
1268
1269
  clean_view;
1270
+ original_view;
1269
1271
  comments_map;
1270
1272
  full_text = "";
1271
1273
  spans = [];
1272
1274
  appendix_start_index = -1;
1273
1275
  _text_chunks = [];
1274
- constructor(doc, clean_view = false) {
1276
+ constructor(doc, clean_view = false, original_view = false) {
1275
1277
  this.doc = doc;
1276
1278
  this.clean_view = clean_view;
1279
+ this.original_view = original_view;
1277
1280
  this.comments_map = extract_comments_data(doc.pkg);
1278
1281
  this._build_map();
1279
1282
  }
@@ -1355,14 +1358,15 @@ ${header}`;
1355
1358
  const ins = trPr ? findChild(trPr, "w:ins") : null;
1356
1359
  const del_node = trPr ? findChild(trPr, "w:del") : null;
1357
1360
  if (this.clean_view && del_node) continue;
1361
+ if (this.original_view && ins) continue;
1358
1362
  if (rows_processed > 0) {
1359
1363
  this._add_virtual_text("\n", current, null);
1360
1364
  current += 1;
1361
1365
  }
1362
- if (ins && !this.clean_view) {
1366
+ if (ins && !this.clean_view && !this.original_view) {
1363
1367
  this._add_virtual_text("{++ ", current, null);
1364
1368
  current += 4;
1365
- } else if (del_node && !this.clean_view) {
1369
+ } else if (del_node && !this.clean_view && !this.original_view) {
1366
1370
  this._add_virtual_text("{-- ", current, null);
1367
1371
  current += 4;
1368
1372
  }
@@ -1378,11 +1382,11 @@ ${header}`;
1378
1382
  current = this._map_blocks(cell, current);
1379
1383
  cells_processed += 1;
1380
1384
  }
1381
- if (ins && !this.clean_view) {
1385
+ if (ins && !this.clean_view && !this.original_view) {
1382
1386
  const suffix = ` |Chg:${ins.getAttribute("w:id")}++}`;
1383
1387
  this._add_virtual_text(suffix, current, null);
1384
1388
  current += suffix.length;
1385
- } else if (del_node && !this.clean_view) {
1389
+ } else if (del_node && !this.clean_view && !this.original_view) {
1386
1390
  const suffix = ` |Chg:${del_node.getAttribute("w:id")}--}`;
1387
1391
  this._add_virtual_text(suffix, current, null);
1388
1392
  current += suffix.length;
@@ -1476,11 +1480,13 @@ ${header}`;
1476
1480
  }
1477
1481
  if (this.clean_view && Object.keys(active_del).length > 0) {
1478
1482
  }
1483
+ if (this.original_view && Object.keys(active_ins).length > 0) {
1484
+ }
1479
1485
  const full_seg_text = run_parts.map((x) => x[1]).join("");
1480
1486
  const curr_ins_id = Object.keys(active_ins).pop() || null;
1481
1487
  const curr_del_id = Object.keys(active_del).pop() || null;
1482
- if (full_seg_text && !(this.clean_view && curr_del_id)) {
1483
- const new_wrappers = this.clean_view ? ["", ""] : this._get_wrappers(curr_ins_id, curr_del_id, active_ids, active_fmt);
1488
+ if (full_seg_text && !(this.clean_view && curr_del_id) && !(this.original_view && curr_ins_id)) {
1489
+ const new_wrappers = this.clean_view || this.original_view ? ["", ""] : this._get_wrappers(curr_ins_id, curr_del_id, active_ids, active_fmt);
1484
1490
  const new_style = [prefix, suffix];
1485
1491
  if (pending_runs.length > 0 && new_wrappers[0] === current_wrappers[0] && new_wrappers[1] === current_wrappers[1]) {
1486
1492
  let skip_leading_prefix = false;
@@ -1505,7 +1511,7 @@ ${header}`;
1505
1511
  }
1506
1512
  }
1507
1513
  }
1508
- if (!this.clean_view) {
1514
+ if (!this.clean_view && !this.original_view) {
1509
1515
  const has_meta = Object.keys(active_ins).length > 0 || Object.keys(active_del).length > 0 || active_ids.size > 0 || Object.keys(active_fmt).length > 0;
1510
1516
  if (has_meta) {
1511
1517
  deferred_meta_states.push([{ ...active_ins }, { ...active_del }, new Set(active_ids), { ...active_fmt }]);
@@ -2681,6 +2687,7 @@ var RedlineEngine = class {
2681
2687
  mapper;
2682
2688
  comments_manager;
2683
2689
  clean_mapper = null;
2690
+ original_mapper = null;
2684
2691
  skipped_details = [];
2685
2692
  constructor(doc, author = "Adeu AI (TS)") {
2686
2693
  this.doc = doc;
@@ -3442,13 +3449,17 @@ var RedlineEngine = class {
3442
3449
  if (!edit.target_text) continue;
3443
3450
  let matches = this.mapper.find_all_match_indices(edit.target_text);
3444
3451
  let activeText = this.mapper.full_text;
3452
+ let target_mapper = this.mapper;
3445
3453
  if (matches.length === 0) {
3446
3454
  if (!this.clean_mapper)
3447
3455
  this.clean_mapper = new DocumentMapper(this.doc, true);
3448
3456
  matches = this.clean_mapper.find_all_match_indices(edit.target_text);
3449
- if (matches.length > 0) activeText = this.clean_mapper.full_text;
3457
+ if (matches.length > 0) {
3458
+ activeText = this.clean_mapper.full_text;
3459
+ target_mapper = this.clean_mapper;
3460
+ }
3450
3461
  }
3451
- if (activeText === this.mapper.full_text && matches.length > 1) {
3462
+ if (activeText === this.mapper.full_text && matches.length > 0) {
3452
3463
  const liveMatches = matches.filter(([start, length]) => {
3453
3464
  const realSpans = this.mapper.spans.filter(
3454
3465
  (s) => s.run !== null && s.end > start && s.start < start + length
@@ -3456,13 +3467,51 @@ var RedlineEngine = class {
3456
3467
  if (realSpans.length === 0) return true;
3457
3468
  return realSpans.some((s) => !s.del_id);
3458
3469
  });
3459
- if (liveMatches.length > 0) matches = liveMatches;
3470
+ matches = liveMatches;
3460
3471
  }
3472
+ let is_deleted_text = false;
3473
+ const deleted_authors = /* @__PURE__ */ new Set();
3461
3474
  if (matches.length === 0) {
3462
- errors.push(
3463
- `- Edit ${i + 1} Failed: Target text not found in document:
3475
+ if (!this.original_mapper) {
3476
+ this.original_mapper = new DocumentMapper(this.doc, false, true);
3477
+ }
3478
+ const orig_matches = this.original_mapper.find_all_match_indices(edit.target_text);
3479
+ if (orig_matches.length > 0) {
3480
+ is_deleted_text = true;
3481
+ for (const [start, length] of orig_matches) {
3482
+ const spans = this.original_mapper.spans.filter(
3483
+ (s) => s.end > start && s.start < start + length
3484
+ );
3485
+ for (const s of spans) {
3486
+ if (s.run !== null) {
3487
+ let parent = s.run._element;
3488
+ while (parent) {
3489
+ if (parent.nodeType === 1 && parent.tagName === "w:del") {
3490
+ const auth = parent.getAttribute("w:author");
3491
+ if (auth) {
3492
+ deleted_authors.add(auth);
3493
+ }
3494
+ break;
3495
+ }
3496
+ parent = parent.parentNode;
3497
+ }
3498
+ }
3499
+ }
3500
+ }
3501
+ }
3502
+ }
3503
+ if (matches.length === 0) {
3504
+ if (is_deleted_text) {
3505
+ const author_phrase = deleted_authors.size > 0 ? `by ${Array.from(deleted_authors).sort().join(", ")}` : "by an existing revision";
3506
+ errors.push(
3507
+ `- Edit ${i + 1} Failed: Target text matches text inside a tracked deletion ${author_phrase}. Reject/accept that change first or target the active replacement text instead.`
3508
+ );
3509
+ } else {
3510
+ errors.push(
3511
+ `- Edit ${i + 1} Failed: Target text not found in document:
3464
3512
  "${edit.target_text}"`
3465
- );
3513
+ );
3514
+ }
3466
3515
  } else if (matches.length > 1) {
3467
3516
  const positions = matches.map(([start, length]) => [
3468
3517
  start,
@@ -4884,13 +4933,14 @@ async function extractTextFromBuffer(buffer, cleanView = false) {
4884
4933
  const doc = await DocumentObject.load(buffer);
4885
4934
  return _extractTextFromDoc(doc, cleanView);
4886
4935
  }
4887
- function _extractTextFromDoc(doc, cleanView = false, includeAppendix = true) {
4936
+ function _extractTextFromDoc(doc, cleanView = false, includeAppendix = true, return_paragraph_offsets = false) {
4888
4937
  const comments_map = extract_comments_data(doc.pkg);
4889
4938
  const full_text = [];
4939
+ const paragraph_offsets = /* @__PURE__ */ new Map();
4890
4940
  let cursor = 0;
4891
4941
  for (const part of iter_document_parts(doc)) {
4892
4942
  const part_cursor = full_text.length > 0 ? cursor + 2 : cursor;
4893
- const part_text = _extract_blocks(part, comments_map, cleanView, part_cursor);
4943
+ const part_text = _extract_blocks(part, comments_map, cleanView, part_cursor, return_paragraph_offsets ? paragraph_offsets : void 0);
4894
4944
  if (part_text) {
4895
4945
  if (full_text.length > 0) cursor += 2;
4896
4946
  full_text.push(part_text);
@@ -4902,9 +4952,12 @@ function _extractTextFromDoc(doc, cleanView = false, includeAppendix = true) {
4902
4952
  const appendix = build_structural_appendix(doc, base_text);
4903
4953
  if (appendix) base_text += appendix;
4904
4954
  }
4955
+ if (return_paragraph_offsets) {
4956
+ return { text: base_text, paragraph_offsets };
4957
+ }
4905
4958
  return base_text;
4906
4959
  }
4907
- function _extract_blocks(container, comments_map, cleanView, cursor) {
4960
+ function _extract_blocks(container, comments_map, cleanView, cursor, paragraph_offsets) {
4908
4961
  const part = container.part || container;
4909
4962
  const [style_cache, default_pstyle] = _get_style_cache(part);
4910
4963
  const blocks = [];
@@ -4923,7 +4976,7 @@ ${header}`;
4923
4976
  if (!is_first_block) local_cursor += 2;
4924
4977
  const block_start = local_cursor;
4925
4978
  if (item.constructor.name === "FootnoteItem") {
4926
- const fn_text = _extract_blocks(item, comments_map, cleanView, block_start);
4979
+ const fn_text = _extract_blocks(item, comments_map, cleanView, block_start, paragraph_offsets);
4927
4980
  if (fn_text) {
4928
4981
  blocks.push(fn_text);
4929
4982
  local_cursor = block_start + fn_text.length;
@@ -4939,11 +4992,14 @@ ${header}`;
4939
4992
  const p_text = build_paragraph_text(item, comments_map, cleanView, style_cache, default_pstyle);
4940
4993
  const full_block = prefix + p_text;
4941
4994
  blocks.push(full_block);
4995
+ if (paragraph_offsets) {
4996
+ paragraph_offsets.set(item._element, [block_start, full_block.length]);
4997
+ }
4942
4998
  local_cursor = block_start + full_block.length;
4943
4999
  is_first_para = false;
4944
5000
  is_first_block = false;
4945
5001
  } else if (item instanceof Table) {
4946
- const table_text = extract_table(item, comments_map, cleanView, block_start);
5002
+ const table_text = extract_table(item, comments_map, cleanView, block_start, paragraph_offsets);
4947
5003
  if (table_text) {
4948
5004
  blocks.push(table_text);
4949
5005
  local_cursor = block_start + table_text.length;
@@ -4956,7 +5012,7 @@ ${header}`;
4956
5012
  }
4957
5013
  return blocks.join("\n\n");
4958
5014
  }
4959
- function extract_table(table, comments_map, cleanView, cursor) {
5015
+ function extract_table(table, comments_map, cleanView, cursor, paragraph_offsets) {
4960
5016
  const rows_text = [];
4961
5017
  let rows_processed = 0;
4962
5018
  let local_cursor = cursor;
@@ -4975,7 +5031,7 @@ function extract_table(table, comments_map, cleanView, cursor) {
4975
5031
  if (seen_cells.has(cell)) continue;
4976
5032
  seen_cells.add(cell);
4977
5033
  if (!first_cell) cell_cursor += 3;
4978
- const cell_content = _extract_blocks(cell, comments_map, cleanView, cell_cursor);
5034
+ const cell_content = _extract_blocks(cell, comments_map, cleanView, cell_cursor, paragraph_offsets);
4979
5035
  cell_texts.push(cell_content);
4980
5036
  cell_cursor += cell_content.length;
4981
5037
  first_cell = false;
@@ -5232,6 +5288,9 @@ function extract_outline(doc, projected_body, body_pages, body_page_offsets, par
5232
5288
  if (body_pages.length !== body_page_offsets.length) {
5233
5289
  throw new Error("body_pages and body_page_offsets length mismatch");
5234
5290
  }
5291
+ if (paragraph_offsets) {
5292
+ return _extract_outline_fast(doc, projected_body, body_page_offsets, paragraph_offsets);
5293
+ }
5235
5294
  const comments_map = extract_comments_data(doc.pkg);
5236
5295
  const block_records = _walk_doc_body(doc, comments_map);
5237
5296
  const heading_indices = [];
@@ -5507,6 +5566,7 @@ function _determine_heading_style(paragraph) {
5507
5566
  if (pStyle) style_id = pStyle.getAttribute("w:val") || default_pstyle;
5508
5567
  }
5509
5568
  let outline_level = null;
5569
+ let outline_level_from_style = false;
5510
5570
  if (pPr) {
5511
5571
  const oLvl = findChild(pPr, "w:outlineLvl");
5512
5572
  if (oLvl && /^\d+$/.test(oLvl.getAttribute("w:val") || "")) {
@@ -5515,6 +5575,7 @@ function _determine_heading_style(paragraph) {
5515
5575
  }
5516
5576
  if (outline_level === null && style_id && style_cache && style_cache[style_id]) {
5517
5577
  outline_level = style_cache[style_id].outline_level;
5578
+ outline_level_from_style = true;
5518
5579
  }
5519
5580
  const style_name = style_id && style_cache && style_cache[style_id] ? style_cache[style_id].name : style_id;
5520
5581
  let normalized_style_name = style_name;
@@ -5525,6 +5586,12 @@ function _determine_heading_style(paragraph) {
5525
5586
  normalized_style_name = "Title";
5526
5587
  }
5527
5588
  }
5589
+ if (outline_level_from_style && outline_level !== null) {
5590
+ const is_heading_or_title = normalized_style_name && (normalized_style_name.startsWith("Heading") || normalized_style_name === "Title");
5591
+ if (!is_heading_or_title) {
5592
+ outline_level = null;
5593
+ }
5594
+ }
5528
5595
  if (outline_level !== null && outline_level >= 0 && outline_level <= 8) {
5529
5596
  if (normalized_style_name && (normalized_style_name.startsWith("Heading") || normalized_style_name === "Title")) {
5530
5597
  return normalized_style_name;
@@ -5573,6 +5640,146 @@ function _offset_to_page(offset, body_page_offsets) {
5573
5640
  }
5574
5641
  return page;
5575
5642
  }
5643
+ function _extract_outline_fast(doc, projected_body, body_page_offsets, paragraph_offsets) {
5644
+ const paragraphs_and_tables = [];
5645
+ const seen_cells = /* @__PURE__ */ new Set();
5646
+ function walk(container) {
5647
+ for (const item of iter_block_items(container)) {
5648
+ const i_type = item.constructor.name;
5649
+ if (i_type === "FootnoteItem") {
5650
+ walk(item);
5651
+ } else if (item instanceof Paragraph) {
5652
+ paragraphs_and_tables.push(["p", item]);
5653
+ } else if (item instanceof Table) {
5654
+ paragraphs_and_tables.push(["t", item]);
5655
+ for (const row of item.rows) {
5656
+ for (const cell of row.cells) {
5657
+ if (seen_cells.has(cell._element)) {
5658
+ continue;
5659
+ }
5660
+ seen_cells.add(cell._element);
5661
+ walk(cell);
5662
+ }
5663
+ }
5664
+ }
5665
+ }
5666
+ }
5667
+ walk(doc);
5668
+ const heading_indices = [];
5669
+ for (let idx = 0; idx < paragraphs_and_tables.length; idx++) {
5670
+ const [kind, item] = paragraphs_and_tables[idx];
5671
+ if (kind !== "p") continue;
5672
+ let hasOffset = false;
5673
+ if (paragraph_offsets instanceof Map) {
5674
+ hasOffset = paragraph_offsets.has(item._element);
5675
+ } else {
5676
+ hasOffset = item._element in paragraph_offsets;
5677
+ }
5678
+ if (!hasOffset) {
5679
+ continue;
5680
+ }
5681
+ if (!_is_heading(item)) continue;
5682
+ if (!_heading_passes_quality_filter_fast(item, projected_body, paragraph_offsets)) continue;
5683
+ heading_indices.push(idx);
5684
+ }
5685
+ if (heading_indices.length === 0) return [];
5686
+ const nodes = [];
5687
+ for (let h_pos = 0; h_pos < heading_indices.length; h_pos++) {
5688
+ const item_idx = heading_indices[h_pos];
5689
+ const paragraph = paragraphs_and_tables[item_idx][1];
5690
+ const level = _heading_level(paragraph);
5691
+ const text = _heading_text_fast(paragraph, projected_body, paragraph_offsets);
5692
+ const style = _determine_heading_style(paragraph);
5693
+ let owned_end = item_idx;
5694
+ for (let next_h_pos = h_pos + 1; next_h_pos < heading_indices.length; next_h_pos++) {
5695
+ const next_idx = heading_indices[next_h_pos];
5696
+ const next_paragraph = paragraphs_and_tables[next_idx][1];
5697
+ if (_heading_level(next_paragraph) <= level) {
5698
+ owned_end = next_idx;
5699
+ break;
5700
+ }
5701
+ }
5702
+ if (owned_end === item_idx) {
5703
+ owned_end = paragraphs_and_tables.length;
5704
+ }
5705
+ const owned = paragraphs_and_tables.slice(item_idx + 1, owned_end);
5706
+ let has_table = false;
5707
+ for (const [kind2, item2] of owned) {
5708
+ if (kind2 === "p" && _is_heading(item2)) {
5709
+ break;
5710
+ }
5711
+ if (kind2 === "t") {
5712
+ has_table = true;
5713
+ break;
5714
+ }
5715
+ }
5716
+ const footnote_ids = _collect_footnote_ids_fast(owned);
5717
+ let para_offset;
5718
+ if (paragraph_offsets instanceof Map) {
5719
+ para_offset = paragraph_offsets.get(paragraph._element);
5720
+ } else {
5721
+ para_offset = paragraph_offsets[paragraph._element];
5722
+ }
5723
+ let page_num = 1;
5724
+ if (para_offset !== void 0) {
5725
+ const [start_offset] = para_offset;
5726
+ page_num = _offset_to_page(start_offset, body_page_offsets);
5727
+ }
5728
+ nodes.push({
5729
+ level,
5730
+ text,
5731
+ page: page_num,
5732
+ style,
5733
+ has_table,
5734
+ footnote_ids
5735
+ });
5736
+ }
5737
+ return nodes;
5738
+ }
5739
+ function _heading_passes_quality_filter_fast(paragraph, projected_body, paragraph_offsets) {
5740
+ const style = _determine_heading_style(paragraph);
5741
+ if (style !== "(heuristic)") return true;
5742
+ const text = _heading_text_fast(paragraph, projected_body, paragraph_offsets);
5743
+ if (!text) return false;
5744
+ const words = text.match(/\w+/g) || [];
5745
+ return words.length >= _HEURISTIC_MIN_WORDS;
5746
+ }
5747
+ function _heading_text_fast(paragraph, projected_body, paragraph_offsets) {
5748
+ let offset;
5749
+ if (paragraph_offsets instanceof Map) {
5750
+ offset = paragraph_offsets.get(paragraph._element);
5751
+ } else {
5752
+ offset = paragraph_offsets[paragraph._element];
5753
+ }
5754
+ if (offset === void 0) {
5755
+ return "";
5756
+ }
5757
+ const [start, length] = offset;
5758
+ const raw = projected_body.substring(start, start + length);
5759
+ let cleaned = _strip_critic_markup(raw);
5760
+ cleaned = _strip_inline_formatting(cleaned);
5761
+ cleaned = cleaned.replace(/^#+\s+/, "");
5762
+ return cleaned.trim();
5763
+ }
5764
+ function _collect_footnote_ids_fast(owned_items) {
5765
+ const seen = /* @__PURE__ */ new Set();
5766
+ const ordered = [];
5767
+ for (const [kind, item] of owned_items) {
5768
+ if (kind !== "p") continue;
5769
+ for (const event of iter_paragraph_content(item)) {
5770
+ if (!("type" in event)) continue;
5771
+ let fn_id = "";
5772
+ if (event.type === "footnote") fn_id = `fn-${event.id}`;
5773
+ else if (event.type === "endnote") fn_id = `en-${event.id}`;
5774
+ else continue;
5775
+ if (!seen.has(fn_id)) {
5776
+ seen.add(fn_id);
5777
+ ordered.push(fn_id);
5778
+ }
5779
+ }
5780
+ }
5781
+ return ordered;
5782
+ }
5576
5783
 
5577
5784
  // src/sanitize/report.ts
5578
5785
  var SanitizeReport = class {
@@ -6260,6 +6467,7 @@ function identifyEngine() {
6260
6467
  DocumentMapper,
6261
6468
  DocumentObject,
6262
6469
  RedlineEngine,
6470
+ _extractTextFromDoc,
6263
6471
  apply_edits_to_markdown,
6264
6472
  create_unified_diff,
6265
6473
  create_word_patch_diff,