@adeu/core 1.10.1 → 1.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +229 -21
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +9 -3
- package/dist/index.d.ts +9 -3
- package/dist/index.js +228 -21
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/src/engine.bugs.test.ts +2 -2
- package/src/engine.ts +53 -6
- package/src/index.ts +1 -1
- package/src/ingest.ts +32 -8
- package/src/mapper.ts +14 -8
- package/src/outline.ts +196 -1
- package/src/parity_gaps.test.ts +98 -0
package/dist/index.cjs
CHANGED
|
@@ -34,6 +34,7 @@ __export(index_exports, {
|
|
|
34
34
|
DocumentMapper: () => DocumentMapper,
|
|
35
35
|
DocumentObject: () => DocumentObject,
|
|
36
36
|
RedlineEngine: () => RedlineEngine,
|
|
37
|
+
_extractTextFromDoc: () => _extractTextFromDoc,
|
|
37
38
|
apply_edits_to_markdown: () => apply_edits_to_markdown,
|
|
38
39
|
create_unified_diff: () => create_unified_diff,
|
|
39
40
|
create_word_patch_diff: () => create_word_patch_diff,
|
|
@@ -1266,14 +1267,16 @@ function* iter_paragraph_content(paragraph) {
|
|
|
1266
1267
|
var DocumentMapper = class {
|
|
1267
1268
|
doc;
|
|
1268
1269
|
clean_view;
|
|
1270
|
+
original_view;
|
|
1269
1271
|
comments_map;
|
|
1270
1272
|
full_text = "";
|
|
1271
1273
|
spans = [];
|
|
1272
1274
|
appendix_start_index = -1;
|
|
1273
1275
|
_text_chunks = [];
|
|
1274
|
-
constructor(doc, clean_view = false) {
|
|
1276
|
+
constructor(doc, clean_view = false, original_view = false) {
|
|
1275
1277
|
this.doc = doc;
|
|
1276
1278
|
this.clean_view = clean_view;
|
|
1279
|
+
this.original_view = original_view;
|
|
1277
1280
|
this.comments_map = extract_comments_data(doc.pkg);
|
|
1278
1281
|
this._build_map();
|
|
1279
1282
|
}
|
|
@@ -1355,14 +1358,15 @@ ${header}`;
|
|
|
1355
1358
|
const ins = trPr ? findChild(trPr, "w:ins") : null;
|
|
1356
1359
|
const del_node = trPr ? findChild(trPr, "w:del") : null;
|
|
1357
1360
|
if (this.clean_view && del_node) continue;
|
|
1361
|
+
if (this.original_view && ins) continue;
|
|
1358
1362
|
if (rows_processed > 0) {
|
|
1359
1363
|
this._add_virtual_text("\n", current, null);
|
|
1360
1364
|
current += 1;
|
|
1361
1365
|
}
|
|
1362
|
-
if (ins && !this.clean_view) {
|
|
1366
|
+
if (ins && !this.clean_view && !this.original_view) {
|
|
1363
1367
|
this._add_virtual_text("{++ ", current, null);
|
|
1364
1368
|
current += 4;
|
|
1365
|
-
} else if (del_node && !this.clean_view) {
|
|
1369
|
+
} else if (del_node && !this.clean_view && !this.original_view) {
|
|
1366
1370
|
this._add_virtual_text("{-- ", current, null);
|
|
1367
1371
|
current += 4;
|
|
1368
1372
|
}
|
|
@@ -1378,11 +1382,11 @@ ${header}`;
|
|
|
1378
1382
|
current = this._map_blocks(cell, current);
|
|
1379
1383
|
cells_processed += 1;
|
|
1380
1384
|
}
|
|
1381
|
-
if (ins && !this.clean_view) {
|
|
1385
|
+
if (ins && !this.clean_view && !this.original_view) {
|
|
1382
1386
|
const suffix = ` |Chg:${ins.getAttribute("w:id")}++}`;
|
|
1383
1387
|
this._add_virtual_text(suffix, current, null);
|
|
1384
1388
|
current += suffix.length;
|
|
1385
|
-
} else if (del_node && !this.clean_view) {
|
|
1389
|
+
} else if (del_node && !this.clean_view && !this.original_view) {
|
|
1386
1390
|
const suffix = ` |Chg:${del_node.getAttribute("w:id")}--}`;
|
|
1387
1391
|
this._add_virtual_text(suffix, current, null);
|
|
1388
1392
|
current += suffix.length;
|
|
@@ -1476,11 +1480,13 @@ ${header}`;
|
|
|
1476
1480
|
}
|
|
1477
1481
|
if (this.clean_view && Object.keys(active_del).length > 0) {
|
|
1478
1482
|
}
|
|
1483
|
+
if (this.original_view && Object.keys(active_ins).length > 0) {
|
|
1484
|
+
}
|
|
1479
1485
|
const full_seg_text = run_parts.map((x) => x[1]).join("");
|
|
1480
1486
|
const curr_ins_id = Object.keys(active_ins).pop() || null;
|
|
1481
1487
|
const curr_del_id = Object.keys(active_del).pop() || null;
|
|
1482
|
-
if (full_seg_text && !(this.clean_view && curr_del_id)) {
|
|
1483
|
-
const new_wrappers = this.clean_view ? ["", ""] : this._get_wrappers(curr_ins_id, curr_del_id, active_ids, active_fmt);
|
|
1488
|
+
if (full_seg_text && !(this.clean_view && curr_del_id) && !(this.original_view && curr_ins_id)) {
|
|
1489
|
+
const new_wrappers = this.clean_view || this.original_view ? ["", ""] : this._get_wrappers(curr_ins_id, curr_del_id, active_ids, active_fmt);
|
|
1484
1490
|
const new_style = [prefix, suffix];
|
|
1485
1491
|
if (pending_runs.length > 0 && new_wrappers[0] === current_wrappers[0] && new_wrappers[1] === current_wrappers[1]) {
|
|
1486
1492
|
let skip_leading_prefix = false;
|
|
@@ -1505,7 +1511,7 @@ ${header}`;
|
|
|
1505
1511
|
}
|
|
1506
1512
|
}
|
|
1507
1513
|
}
|
|
1508
|
-
if (!this.clean_view) {
|
|
1514
|
+
if (!this.clean_view && !this.original_view) {
|
|
1509
1515
|
const has_meta = Object.keys(active_ins).length > 0 || Object.keys(active_del).length > 0 || active_ids.size > 0 || Object.keys(active_fmt).length > 0;
|
|
1510
1516
|
if (has_meta) {
|
|
1511
1517
|
deferred_meta_states.push([{ ...active_ins }, { ...active_del }, new Set(active_ids), { ...active_fmt }]);
|
|
@@ -2681,6 +2687,7 @@ var RedlineEngine = class {
|
|
|
2681
2687
|
mapper;
|
|
2682
2688
|
comments_manager;
|
|
2683
2689
|
clean_mapper = null;
|
|
2690
|
+
original_mapper = null;
|
|
2684
2691
|
skipped_details = [];
|
|
2685
2692
|
constructor(doc, author = "Adeu AI (TS)") {
|
|
2686
2693
|
this.doc = doc;
|
|
@@ -3442,13 +3449,17 @@ var RedlineEngine = class {
|
|
|
3442
3449
|
if (!edit.target_text) continue;
|
|
3443
3450
|
let matches = this.mapper.find_all_match_indices(edit.target_text);
|
|
3444
3451
|
let activeText = this.mapper.full_text;
|
|
3452
|
+
let target_mapper = this.mapper;
|
|
3445
3453
|
if (matches.length === 0) {
|
|
3446
3454
|
if (!this.clean_mapper)
|
|
3447
3455
|
this.clean_mapper = new DocumentMapper(this.doc, true);
|
|
3448
3456
|
matches = this.clean_mapper.find_all_match_indices(edit.target_text);
|
|
3449
|
-
if (matches.length > 0)
|
|
3457
|
+
if (matches.length > 0) {
|
|
3458
|
+
activeText = this.clean_mapper.full_text;
|
|
3459
|
+
target_mapper = this.clean_mapper;
|
|
3460
|
+
}
|
|
3450
3461
|
}
|
|
3451
|
-
if (activeText === this.mapper.full_text && matches.length >
|
|
3462
|
+
if (activeText === this.mapper.full_text && matches.length > 0) {
|
|
3452
3463
|
const liveMatches = matches.filter(([start, length]) => {
|
|
3453
3464
|
const realSpans = this.mapper.spans.filter(
|
|
3454
3465
|
(s) => s.run !== null && s.end > start && s.start < start + length
|
|
@@ -3456,13 +3467,51 @@ var RedlineEngine = class {
|
|
|
3456
3467
|
if (realSpans.length === 0) return true;
|
|
3457
3468
|
return realSpans.some((s) => !s.del_id);
|
|
3458
3469
|
});
|
|
3459
|
-
|
|
3470
|
+
matches = liveMatches;
|
|
3460
3471
|
}
|
|
3472
|
+
let is_deleted_text = false;
|
|
3473
|
+
const deleted_authors = /* @__PURE__ */ new Set();
|
|
3461
3474
|
if (matches.length === 0) {
|
|
3462
|
-
|
|
3463
|
-
|
|
3475
|
+
if (!this.original_mapper) {
|
|
3476
|
+
this.original_mapper = new DocumentMapper(this.doc, false, true);
|
|
3477
|
+
}
|
|
3478
|
+
const orig_matches = this.original_mapper.find_all_match_indices(edit.target_text);
|
|
3479
|
+
if (orig_matches.length > 0) {
|
|
3480
|
+
is_deleted_text = true;
|
|
3481
|
+
for (const [start, length] of orig_matches) {
|
|
3482
|
+
const spans = this.original_mapper.spans.filter(
|
|
3483
|
+
(s) => s.end > start && s.start < start + length
|
|
3484
|
+
);
|
|
3485
|
+
for (const s of spans) {
|
|
3486
|
+
if (s.run !== null) {
|
|
3487
|
+
let parent = s.run._element;
|
|
3488
|
+
while (parent) {
|
|
3489
|
+
if (parent.nodeType === 1 && parent.tagName === "w:del") {
|
|
3490
|
+
const auth = parent.getAttribute("w:author");
|
|
3491
|
+
if (auth) {
|
|
3492
|
+
deleted_authors.add(auth);
|
|
3493
|
+
}
|
|
3494
|
+
break;
|
|
3495
|
+
}
|
|
3496
|
+
parent = parent.parentNode;
|
|
3497
|
+
}
|
|
3498
|
+
}
|
|
3499
|
+
}
|
|
3500
|
+
}
|
|
3501
|
+
}
|
|
3502
|
+
}
|
|
3503
|
+
if (matches.length === 0) {
|
|
3504
|
+
if (is_deleted_text) {
|
|
3505
|
+
const author_phrase = deleted_authors.size > 0 ? `by ${Array.from(deleted_authors).sort().join(", ")}` : "by an existing revision";
|
|
3506
|
+
errors.push(
|
|
3507
|
+
`- Edit ${i + 1} Failed: Target text matches text inside a tracked deletion ${author_phrase}. Reject/accept that change first or target the active replacement text instead.`
|
|
3508
|
+
);
|
|
3509
|
+
} else {
|
|
3510
|
+
errors.push(
|
|
3511
|
+
`- Edit ${i + 1} Failed: Target text not found in document:
|
|
3464
3512
|
"${edit.target_text}"`
|
|
3465
|
-
|
|
3513
|
+
);
|
|
3514
|
+
}
|
|
3466
3515
|
} else if (matches.length > 1) {
|
|
3467
3516
|
const positions = matches.map(([start, length]) => [
|
|
3468
3517
|
start,
|
|
@@ -4884,13 +4933,14 @@ async function extractTextFromBuffer(buffer, cleanView = false) {
|
|
|
4884
4933
|
const doc = await DocumentObject.load(buffer);
|
|
4885
4934
|
return _extractTextFromDoc(doc, cleanView);
|
|
4886
4935
|
}
|
|
4887
|
-
function _extractTextFromDoc(doc, cleanView = false, includeAppendix = true) {
|
|
4936
|
+
function _extractTextFromDoc(doc, cleanView = false, includeAppendix = true, return_paragraph_offsets = false) {
|
|
4888
4937
|
const comments_map = extract_comments_data(doc.pkg);
|
|
4889
4938
|
const full_text = [];
|
|
4939
|
+
const paragraph_offsets = /* @__PURE__ */ new Map();
|
|
4890
4940
|
let cursor = 0;
|
|
4891
4941
|
for (const part of iter_document_parts(doc)) {
|
|
4892
4942
|
const part_cursor = full_text.length > 0 ? cursor + 2 : cursor;
|
|
4893
|
-
const part_text = _extract_blocks(part, comments_map, cleanView, part_cursor);
|
|
4943
|
+
const part_text = _extract_blocks(part, comments_map, cleanView, part_cursor, return_paragraph_offsets ? paragraph_offsets : void 0);
|
|
4894
4944
|
if (part_text) {
|
|
4895
4945
|
if (full_text.length > 0) cursor += 2;
|
|
4896
4946
|
full_text.push(part_text);
|
|
@@ -4902,9 +4952,12 @@ function _extractTextFromDoc(doc, cleanView = false, includeAppendix = true) {
|
|
|
4902
4952
|
const appendix = build_structural_appendix(doc, base_text);
|
|
4903
4953
|
if (appendix) base_text += appendix;
|
|
4904
4954
|
}
|
|
4955
|
+
if (return_paragraph_offsets) {
|
|
4956
|
+
return { text: base_text, paragraph_offsets };
|
|
4957
|
+
}
|
|
4905
4958
|
return base_text;
|
|
4906
4959
|
}
|
|
4907
|
-
function _extract_blocks(container, comments_map, cleanView, cursor) {
|
|
4960
|
+
function _extract_blocks(container, comments_map, cleanView, cursor, paragraph_offsets) {
|
|
4908
4961
|
const part = container.part || container;
|
|
4909
4962
|
const [style_cache, default_pstyle] = _get_style_cache(part);
|
|
4910
4963
|
const blocks = [];
|
|
@@ -4923,7 +4976,7 @@ ${header}`;
|
|
|
4923
4976
|
if (!is_first_block) local_cursor += 2;
|
|
4924
4977
|
const block_start = local_cursor;
|
|
4925
4978
|
if (item.constructor.name === "FootnoteItem") {
|
|
4926
|
-
const fn_text = _extract_blocks(item, comments_map, cleanView, block_start);
|
|
4979
|
+
const fn_text = _extract_blocks(item, comments_map, cleanView, block_start, paragraph_offsets);
|
|
4927
4980
|
if (fn_text) {
|
|
4928
4981
|
blocks.push(fn_text);
|
|
4929
4982
|
local_cursor = block_start + fn_text.length;
|
|
@@ -4939,11 +4992,14 @@ ${header}`;
|
|
|
4939
4992
|
const p_text = build_paragraph_text(item, comments_map, cleanView, style_cache, default_pstyle);
|
|
4940
4993
|
const full_block = prefix + p_text;
|
|
4941
4994
|
blocks.push(full_block);
|
|
4995
|
+
if (paragraph_offsets) {
|
|
4996
|
+
paragraph_offsets.set(item._element, [block_start, full_block.length]);
|
|
4997
|
+
}
|
|
4942
4998
|
local_cursor = block_start + full_block.length;
|
|
4943
4999
|
is_first_para = false;
|
|
4944
5000
|
is_first_block = false;
|
|
4945
5001
|
} else if (item instanceof Table) {
|
|
4946
|
-
const table_text = extract_table(item, comments_map, cleanView, block_start);
|
|
5002
|
+
const table_text = extract_table(item, comments_map, cleanView, block_start, paragraph_offsets);
|
|
4947
5003
|
if (table_text) {
|
|
4948
5004
|
blocks.push(table_text);
|
|
4949
5005
|
local_cursor = block_start + table_text.length;
|
|
@@ -4956,7 +5012,7 @@ ${header}`;
|
|
|
4956
5012
|
}
|
|
4957
5013
|
return blocks.join("\n\n");
|
|
4958
5014
|
}
|
|
4959
|
-
function extract_table(table, comments_map, cleanView, cursor) {
|
|
5015
|
+
function extract_table(table, comments_map, cleanView, cursor, paragraph_offsets) {
|
|
4960
5016
|
const rows_text = [];
|
|
4961
5017
|
let rows_processed = 0;
|
|
4962
5018
|
let local_cursor = cursor;
|
|
@@ -4975,7 +5031,7 @@ function extract_table(table, comments_map, cleanView, cursor) {
|
|
|
4975
5031
|
if (seen_cells.has(cell)) continue;
|
|
4976
5032
|
seen_cells.add(cell);
|
|
4977
5033
|
if (!first_cell) cell_cursor += 3;
|
|
4978
|
-
const cell_content = _extract_blocks(cell, comments_map, cleanView, cell_cursor);
|
|
5034
|
+
const cell_content = _extract_blocks(cell, comments_map, cleanView, cell_cursor, paragraph_offsets);
|
|
4979
5035
|
cell_texts.push(cell_content);
|
|
4980
5036
|
cell_cursor += cell_content.length;
|
|
4981
5037
|
first_cell = false;
|
|
@@ -5232,6 +5288,9 @@ function extract_outline(doc, projected_body, body_pages, body_page_offsets, par
|
|
|
5232
5288
|
if (body_pages.length !== body_page_offsets.length) {
|
|
5233
5289
|
throw new Error("body_pages and body_page_offsets length mismatch");
|
|
5234
5290
|
}
|
|
5291
|
+
if (paragraph_offsets) {
|
|
5292
|
+
return _extract_outline_fast(doc, projected_body, body_page_offsets, paragraph_offsets);
|
|
5293
|
+
}
|
|
5235
5294
|
const comments_map = extract_comments_data(doc.pkg);
|
|
5236
5295
|
const block_records = _walk_doc_body(doc, comments_map);
|
|
5237
5296
|
const heading_indices = [];
|
|
@@ -5507,6 +5566,7 @@ function _determine_heading_style(paragraph) {
|
|
|
5507
5566
|
if (pStyle) style_id = pStyle.getAttribute("w:val") || default_pstyle;
|
|
5508
5567
|
}
|
|
5509
5568
|
let outline_level = null;
|
|
5569
|
+
let outline_level_from_style = false;
|
|
5510
5570
|
if (pPr) {
|
|
5511
5571
|
const oLvl = findChild(pPr, "w:outlineLvl");
|
|
5512
5572
|
if (oLvl && /^\d+$/.test(oLvl.getAttribute("w:val") || "")) {
|
|
@@ -5515,6 +5575,7 @@ function _determine_heading_style(paragraph) {
|
|
|
5515
5575
|
}
|
|
5516
5576
|
if (outline_level === null && style_id && style_cache && style_cache[style_id]) {
|
|
5517
5577
|
outline_level = style_cache[style_id].outline_level;
|
|
5578
|
+
outline_level_from_style = true;
|
|
5518
5579
|
}
|
|
5519
5580
|
const style_name = style_id && style_cache && style_cache[style_id] ? style_cache[style_id].name : style_id;
|
|
5520
5581
|
let normalized_style_name = style_name;
|
|
@@ -5525,6 +5586,12 @@ function _determine_heading_style(paragraph) {
|
|
|
5525
5586
|
normalized_style_name = "Title";
|
|
5526
5587
|
}
|
|
5527
5588
|
}
|
|
5589
|
+
if (outline_level_from_style && outline_level !== null) {
|
|
5590
|
+
const is_heading_or_title = normalized_style_name && (normalized_style_name.startsWith("Heading") || normalized_style_name === "Title");
|
|
5591
|
+
if (!is_heading_or_title) {
|
|
5592
|
+
outline_level = null;
|
|
5593
|
+
}
|
|
5594
|
+
}
|
|
5528
5595
|
if (outline_level !== null && outline_level >= 0 && outline_level <= 8) {
|
|
5529
5596
|
if (normalized_style_name && (normalized_style_name.startsWith("Heading") || normalized_style_name === "Title")) {
|
|
5530
5597
|
return normalized_style_name;
|
|
@@ -5573,6 +5640,146 @@ function _offset_to_page(offset, body_page_offsets) {
|
|
|
5573
5640
|
}
|
|
5574
5641
|
return page;
|
|
5575
5642
|
}
|
|
5643
|
+
function _extract_outline_fast(doc, projected_body, body_page_offsets, paragraph_offsets) {
|
|
5644
|
+
const paragraphs_and_tables = [];
|
|
5645
|
+
const seen_cells = /* @__PURE__ */ new Set();
|
|
5646
|
+
function walk(container) {
|
|
5647
|
+
for (const item of iter_block_items(container)) {
|
|
5648
|
+
const i_type = item.constructor.name;
|
|
5649
|
+
if (i_type === "FootnoteItem") {
|
|
5650
|
+
walk(item);
|
|
5651
|
+
} else if (item instanceof Paragraph) {
|
|
5652
|
+
paragraphs_and_tables.push(["p", item]);
|
|
5653
|
+
} else if (item instanceof Table) {
|
|
5654
|
+
paragraphs_and_tables.push(["t", item]);
|
|
5655
|
+
for (const row of item.rows) {
|
|
5656
|
+
for (const cell of row.cells) {
|
|
5657
|
+
if (seen_cells.has(cell._element)) {
|
|
5658
|
+
continue;
|
|
5659
|
+
}
|
|
5660
|
+
seen_cells.add(cell._element);
|
|
5661
|
+
walk(cell);
|
|
5662
|
+
}
|
|
5663
|
+
}
|
|
5664
|
+
}
|
|
5665
|
+
}
|
|
5666
|
+
}
|
|
5667
|
+
walk(doc);
|
|
5668
|
+
const heading_indices = [];
|
|
5669
|
+
for (let idx = 0; idx < paragraphs_and_tables.length; idx++) {
|
|
5670
|
+
const [kind, item] = paragraphs_and_tables[idx];
|
|
5671
|
+
if (kind !== "p") continue;
|
|
5672
|
+
let hasOffset = false;
|
|
5673
|
+
if (paragraph_offsets instanceof Map) {
|
|
5674
|
+
hasOffset = paragraph_offsets.has(item._element);
|
|
5675
|
+
} else {
|
|
5676
|
+
hasOffset = item._element in paragraph_offsets;
|
|
5677
|
+
}
|
|
5678
|
+
if (!hasOffset) {
|
|
5679
|
+
continue;
|
|
5680
|
+
}
|
|
5681
|
+
if (!_is_heading(item)) continue;
|
|
5682
|
+
if (!_heading_passes_quality_filter_fast(item, projected_body, paragraph_offsets)) continue;
|
|
5683
|
+
heading_indices.push(idx);
|
|
5684
|
+
}
|
|
5685
|
+
if (heading_indices.length === 0) return [];
|
|
5686
|
+
const nodes = [];
|
|
5687
|
+
for (let h_pos = 0; h_pos < heading_indices.length; h_pos++) {
|
|
5688
|
+
const item_idx = heading_indices[h_pos];
|
|
5689
|
+
const paragraph = paragraphs_and_tables[item_idx][1];
|
|
5690
|
+
const level = _heading_level(paragraph);
|
|
5691
|
+
const text = _heading_text_fast(paragraph, projected_body, paragraph_offsets);
|
|
5692
|
+
const style = _determine_heading_style(paragraph);
|
|
5693
|
+
let owned_end = item_idx;
|
|
5694
|
+
for (let next_h_pos = h_pos + 1; next_h_pos < heading_indices.length; next_h_pos++) {
|
|
5695
|
+
const next_idx = heading_indices[next_h_pos];
|
|
5696
|
+
const next_paragraph = paragraphs_and_tables[next_idx][1];
|
|
5697
|
+
if (_heading_level(next_paragraph) <= level) {
|
|
5698
|
+
owned_end = next_idx;
|
|
5699
|
+
break;
|
|
5700
|
+
}
|
|
5701
|
+
}
|
|
5702
|
+
if (owned_end === item_idx) {
|
|
5703
|
+
owned_end = paragraphs_and_tables.length;
|
|
5704
|
+
}
|
|
5705
|
+
const owned = paragraphs_and_tables.slice(item_idx + 1, owned_end);
|
|
5706
|
+
let has_table = false;
|
|
5707
|
+
for (const [kind2, item2] of owned) {
|
|
5708
|
+
if (kind2 === "p" && _is_heading(item2)) {
|
|
5709
|
+
break;
|
|
5710
|
+
}
|
|
5711
|
+
if (kind2 === "t") {
|
|
5712
|
+
has_table = true;
|
|
5713
|
+
break;
|
|
5714
|
+
}
|
|
5715
|
+
}
|
|
5716
|
+
const footnote_ids = _collect_footnote_ids_fast(owned);
|
|
5717
|
+
let para_offset;
|
|
5718
|
+
if (paragraph_offsets instanceof Map) {
|
|
5719
|
+
para_offset = paragraph_offsets.get(paragraph._element);
|
|
5720
|
+
} else {
|
|
5721
|
+
para_offset = paragraph_offsets[paragraph._element];
|
|
5722
|
+
}
|
|
5723
|
+
let page_num = 1;
|
|
5724
|
+
if (para_offset !== void 0) {
|
|
5725
|
+
const [start_offset] = para_offset;
|
|
5726
|
+
page_num = _offset_to_page(start_offset, body_page_offsets);
|
|
5727
|
+
}
|
|
5728
|
+
nodes.push({
|
|
5729
|
+
level,
|
|
5730
|
+
text,
|
|
5731
|
+
page: page_num,
|
|
5732
|
+
style,
|
|
5733
|
+
has_table,
|
|
5734
|
+
footnote_ids
|
|
5735
|
+
});
|
|
5736
|
+
}
|
|
5737
|
+
return nodes;
|
|
5738
|
+
}
|
|
5739
|
+
function _heading_passes_quality_filter_fast(paragraph, projected_body, paragraph_offsets) {
|
|
5740
|
+
const style = _determine_heading_style(paragraph);
|
|
5741
|
+
if (style !== "(heuristic)") return true;
|
|
5742
|
+
const text = _heading_text_fast(paragraph, projected_body, paragraph_offsets);
|
|
5743
|
+
if (!text) return false;
|
|
5744
|
+
const words = text.match(/\w+/g) || [];
|
|
5745
|
+
return words.length >= _HEURISTIC_MIN_WORDS;
|
|
5746
|
+
}
|
|
5747
|
+
function _heading_text_fast(paragraph, projected_body, paragraph_offsets) {
|
|
5748
|
+
let offset;
|
|
5749
|
+
if (paragraph_offsets instanceof Map) {
|
|
5750
|
+
offset = paragraph_offsets.get(paragraph._element);
|
|
5751
|
+
} else {
|
|
5752
|
+
offset = paragraph_offsets[paragraph._element];
|
|
5753
|
+
}
|
|
5754
|
+
if (offset === void 0) {
|
|
5755
|
+
return "";
|
|
5756
|
+
}
|
|
5757
|
+
const [start, length] = offset;
|
|
5758
|
+
const raw = projected_body.substring(start, start + length);
|
|
5759
|
+
let cleaned = _strip_critic_markup(raw);
|
|
5760
|
+
cleaned = _strip_inline_formatting(cleaned);
|
|
5761
|
+
cleaned = cleaned.replace(/^#+\s+/, "");
|
|
5762
|
+
return cleaned.trim();
|
|
5763
|
+
}
|
|
5764
|
+
function _collect_footnote_ids_fast(owned_items) {
|
|
5765
|
+
const seen = /* @__PURE__ */ new Set();
|
|
5766
|
+
const ordered = [];
|
|
5767
|
+
for (const [kind, item] of owned_items) {
|
|
5768
|
+
if (kind !== "p") continue;
|
|
5769
|
+
for (const event of iter_paragraph_content(item)) {
|
|
5770
|
+
if (!("type" in event)) continue;
|
|
5771
|
+
let fn_id = "";
|
|
5772
|
+
if (event.type === "footnote") fn_id = `fn-${event.id}`;
|
|
5773
|
+
else if (event.type === "endnote") fn_id = `en-${event.id}`;
|
|
5774
|
+
else continue;
|
|
5775
|
+
if (!seen.has(fn_id)) {
|
|
5776
|
+
seen.add(fn_id);
|
|
5777
|
+
ordered.push(fn_id);
|
|
5778
|
+
}
|
|
5779
|
+
}
|
|
5780
|
+
}
|
|
5781
|
+
return ordered;
|
|
5782
|
+
}
|
|
5576
5783
|
|
|
5577
5784
|
// src/sanitize/report.ts
|
|
5578
5785
|
var SanitizeReport = class {
|
|
@@ -6260,6 +6467,7 @@ function identifyEngine() {
|
|
|
6260
6467
|
DocumentMapper,
|
|
6261
6468
|
DocumentObject,
|
|
6262
6469
|
RedlineEngine,
|
|
6470
|
+
_extractTextFromDoc,
|
|
6263
6471
|
apply_edits_to_markdown,
|
|
6264
6472
|
create_unified_diff,
|
|
6265
6473
|
create_word_patch_diff,
|