@adeu/core 1.10.0 → 1.11.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +259 -30
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +9 -3
- package/dist/index.d.ts +9 -3
- package/dist/index.js +258 -30
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/src/engine.bugs.test.ts +2 -2
- package/src/engine.ts +179 -49
- package/src/index.ts +1 -1
- package/src/ingest.ts +32 -8
- package/src/mapper.ts +14 -8
- package/src/outline.ts +196 -1
- package/src/parity_gaps.test.ts +98 -0
package/dist/index.cjs
CHANGED
|
@@ -34,6 +34,7 @@ __export(index_exports, {
|
|
|
34
34
|
DocumentMapper: () => DocumentMapper,
|
|
35
35
|
DocumentObject: () => DocumentObject,
|
|
36
36
|
RedlineEngine: () => RedlineEngine,
|
|
37
|
+
_extractTextFromDoc: () => _extractTextFromDoc,
|
|
37
38
|
apply_edits_to_markdown: () => apply_edits_to_markdown,
|
|
38
39
|
create_unified_diff: () => create_unified_diff,
|
|
39
40
|
create_word_patch_diff: () => create_word_patch_diff,
|
|
@@ -1266,14 +1267,16 @@ function* iter_paragraph_content(paragraph) {
|
|
|
1266
1267
|
var DocumentMapper = class {
|
|
1267
1268
|
doc;
|
|
1268
1269
|
clean_view;
|
|
1270
|
+
original_view;
|
|
1269
1271
|
comments_map;
|
|
1270
1272
|
full_text = "";
|
|
1271
1273
|
spans = [];
|
|
1272
1274
|
appendix_start_index = -1;
|
|
1273
1275
|
_text_chunks = [];
|
|
1274
|
-
constructor(doc, clean_view = false) {
|
|
1276
|
+
constructor(doc, clean_view = false, original_view = false) {
|
|
1275
1277
|
this.doc = doc;
|
|
1276
1278
|
this.clean_view = clean_view;
|
|
1279
|
+
this.original_view = original_view;
|
|
1277
1280
|
this.comments_map = extract_comments_data(doc.pkg);
|
|
1278
1281
|
this._build_map();
|
|
1279
1282
|
}
|
|
@@ -1355,14 +1358,15 @@ ${header}`;
|
|
|
1355
1358
|
const ins = trPr ? findChild(trPr, "w:ins") : null;
|
|
1356
1359
|
const del_node = trPr ? findChild(trPr, "w:del") : null;
|
|
1357
1360
|
if (this.clean_view && del_node) continue;
|
|
1361
|
+
if (this.original_view && ins) continue;
|
|
1358
1362
|
if (rows_processed > 0) {
|
|
1359
1363
|
this._add_virtual_text("\n", current, null);
|
|
1360
1364
|
current += 1;
|
|
1361
1365
|
}
|
|
1362
|
-
if (ins && !this.clean_view) {
|
|
1366
|
+
if (ins && !this.clean_view && !this.original_view) {
|
|
1363
1367
|
this._add_virtual_text("{++ ", current, null);
|
|
1364
1368
|
current += 4;
|
|
1365
|
-
} else if (del_node && !this.clean_view) {
|
|
1369
|
+
} else if (del_node && !this.clean_view && !this.original_view) {
|
|
1366
1370
|
this._add_virtual_text("{-- ", current, null);
|
|
1367
1371
|
current += 4;
|
|
1368
1372
|
}
|
|
@@ -1378,11 +1382,11 @@ ${header}`;
|
|
|
1378
1382
|
current = this._map_blocks(cell, current);
|
|
1379
1383
|
cells_processed += 1;
|
|
1380
1384
|
}
|
|
1381
|
-
if (ins && !this.clean_view) {
|
|
1385
|
+
if (ins && !this.clean_view && !this.original_view) {
|
|
1382
1386
|
const suffix = ` |Chg:${ins.getAttribute("w:id")}++}`;
|
|
1383
1387
|
this._add_virtual_text(suffix, current, null);
|
|
1384
1388
|
current += suffix.length;
|
|
1385
|
-
} else if (del_node && !this.clean_view) {
|
|
1389
|
+
} else if (del_node && !this.clean_view && !this.original_view) {
|
|
1386
1390
|
const suffix = ` |Chg:${del_node.getAttribute("w:id")}--}`;
|
|
1387
1391
|
this._add_virtual_text(suffix, current, null);
|
|
1388
1392
|
current += suffix.length;
|
|
@@ -1476,11 +1480,13 @@ ${header}`;
|
|
|
1476
1480
|
}
|
|
1477
1481
|
if (this.clean_view && Object.keys(active_del).length > 0) {
|
|
1478
1482
|
}
|
|
1483
|
+
if (this.original_view && Object.keys(active_ins).length > 0) {
|
|
1484
|
+
}
|
|
1479
1485
|
const full_seg_text = run_parts.map((x) => x[1]).join("");
|
|
1480
1486
|
const curr_ins_id = Object.keys(active_ins).pop() || null;
|
|
1481
1487
|
const curr_del_id = Object.keys(active_del).pop() || null;
|
|
1482
|
-
if (full_seg_text && !(this.clean_view && curr_del_id)) {
|
|
1483
|
-
const new_wrappers = this.clean_view ? ["", ""] : this._get_wrappers(curr_ins_id, curr_del_id, active_ids, active_fmt);
|
|
1488
|
+
if (full_seg_text && !(this.clean_view && curr_del_id) && !(this.original_view && curr_ins_id)) {
|
|
1489
|
+
const new_wrappers = this.clean_view || this.original_view ? ["", ""] : this._get_wrappers(curr_ins_id, curr_del_id, active_ids, active_fmt);
|
|
1484
1490
|
const new_style = [prefix, suffix];
|
|
1485
1491
|
if (pending_runs.length > 0 && new_wrappers[0] === current_wrappers[0] && new_wrappers[1] === current_wrappers[1]) {
|
|
1486
1492
|
let skip_leading_prefix = false;
|
|
@@ -1505,7 +1511,7 @@ ${header}`;
|
|
|
1505
1511
|
}
|
|
1506
1512
|
}
|
|
1507
1513
|
}
|
|
1508
|
-
if (!this.clean_view) {
|
|
1514
|
+
if (!this.clean_view && !this.original_view) {
|
|
1509
1515
|
const has_meta = Object.keys(active_ins).length > 0 || Object.keys(active_del).length > 0 || active_ids.size > 0 || Object.keys(active_fmt).length > 0;
|
|
1510
1516
|
if (has_meta) {
|
|
1511
1517
|
deferred_meta_states.push([{ ...active_ins }, { ...active_del }, new Set(active_ids), { ...active_fmt }]);
|
|
@@ -2681,6 +2687,7 @@ var RedlineEngine = class {
|
|
|
2681
2687
|
mapper;
|
|
2682
2688
|
comments_manager;
|
|
2683
2689
|
clean_mapper = null;
|
|
2690
|
+
original_mapper = null;
|
|
2684
2691
|
skipped_details = [];
|
|
2685
2692
|
constructor(doc, author = "Adeu AI (TS)") {
|
|
2686
2693
|
this.doc = doc;
|
|
@@ -2720,7 +2727,10 @@ var RedlineEngine = class {
|
|
|
2720
2727
|
if (!full_text) return [null, null];
|
|
2721
2728
|
const before_start = Math.max(0, start_idx - 30);
|
|
2722
2729
|
const context_before = full_text.substring(before_start, start_idx);
|
|
2723
|
-
const context_after = full_text.substring(
|
|
2730
|
+
const context_after = full_text.substring(
|
|
2731
|
+
start_idx + length,
|
|
2732
|
+
start_idx + length + 30
|
|
2733
|
+
);
|
|
2724
2734
|
const critic_markup = `${context_before}{--${target_text}--}{++${new_text}++}${context_after}`;
|
|
2725
2735
|
let clean_text = critic_markup;
|
|
2726
2736
|
clean_text = clean_text.replace(/\{>>.*?<<\}/gs, "");
|
|
@@ -2892,7 +2902,9 @@ var RedlineEngine = class {
|
|
|
2892
2902
|
overrideEl.parentNode?.removeChild(overrideEl);
|
|
2893
2903
|
}
|
|
2894
2904
|
}
|
|
2895
|
-
pkg.parts = pkg.parts.filter(
|
|
2905
|
+
pkg.parts = pkg.parts.filter(
|
|
2906
|
+
(p) => !p.partname.toLowerCase().includes("comments")
|
|
2907
|
+
);
|
|
2896
2908
|
for (const key of Object.keys(pkg.unzipped)) {
|
|
2897
2909
|
if (key.toLowerCase().includes("comments")) {
|
|
2898
2910
|
delete pkg.unzipped[key];
|
|
@@ -3437,13 +3449,17 @@ var RedlineEngine = class {
|
|
|
3437
3449
|
if (!edit.target_text) continue;
|
|
3438
3450
|
let matches = this.mapper.find_all_match_indices(edit.target_text);
|
|
3439
3451
|
let activeText = this.mapper.full_text;
|
|
3452
|
+
let target_mapper = this.mapper;
|
|
3440
3453
|
if (matches.length === 0) {
|
|
3441
3454
|
if (!this.clean_mapper)
|
|
3442
3455
|
this.clean_mapper = new DocumentMapper(this.doc, true);
|
|
3443
3456
|
matches = this.clean_mapper.find_all_match_indices(edit.target_text);
|
|
3444
|
-
if (matches.length > 0)
|
|
3457
|
+
if (matches.length > 0) {
|
|
3458
|
+
activeText = this.clean_mapper.full_text;
|
|
3459
|
+
target_mapper = this.clean_mapper;
|
|
3460
|
+
}
|
|
3445
3461
|
}
|
|
3446
|
-
if (activeText === this.mapper.full_text && matches.length >
|
|
3462
|
+
if (activeText === this.mapper.full_text && matches.length > 0) {
|
|
3447
3463
|
const liveMatches = matches.filter(([start, length]) => {
|
|
3448
3464
|
const realSpans = this.mapper.spans.filter(
|
|
3449
3465
|
(s) => s.run !== null && s.end > start && s.start < start + length
|
|
@@ -3451,13 +3467,51 @@ var RedlineEngine = class {
|
|
|
3451
3467
|
if (realSpans.length === 0) return true;
|
|
3452
3468
|
return realSpans.some((s) => !s.del_id);
|
|
3453
3469
|
});
|
|
3454
|
-
|
|
3470
|
+
matches = liveMatches;
|
|
3455
3471
|
}
|
|
3472
|
+
let is_deleted_text = false;
|
|
3473
|
+
const deleted_authors = /* @__PURE__ */ new Set();
|
|
3456
3474
|
if (matches.length === 0) {
|
|
3457
|
-
|
|
3458
|
-
|
|
3475
|
+
if (!this.original_mapper) {
|
|
3476
|
+
this.original_mapper = new DocumentMapper(this.doc, false, true);
|
|
3477
|
+
}
|
|
3478
|
+
const orig_matches = this.original_mapper.find_all_match_indices(edit.target_text);
|
|
3479
|
+
if (orig_matches.length > 0) {
|
|
3480
|
+
is_deleted_text = true;
|
|
3481
|
+
for (const [start, length] of orig_matches) {
|
|
3482
|
+
const spans = this.original_mapper.spans.filter(
|
|
3483
|
+
(s) => s.end > start && s.start < start + length
|
|
3484
|
+
);
|
|
3485
|
+
for (const s of spans) {
|
|
3486
|
+
if (s.run !== null) {
|
|
3487
|
+
let parent = s.run._element;
|
|
3488
|
+
while (parent) {
|
|
3489
|
+
if (parent.nodeType === 1 && parent.tagName === "w:del") {
|
|
3490
|
+
const auth = parent.getAttribute("w:author");
|
|
3491
|
+
if (auth) {
|
|
3492
|
+
deleted_authors.add(auth);
|
|
3493
|
+
}
|
|
3494
|
+
break;
|
|
3495
|
+
}
|
|
3496
|
+
parent = parent.parentNode;
|
|
3497
|
+
}
|
|
3498
|
+
}
|
|
3499
|
+
}
|
|
3500
|
+
}
|
|
3501
|
+
}
|
|
3502
|
+
}
|
|
3503
|
+
if (matches.length === 0) {
|
|
3504
|
+
if (is_deleted_text) {
|
|
3505
|
+
const author_phrase = deleted_authors.size > 0 ? `by ${Array.from(deleted_authors).sort().join(", ")}` : "by an existing revision";
|
|
3506
|
+
errors.push(
|
|
3507
|
+
`- Edit ${i + 1} Failed: Target text matches text inside a tracked deletion ${author_phrase}. Reject/accept that change first or target the active replacement text instead.`
|
|
3508
|
+
);
|
|
3509
|
+
} else {
|
|
3510
|
+
errors.push(
|
|
3511
|
+
`- Edit ${i + 1} Failed: Target text not found in document:
|
|
3459
3512
|
"${edit.target_text}"`
|
|
3460
|
-
|
|
3513
|
+
);
|
|
3514
|
+
}
|
|
3461
3515
|
} else if (matches.length > 1) {
|
|
3462
3516
|
const positions = matches.map(([start, length]) => [
|
|
3463
3517
|
start,
|
|
@@ -3478,7 +3532,10 @@ var RedlineEngine = class {
|
|
|
3478
3532
|
const [pfx, sfx] = trim_common_context(matched, edit.new_text || "");
|
|
3479
3533
|
const t_end = matched.length - sfx;
|
|
3480
3534
|
const final_target = matched.substring(pfx, t_end);
|
|
3481
|
-
const final_new = (edit.new_text || "").substring(
|
|
3535
|
+
const final_new = (edit.new_text || "").substring(
|
|
3536
|
+
pfx,
|
|
3537
|
+
(edit.new_text || "").length - sfx
|
|
3538
|
+
);
|
|
3482
3539
|
if (final_target.includes("\n\n")) {
|
|
3483
3540
|
if (final_new.includes("\n\n")) {
|
|
3484
3541
|
const parts = matched.split("\n\n");
|
|
@@ -3634,7 +3691,9 @@ var RedlineEngine = class {
|
|
|
3634
3691
|
if (dry_run_mode) {
|
|
3635
3692
|
for (const edit of edits) {
|
|
3636
3693
|
const single_errors = this.validate_edits([edit]);
|
|
3637
|
-
const warning = this._check_punctuation_warning(
|
|
3694
|
+
const warning = this._check_punctuation_warning(
|
|
3695
|
+
edit.target_text || ""
|
|
3696
|
+
);
|
|
3638
3697
|
if (single_errors.length > 0) {
|
|
3639
3698
|
skipped_edits++;
|
|
3640
3699
|
edits_reports.push({
|
|
@@ -3688,7 +3747,9 @@ var RedlineEngine = class {
|
|
|
3688
3747
|
for (const edit of cloned_edits) {
|
|
3689
3748
|
const success = edit._applied_status || false;
|
|
3690
3749
|
const error_msg = edit._error_msg || null;
|
|
3691
|
-
const warning = this._check_punctuation_warning(
|
|
3750
|
+
const warning = this._check_punctuation_warning(
|
|
3751
|
+
edit.target_text || ""
|
|
3752
|
+
);
|
|
3692
3753
|
let critic_markup = null;
|
|
3693
3754
|
let clean_text = null;
|
|
3694
3755
|
if (success) {
|
|
@@ -3716,7 +3777,7 @@ var RedlineEngine = class {
|
|
|
3716
3777
|
skipped_details: this.skipped_details,
|
|
3717
3778
|
edits: edits_reports,
|
|
3718
3779
|
engine: "node",
|
|
3719
|
-
version: "1.
|
|
3780
|
+
version: "1.10.0"
|
|
3720
3781
|
};
|
|
3721
3782
|
}
|
|
3722
3783
|
apply_edits(edits) {
|
|
@@ -4239,7 +4300,10 @@ var RedlineEngine = class {
|
|
|
4239
4300
|
if (result.first_node.tagName === "w:p") {
|
|
4240
4301
|
first_anchor_target = findAllDescendants(result.first_node, "w:ins")[0] || result.first_node;
|
|
4241
4302
|
}
|
|
4242
|
-
const anchor = ascend_to_paragraph_child(
|
|
4303
|
+
const anchor = ascend_to_paragraph_child(
|
|
4304
|
+
first_anchor_target,
|
|
4305
|
+
host_p
|
|
4306
|
+
);
|
|
4243
4307
|
this._attach_comment(host_p, anchor, anchor, edit.comment);
|
|
4244
4308
|
}
|
|
4245
4309
|
}
|
|
@@ -4251,7 +4315,10 @@ var RedlineEngine = class {
|
|
|
4251
4315
|
length,
|
|
4252
4316
|
rebuild_map
|
|
4253
4317
|
);
|
|
4254
|
-
const virtual_spans = active_mapper.get_virtual_spans_in_range(
|
|
4318
|
+
const virtual_spans = active_mapper.get_virtual_spans_in_range(
|
|
4319
|
+
start_idx,
|
|
4320
|
+
length
|
|
4321
|
+
);
|
|
4255
4322
|
if (target_runs.length === 0 && virtual_spans.length === 0) return false;
|
|
4256
4323
|
const affected_ps = /* @__PURE__ */ new Set();
|
|
4257
4324
|
for (const run of target_runs) {
|
|
@@ -4334,7 +4401,10 @@ var RedlineEngine = class {
|
|
|
4334
4401
|
let pPr = findChild(p1_element, "w:pPr");
|
|
4335
4402
|
if (!pPr) {
|
|
4336
4403
|
pPr = p1_element.ownerDocument.createElement("w:pPr");
|
|
4337
|
-
p1_element.insertBefore(
|
|
4404
|
+
p1_element.insertBefore(
|
|
4405
|
+
pPr,
|
|
4406
|
+
p1_element.firstChild
|
|
4407
|
+
);
|
|
4338
4408
|
}
|
|
4339
4409
|
let rPr = findChild(pPr, "w:rPr");
|
|
4340
4410
|
if (!rPr) {
|
|
@@ -4863,13 +4933,14 @@ async function extractTextFromBuffer(buffer, cleanView = false) {
|
|
|
4863
4933
|
const doc = await DocumentObject.load(buffer);
|
|
4864
4934
|
return _extractTextFromDoc(doc, cleanView);
|
|
4865
4935
|
}
|
|
4866
|
-
function _extractTextFromDoc(doc, cleanView = false, includeAppendix = true) {
|
|
4936
|
+
function _extractTextFromDoc(doc, cleanView = false, includeAppendix = true, return_paragraph_offsets = false) {
|
|
4867
4937
|
const comments_map = extract_comments_data(doc.pkg);
|
|
4868
4938
|
const full_text = [];
|
|
4939
|
+
const paragraph_offsets = /* @__PURE__ */ new Map();
|
|
4869
4940
|
let cursor = 0;
|
|
4870
4941
|
for (const part of iter_document_parts(doc)) {
|
|
4871
4942
|
const part_cursor = full_text.length > 0 ? cursor + 2 : cursor;
|
|
4872
|
-
const part_text = _extract_blocks(part, comments_map, cleanView, part_cursor);
|
|
4943
|
+
const part_text = _extract_blocks(part, comments_map, cleanView, part_cursor, return_paragraph_offsets ? paragraph_offsets : void 0);
|
|
4873
4944
|
if (part_text) {
|
|
4874
4945
|
if (full_text.length > 0) cursor += 2;
|
|
4875
4946
|
full_text.push(part_text);
|
|
@@ -4881,9 +4952,12 @@ function _extractTextFromDoc(doc, cleanView = false, includeAppendix = true) {
|
|
|
4881
4952
|
const appendix = build_structural_appendix(doc, base_text);
|
|
4882
4953
|
if (appendix) base_text += appendix;
|
|
4883
4954
|
}
|
|
4955
|
+
if (return_paragraph_offsets) {
|
|
4956
|
+
return { text: base_text, paragraph_offsets };
|
|
4957
|
+
}
|
|
4884
4958
|
return base_text;
|
|
4885
4959
|
}
|
|
4886
|
-
function _extract_blocks(container, comments_map, cleanView, cursor) {
|
|
4960
|
+
function _extract_blocks(container, comments_map, cleanView, cursor, paragraph_offsets) {
|
|
4887
4961
|
const part = container.part || container;
|
|
4888
4962
|
const [style_cache, default_pstyle] = _get_style_cache(part);
|
|
4889
4963
|
const blocks = [];
|
|
@@ -4902,7 +4976,7 @@ ${header}`;
|
|
|
4902
4976
|
if (!is_first_block) local_cursor += 2;
|
|
4903
4977
|
const block_start = local_cursor;
|
|
4904
4978
|
if (item.constructor.name === "FootnoteItem") {
|
|
4905
|
-
const fn_text = _extract_blocks(item, comments_map, cleanView, block_start);
|
|
4979
|
+
const fn_text = _extract_blocks(item, comments_map, cleanView, block_start, paragraph_offsets);
|
|
4906
4980
|
if (fn_text) {
|
|
4907
4981
|
blocks.push(fn_text);
|
|
4908
4982
|
local_cursor = block_start + fn_text.length;
|
|
@@ -4918,11 +4992,14 @@ ${header}`;
|
|
|
4918
4992
|
const p_text = build_paragraph_text(item, comments_map, cleanView, style_cache, default_pstyle);
|
|
4919
4993
|
const full_block = prefix + p_text;
|
|
4920
4994
|
blocks.push(full_block);
|
|
4995
|
+
if (paragraph_offsets) {
|
|
4996
|
+
paragraph_offsets.set(item._element, [block_start, full_block.length]);
|
|
4997
|
+
}
|
|
4921
4998
|
local_cursor = block_start + full_block.length;
|
|
4922
4999
|
is_first_para = false;
|
|
4923
5000
|
is_first_block = false;
|
|
4924
5001
|
} else if (item instanceof Table) {
|
|
4925
|
-
const table_text = extract_table(item, comments_map, cleanView, block_start);
|
|
5002
|
+
const table_text = extract_table(item, comments_map, cleanView, block_start, paragraph_offsets);
|
|
4926
5003
|
if (table_text) {
|
|
4927
5004
|
blocks.push(table_text);
|
|
4928
5005
|
local_cursor = block_start + table_text.length;
|
|
@@ -4935,7 +5012,7 @@ ${header}`;
|
|
|
4935
5012
|
}
|
|
4936
5013
|
return blocks.join("\n\n");
|
|
4937
5014
|
}
|
|
4938
|
-
function extract_table(table, comments_map, cleanView, cursor) {
|
|
5015
|
+
function extract_table(table, comments_map, cleanView, cursor, paragraph_offsets) {
|
|
4939
5016
|
const rows_text = [];
|
|
4940
5017
|
let rows_processed = 0;
|
|
4941
5018
|
let local_cursor = cursor;
|
|
@@ -4954,7 +5031,7 @@ function extract_table(table, comments_map, cleanView, cursor) {
|
|
|
4954
5031
|
if (seen_cells.has(cell)) continue;
|
|
4955
5032
|
seen_cells.add(cell);
|
|
4956
5033
|
if (!first_cell) cell_cursor += 3;
|
|
4957
|
-
const cell_content = _extract_blocks(cell, comments_map, cleanView, cell_cursor);
|
|
5034
|
+
const cell_content = _extract_blocks(cell, comments_map, cleanView, cell_cursor, paragraph_offsets);
|
|
4958
5035
|
cell_texts.push(cell_content);
|
|
4959
5036
|
cell_cursor += cell_content.length;
|
|
4960
5037
|
first_cell = false;
|
|
@@ -5211,6 +5288,9 @@ function extract_outline(doc, projected_body, body_pages, body_page_offsets, par
|
|
|
5211
5288
|
if (body_pages.length !== body_page_offsets.length) {
|
|
5212
5289
|
throw new Error("body_pages and body_page_offsets length mismatch");
|
|
5213
5290
|
}
|
|
5291
|
+
if (paragraph_offsets) {
|
|
5292
|
+
return _extract_outline_fast(doc, projected_body, body_page_offsets, paragraph_offsets);
|
|
5293
|
+
}
|
|
5214
5294
|
const comments_map = extract_comments_data(doc.pkg);
|
|
5215
5295
|
const block_records = _walk_doc_body(doc, comments_map);
|
|
5216
5296
|
const heading_indices = [];
|
|
@@ -5486,6 +5566,7 @@ function _determine_heading_style(paragraph) {
|
|
|
5486
5566
|
if (pStyle) style_id = pStyle.getAttribute("w:val") || default_pstyle;
|
|
5487
5567
|
}
|
|
5488
5568
|
let outline_level = null;
|
|
5569
|
+
let outline_level_from_style = false;
|
|
5489
5570
|
if (pPr) {
|
|
5490
5571
|
const oLvl = findChild(pPr, "w:outlineLvl");
|
|
5491
5572
|
if (oLvl && /^\d+$/.test(oLvl.getAttribute("w:val") || "")) {
|
|
@@ -5494,6 +5575,7 @@ function _determine_heading_style(paragraph) {
|
|
|
5494
5575
|
}
|
|
5495
5576
|
if (outline_level === null && style_id && style_cache && style_cache[style_id]) {
|
|
5496
5577
|
outline_level = style_cache[style_id].outline_level;
|
|
5578
|
+
outline_level_from_style = true;
|
|
5497
5579
|
}
|
|
5498
5580
|
const style_name = style_id && style_cache && style_cache[style_id] ? style_cache[style_id].name : style_id;
|
|
5499
5581
|
let normalized_style_name = style_name;
|
|
@@ -5504,6 +5586,12 @@ function _determine_heading_style(paragraph) {
|
|
|
5504
5586
|
normalized_style_name = "Title";
|
|
5505
5587
|
}
|
|
5506
5588
|
}
|
|
5589
|
+
if (outline_level_from_style && outline_level !== null) {
|
|
5590
|
+
const is_heading_or_title = normalized_style_name && (normalized_style_name.startsWith("Heading") || normalized_style_name === "Title");
|
|
5591
|
+
if (!is_heading_or_title) {
|
|
5592
|
+
outline_level = null;
|
|
5593
|
+
}
|
|
5594
|
+
}
|
|
5507
5595
|
if (outline_level !== null && outline_level >= 0 && outline_level <= 8) {
|
|
5508
5596
|
if (normalized_style_name && (normalized_style_name.startsWith("Heading") || normalized_style_name === "Title")) {
|
|
5509
5597
|
return normalized_style_name;
|
|
@@ -5552,6 +5640,146 @@ function _offset_to_page(offset, body_page_offsets) {
|
|
|
5552
5640
|
}
|
|
5553
5641
|
return page;
|
|
5554
5642
|
}
|
|
5643
|
+
function _extract_outline_fast(doc, projected_body, body_page_offsets, paragraph_offsets) {
|
|
5644
|
+
const paragraphs_and_tables = [];
|
|
5645
|
+
const seen_cells = /* @__PURE__ */ new Set();
|
|
5646
|
+
function walk(container) {
|
|
5647
|
+
for (const item of iter_block_items(container)) {
|
|
5648
|
+
const i_type = item.constructor.name;
|
|
5649
|
+
if (i_type === "FootnoteItem") {
|
|
5650
|
+
walk(item);
|
|
5651
|
+
} else if (item instanceof Paragraph) {
|
|
5652
|
+
paragraphs_and_tables.push(["p", item]);
|
|
5653
|
+
} else if (item instanceof Table) {
|
|
5654
|
+
paragraphs_and_tables.push(["t", item]);
|
|
5655
|
+
for (const row of item.rows) {
|
|
5656
|
+
for (const cell of row.cells) {
|
|
5657
|
+
if (seen_cells.has(cell._element)) {
|
|
5658
|
+
continue;
|
|
5659
|
+
}
|
|
5660
|
+
seen_cells.add(cell._element);
|
|
5661
|
+
walk(cell);
|
|
5662
|
+
}
|
|
5663
|
+
}
|
|
5664
|
+
}
|
|
5665
|
+
}
|
|
5666
|
+
}
|
|
5667
|
+
walk(doc);
|
|
5668
|
+
const heading_indices = [];
|
|
5669
|
+
for (let idx = 0; idx < paragraphs_and_tables.length; idx++) {
|
|
5670
|
+
const [kind, item] = paragraphs_and_tables[idx];
|
|
5671
|
+
if (kind !== "p") continue;
|
|
5672
|
+
let hasOffset = false;
|
|
5673
|
+
if (paragraph_offsets instanceof Map) {
|
|
5674
|
+
hasOffset = paragraph_offsets.has(item._element);
|
|
5675
|
+
} else {
|
|
5676
|
+
hasOffset = item._element in paragraph_offsets;
|
|
5677
|
+
}
|
|
5678
|
+
if (!hasOffset) {
|
|
5679
|
+
continue;
|
|
5680
|
+
}
|
|
5681
|
+
if (!_is_heading(item)) continue;
|
|
5682
|
+
if (!_heading_passes_quality_filter_fast(item, projected_body, paragraph_offsets)) continue;
|
|
5683
|
+
heading_indices.push(idx);
|
|
5684
|
+
}
|
|
5685
|
+
if (heading_indices.length === 0) return [];
|
|
5686
|
+
const nodes = [];
|
|
5687
|
+
for (let h_pos = 0; h_pos < heading_indices.length; h_pos++) {
|
|
5688
|
+
const item_idx = heading_indices[h_pos];
|
|
5689
|
+
const paragraph = paragraphs_and_tables[item_idx][1];
|
|
5690
|
+
const level = _heading_level(paragraph);
|
|
5691
|
+
const text = _heading_text_fast(paragraph, projected_body, paragraph_offsets);
|
|
5692
|
+
const style = _determine_heading_style(paragraph);
|
|
5693
|
+
let owned_end = item_idx;
|
|
5694
|
+
for (let next_h_pos = h_pos + 1; next_h_pos < heading_indices.length; next_h_pos++) {
|
|
5695
|
+
const next_idx = heading_indices[next_h_pos];
|
|
5696
|
+
const next_paragraph = paragraphs_and_tables[next_idx][1];
|
|
5697
|
+
if (_heading_level(next_paragraph) <= level) {
|
|
5698
|
+
owned_end = next_idx;
|
|
5699
|
+
break;
|
|
5700
|
+
}
|
|
5701
|
+
}
|
|
5702
|
+
if (owned_end === item_idx) {
|
|
5703
|
+
owned_end = paragraphs_and_tables.length;
|
|
5704
|
+
}
|
|
5705
|
+
const owned = paragraphs_and_tables.slice(item_idx + 1, owned_end);
|
|
5706
|
+
let has_table = false;
|
|
5707
|
+
for (const [kind2, item2] of owned) {
|
|
5708
|
+
if (kind2 === "p" && _is_heading(item2)) {
|
|
5709
|
+
break;
|
|
5710
|
+
}
|
|
5711
|
+
if (kind2 === "t") {
|
|
5712
|
+
has_table = true;
|
|
5713
|
+
break;
|
|
5714
|
+
}
|
|
5715
|
+
}
|
|
5716
|
+
const footnote_ids = _collect_footnote_ids_fast(owned);
|
|
5717
|
+
let para_offset;
|
|
5718
|
+
if (paragraph_offsets instanceof Map) {
|
|
5719
|
+
para_offset = paragraph_offsets.get(paragraph._element);
|
|
5720
|
+
} else {
|
|
5721
|
+
para_offset = paragraph_offsets[paragraph._element];
|
|
5722
|
+
}
|
|
5723
|
+
let page_num = 1;
|
|
5724
|
+
if (para_offset !== void 0) {
|
|
5725
|
+
const [start_offset] = para_offset;
|
|
5726
|
+
page_num = _offset_to_page(start_offset, body_page_offsets);
|
|
5727
|
+
}
|
|
5728
|
+
nodes.push({
|
|
5729
|
+
level,
|
|
5730
|
+
text,
|
|
5731
|
+
page: page_num,
|
|
5732
|
+
style,
|
|
5733
|
+
has_table,
|
|
5734
|
+
footnote_ids
|
|
5735
|
+
});
|
|
5736
|
+
}
|
|
5737
|
+
return nodes;
|
|
5738
|
+
}
|
|
5739
|
+
function _heading_passes_quality_filter_fast(paragraph, projected_body, paragraph_offsets) {
|
|
5740
|
+
const style = _determine_heading_style(paragraph);
|
|
5741
|
+
if (style !== "(heuristic)") return true;
|
|
5742
|
+
const text = _heading_text_fast(paragraph, projected_body, paragraph_offsets);
|
|
5743
|
+
if (!text) return false;
|
|
5744
|
+
const words = text.match(/\w+/g) || [];
|
|
5745
|
+
return words.length >= _HEURISTIC_MIN_WORDS;
|
|
5746
|
+
}
|
|
5747
|
+
function _heading_text_fast(paragraph, projected_body, paragraph_offsets) {
|
|
5748
|
+
let offset;
|
|
5749
|
+
if (paragraph_offsets instanceof Map) {
|
|
5750
|
+
offset = paragraph_offsets.get(paragraph._element);
|
|
5751
|
+
} else {
|
|
5752
|
+
offset = paragraph_offsets[paragraph._element];
|
|
5753
|
+
}
|
|
5754
|
+
if (offset === void 0) {
|
|
5755
|
+
return "";
|
|
5756
|
+
}
|
|
5757
|
+
const [start, length] = offset;
|
|
5758
|
+
const raw = projected_body.substring(start, start + length);
|
|
5759
|
+
let cleaned = _strip_critic_markup(raw);
|
|
5760
|
+
cleaned = _strip_inline_formatting(cleaned);
|
|
5761
|
+
cleaned = cleaned.replace(/^#+\s+/, "");
|
|
5762
|
+
return cleaned.trim();
|
|
5763
|
+
}
|
|
5764
|
+
function _collect_footnote_ids_fast(owned_items) {
|
|
5765
|
+
const seen = /* @__PURE__ */ new Set();
|
|
5766
|
+
const ordered = [];
|
|
5767
|
+
for (const [kind, item] of owned_items) {
|
|
5768
|
+
if (kind !== "p") continue;
|
|
5769
|
+
for (const event of iter_paragraph_content(item)) {
|
|
5770
|
+
if (!("type" in event)) continue;
|
|
5771
|
+
let fn_id = "";
|
|
5772
|
+
if (event.type === "footnote") fn_id = `fn-${event.id}`;
|
|
5773
|
+
else if (event.type === "endnote") fn_id = `en-${event.id}`;
|
|
5774
|
+
else continue;
|
|
5775
|
+
if (!seen.has(fn_id)) {
|
|
5776
|
+
seen.add(fn_id);
|
|
5777
|
+
ordered.push(fn_id);
|
|
5778
|
+
}
|
|
5779
|
+
}
|
|
5780
|
+
}
|
|
5781
|
+
return ordered;
|
|
5782
|
+
}
|
|
5555
5783
|
|
|
5556
5784
|
// src/sanitize/report.ts
|
|
5557
5785
|
var SanitizeReport = class {
|
|
@@ -6239,6 +6467,7 @@ function identifyEngine() {
|
|
|
6239
6467
|
DocumentMapper,
|
|
6240
6468
|
DocumentObject,
|
|
6241
6469
|
RedlineEngine,
|
|
6470
|
+
_extractTextFromDoc,
|
|
6242
6471
|
apply_edits_to_markdown,
|
|
6243
6472
|
create_unified_diff,
|
|
6244
6473
|
create_word_patch_diff,
|