@adeu/core 1.6.8 → 1.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +58 -0
- package/dist/index.cjs +1833 -540
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +75 -1
- package/dist/index.d.ts +75 -1
- package/dist/index.js +1832 -540
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/src/consistency.test.ts +134 -0
- package/src/diff.test.ts +13 -1
- package/src/diff.ts +189 -70
- package/src/docx/bridge.ts +99 -57
- package/src/docx/dom.ts +66 -7
- package/src/engine.bugs.test.ts +481 -0
- package/src/engine.ts +1346 -192
- package/src/index.ts +1 -1
- package/src/markup.ts +160 -53
- package/src/outline.ts +199 -69
- package/src/sanitize/core.ts +26 -0
- package/src/sanitize/report.ts +1 -1
- package/src/sanitize/sanitize.test.ts +47 -2
- package/src/sanitize/transforms.ts +87 -0
- package/src/utils/docx.ts +282 -157
package/dist/index.js
CHANGED
|
@@ -29,7 +29,38 @@ function parseXml(xmlString) {
|
|
|
29
29
|
return new DOMParser().parseFromString(xmlString, "text/xml");
|
|
30
30
|
}
|
|
31
31
|
/**
 * Serializes a DOM node to XML and rewrites the root element's start tag so
 * that its attributes appear in a stable order: attributes whose name starts
 * with "xmlns" first, then all others, each group sorted by attribute name.
 *
 * Only the first start tag that carries attributes is considered, and only
 * when nothing but an optional `<?xml ...?>` declaration precedes it (i.e. it
 * is the root element). If the attribute text cannot be tokenized completely,
 * the serializer output is returned untouched rather than emitting a tag with
 * missing or renamed attributes.
 *
 * @param {Node} node - DOM node passed to `XMLSerializer#serializeToString`.
 * @returns {string} Serialized XML, with the root tag's attributes reordered
 *   when that could be done safely.
 */
function serializeXml(node) {
  const xml = new XMLSerializer().serializeToString(node);

  // First start tag that has at least one attribute.
  const rootTagRegex = /<([a-zA-Z0-9_:]+)(\s+[^>]+?)(>|\/>)/;
  const match = rootTagRegex.exec(xml);
  if (!match) return xml;

  const index = match.index;
  const textBefore = xml.substring(0, index);
  // The tag is the root when no other tag precedes it, or when the only thing
  // in front of it is the XML declaration.
  const isRoot =
    !textBefore.includes("<") ||
    (textBefore.trim().startsWith("<?xml") &&
      (textBefore.match(/</g) || []).length === 1);
  if (!isRoot) return xml;

  const [fullTag, tagName, attrsStr, tagEnd] = match;

  // Sticky matching walks the attribute text contiguously, so every character
  // is accounted for. The name class accepts any legal XML name character
  // (including "-" and "."), not only [a-zA-Z0-9_:].
  const attrRegex = /\s*([^\s=<>"'\/]+)\s*=\s*(?:"[^"]*"|'[^']*')/y;
  const attrs = [];
  let consumed = 0;
  for (;;) {
    attrRegex.lastIndex = consumed;
    const m = attrRegex.exec(attrsStr);
    if (m === null) break;
    attrs.push({ name: m[1], text: m[0].trim() });
    consumed = attrRegex.lastIndex;
  }
  // Anything left over means the tag was not fully understood; rewriting it
  // would silently drop that text, so keep the serializer's output instead.
  if (attrsStr.slice(consumed).trim() !== "") return xml;

  attrs.sort((a, b) => {
    const aIsXmlns = a.name.startsWith("xmlns");
    const bIsXmlns = b.name.startsWith("xmlns");
    if (aIsXmlns && !bIsXmlns) return -1;
    if (!aIsXmlns && bIsXmlns) return 1;
    return a.name < b.name ? -1 : a.name > b.name ? 1 : 0;
  });

  const attrText = attrs.map((attr) => attr.text).join(" ");
  const newTag =
    attrs.length > 0
      ? `<${tagName} ${attrText}${tagEnd}`
      : `<${tagName}${tagEnd}`;
  return xml.substring(0, index) + newTag + xml.substring(index + fullTag.length);
}
|
|
34
65
|
|
|
35
66
|
// src/docx/bridge.ts
|
|
@@ -57,12 +88,16 @@ var Part = class {
|
|
|
57
88
|
contentType;
|
|
58
89
|
rels = /* @__PURE__ */ new Map();
|
|
59
90
|
_element;
|
|
91
|
+
package;
|
|
60
92
|
addRelationship(id, type, target, isExternal = false) {
|
|
61
93
|
this.rels.set(id, new Relationship(id, type, target, isExternal));
|
|
62
94
|
if (this.partname.endsWith(".rels")) {
|
|
63
95
|
const doc = this._element.ownerDocument;
|
|
64
96
|
if (doc) {
|
|
65
|
-
const relEl = doc.createElementNS(
|
|
97
|
+
const relEl = doc.createElementNS(
|
|
98
|
+
"http://schemas.openxmlformats.org/package/2006/relationships",
|
|
99
|
+
"Relationship"
|
|
100
|
+
);
|
|
66
101
|
relEl.setAttribute("Id", id);
|
|
67
102
|
relEl.setAttribute("Type", type);
|
|
68
103
|
relEl.setAttribute("Target", target);
|
|
@@ -81,7 +116,9 @@ var DocxPackage = class {
|
|
|
81
116
|
mainDocumentPart;
|
|
82
117
|
getPartByPath(path) {
|
|
83
118
|
const searchPath = path.startsWith("/") ? path.substring(1) : path;
|
|
84
|
-
return this.parts.find(
|
|
119
|
+
return this.parts.find(
|
|
120
|
+
(p) => p.partname === searchPath || p.partname === "/" + searchPath
|
|
121
|
+
);
|
|
85
122
|
}
|
|
86
123
|
nextPartname(pattern) {
|
|
87
124
|
let i = 1;
|
|
@@ -93,7 +130,13 @@ var DocxPackage = class {
|
|
|
93
130
|
}
|
|
94
131
|
addPart(partname, contentType, xmlString) {
|
|
95
132
|
const doc = parseXml(xmlString);
|
|
96
|
-
const part = new Part(
|
|
133
|
+
const part = new Part(
|
|
134
|
+
partname,
|
|
135
|
+
xmlString,
|
|
136
|
+
doc.documentElement,
|
|
137
|
+
contentType
|
|
138
|
+
);
|
|
139
|
+
part.package = this;
|
|
97
140
|
this.parts.push(part);
|
|
98
141
|
const ctPart = this.getPartByPath("[Content_Types].xml");
|
|
99
142
|
if (ctPart) {
|
|
@@ -115,7 +158,11 @@ var DocxPackage = class {
|
|
|
115
158
|
if (!relsPart) {
|
|
116
159
|
const xml = `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
|
117
160
|
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships"></Relationships>`;
|
|
118
|
-
relsPart = this.addPart(
|
|
161
|
+
relsPart = this.addPart(
|
|
162
|
+
relsPath,
|
|
163
|
+
"application/vnd.openxmlformats-package.relationships+xml",
|
|
164
|
+
xml
|
|
165
|
+
);
|
|
119
166
|
}
|
|
120
167
|
return relsPart;
|
|
121
168
|
}
|
|
@@ -127,7 +174,9 @@ var DocumentObject = class _DocumentObject {
|
|
|
127
174
|
}
|
|
128
175
|
pkg;
|
|
129
176
|
part;
|
|
130
|
-
settings = {
|
|
177
|
+
settings = {
|
|
178
|
+
oddAndEvenPagesHeaderFooter: false
|
|
179
|
+
};
|
|
131
180
|
// Simplification for the TS port: sections hold header/footer refs
|
|
132
181
|
sections = [];
|
|
133
182
|
get element() {
|
|
@@ -154,6 +203,7 @@ var DocumentObject = class _DocumentObject {
|
|
|
154
203
|
const doc = parseXml(text);
|
|
155
204
|
const cType = contentTypes["/" + path] || "application/xml";
|
|
156
205
|
const part = new Part("/" + path, text, doc.documentElement, cType);
|
|
206
|
+
part.package = pkg;
|
|
157
207
|
pkg.parts.push(part);
|
|
158
208
|
}
|
|
159
209
|
}
|
|
@@ -169,7 +219,10 @@ var DocumentObject = class _DocumentObject {
|
|
|
169
219
|
const type = rel.getAttribute("Type");
|
|
170
220
|
const targetMode = rel.getAttribute("TargetMode");
|
|
171
221
|
if (rId && target && type) {
|
|
172
|
-
mainPart.rels.set(
|
|
222
|
+
mainPart.rels.set(
|
|
223
|
+
rId,
|
|
224
|
+
new Relationship(rId, type, target, targetMode === "External")
|
|
225
|
+
);
|
|
173
226
|
}
|
|
174
227
|
}
|
|
175
228
|
}
|
|
@@ -735,7 +788,14 @@ function _get_style_cache(part) {
|
|
|
735
788
|
const is_default = s.getAttribute("w:default") === "1" || s.getAttribute("w:default") === "true";
|
|
736
789
|
if (s_type === "paragraph" && is_default) default_pstyle = s_id;
|
|
737
790
|
const name_el = findChild(s, "w:name");
|
|
738
|
-
|
|
791
|
+
let name = name_el ? name_el.getAttribute("w:val") : s_id;
|
|
792
|
+
if (name && typeof name === "string") {
|
|
793
|
+
if (name.toLowerCase().startsWith("heading")) {
|
|
794
|
+
name = name.replace(/^heading/i, "Heading");
|
|
795
|
+
} else if (name.toLowerCase() === "title") {
|
|
796
|
+
name = "Title";
|
|
797
|
+
}
|
|
798
|
+
}
|
|
739
799
|
const based_on_el = findChild(s, "w:basedOn");
|
|
740
800
|
const based_on = based_on_el ? based_on_el.getAttribute("w:val") : null;
|
|
741
801
|
let outline_lvl = null;
|
|
@@ -760,7 +820,8 @@ function _get_style_cache(part) {
|
|
|
760
820
|
}
|
|
761
821
|
const resolve_style = (s_id, visited) => {
|
|
762
822
|
if (cache[s_id]) return cache[s_id];
|
|
763
|
-
if (visited.has(s_id) || !raw_styles[s_id])
|
|
823
|
+
if (visited.has(s_id) || !raw_styles[s_id])
|
|
824
|
+
return { name: s_id, outline_level: null, bold: false };
|
|
764
825
|
visited.add(s_id);
|
|
765
826
|
const raw = raw_styles[s_id];
|
|
766
827
|
const based_on_id = raw.based_on;
|
|
@@ -787,7 +848,9 @@ function _detect_heading_level_from_name(name) {
|
|
|
787
848
|
}
|
|
788
849
|
function is_native_heading(paragraph, style_cache, default_pstyle) {
|
|
789
850
|
if (!style_cache) {
|
|
790
|
-
[style_cache, default_pstyle] = _get_style_cache(
|
|
851
|
+
[style_cache, default_pstyle] = _get_style_cache(
|
|
852
|
+
paragraph._parent.part || paragraph._parent
|
|
853
|
+
);
|
|
791
854
|
}
|
|
792
855
|
const pPr = findChild(paragraph._element, QN_W_PPR);
|
|
793
856
|
if (pPr) {
|
|
@@ -809,7 +872,10 @@ function is_native_heading(paragraph, style_cache, default_pstyle) {
|
|
|
809
872
|
if (style_info && style_info.outline_level !== null && style_info.outline_level >= 0 && style_info.outline_level <= 8) {
|
|
810
873
|
return true;
|
|
811
874
|
}
|
|
812
|
-
|
|
875
|
+
let style_name = style_info ? style_info.name : style_id;
|
|
876
|
+
if (style_name && typeof style_name === "string" && style_name.toLowerCase().startsWith("heading")) {
|
|
877
|
+
style_name = style_name.replace(/^heading/i, "Heading");
|
|
878
|
+
}
|
|
813
879
|
if (style_name?.startsWith("Heading")) return true;
|
|
814
880
|
if (style_name === "Title") return true;
|
|
815
881
|
if (style_name && style_name !== "Normal") {
|
|
@@ -819,7 +885,9 @@ function is_native_heading(paragraph, style_cache, default_pstyle) {
|
|
|
819
885
|
}
|
|
820
886
|
function get_paragraph_prefix(paragraph, style_cache, default_pstyle) {
|
|
821
887
|
if (!style_cache) {
|
|
822
|
-
[style_cache, default_pstyle] = _get_style_cache(
|
|
888
|
+
[style_cache, default_pstyle] = _get_style_cache(
|
|
889
|
+
paragraph._parent.part || paragraph._parent
|
|
890
|
+
);
|
|
823
891
|
}
|
|
824
892
|
const pPr = findChild(paragraph._element, QN_W_PPR);
|
|
825
893
|
if (pPr) {
|
|
@@ -841,7 +909,10 @@ function get_paragraph_prefix(paragraph, style_cache, default_pstyle) {
|
|
|
841
909
|
if (style_info && style_info.outline_level !== null && style_info.outline_level >= 0 && style_info.outline_level <= 8) {
|
|
842
910
|
return "#".repeat(style_info.outline_level + 1) + " ";
|
|
843
911
|
}
|
|
844
|
-
|
|
912
|
+
let style_name = style_info ? style_info.name : style_id;
|
|
913
|
+
if (style_name && typeof style_name === "string" && style_name.toLowerCase().startsWith("heading")) {
|
|
914
|
+
style_name = style_name.replace(/^heading/i, "Heading");
|
|
915
|
+
}
|
|
845
916
|
if (style_name?.startsWith("Heading")) {
|
|
846
917
|
const match = style_name.replace("Heading", "").trim();
|
|
847
918
|
if (/^\d+$/.test(match)) return "#".repeat(parseInt(match, 10)) + " ";
|
|
@@ -908,9 +979,11 @@ function get_run_style_markers(run, is_heading = null) {
|
|
|
908
979
|
let is_italic = false;
|
|
909
980
|
if (rPr) {
|
|
910
981
|
const b = findChild(rPr, QN_W_B);
|
|
911
|
-
if (b && b.getAttribute(QN_W_VAL) !== "0" && b.getAttribute(QN_W_VAL) !== "false")
|
|
982
|
+
if (b && b.getAttribute(QN_W_VAL) !== "0" && b.getAttribute(QN_W_VAL) !== "false")
|
|
983
|
+
is_bold = true;
|
|
912
984
|
const i = findChild(rPr, QN_W_I);
|
|
913
|
-
if (i && i.getAttribute(QN_W_VAL) !== "0" && i.getAttribute(QN_W_VAL) !== "false")
|
|
985
|
+
if (i && i.getAttribute(QN_W_VAL) !== "0" && i.getAttribute(QN_W_VAL) !== "false")
|
|
986
|
+
is_italic = true;
|
|
914
987
|
}
|
|
915
988
|
if (is_heading === null) {
|
|
916
989
|
const parent = run._parent;
|
|
@@ -955,7 +1028,8 @@ function* iter_block_items(parent) {
|
|
|
955
1028
|
const tag = parent.note_type === "fn" ? "w:footnote" : "w:endnote";
|
|
956
1029
|
const notes = findAllDescendants(parent_elm, tag);
|
|
957
1030
|
for (const child of notes) {
|
|
958
|
-
if (child.getAttribute("w:type") === "separator" || child.getAttribute("w:type") === "continuationSeparator")
|
|
1031
|
+
if (child.getAttribute("w:type") === "separator" || child.getAttribute("w:type") === "continuationSeparator")
|
|
1032
|
+
continue;
|
|
959
1033
|
yield new FootnoteItem(child, parent, parent.note_type);
|
|
960
1034
|
}
|
|
961
1035
|
return;
|
|
@@ -971,7 +1045,15 @@ function* iter_block_items(parent) {
|
|
|
971
1045
|
}
|
|
972
1046
|
}
|
|
973
1047
|
function* iter_document_parts(doc) {
|
|
1048
|
+
const headers = doc.pkg.parts.filter(
|
|
1049
|
+
(p) => p.contentType === "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml"
|
|
1050
|
+
);
|
|
1051
|
+
for (const h of headers) yield h;
|
|
974
1052
|
yield doc;
|
|
1053
|
+
const footers = doc.pkg.parts.filter(
|
|
1054
|
+
(p) => p.contentType === "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml"
|
|
1055
|
+
);
|
|
1056
|
+
for (const f of footers) yield f;
|
|
975
1057
|
const fnPart = doc.pkg.getPartByPath("word/footnotes.xml");
|
|
976
1058
|
const enPart = doc.pkg.getPartByPath("word/endnotes.xml");
|
|
977
1059
|
if (fnPart) yield new NotesPart(fnPart, "fn");
|
|
@@ -985,7 +1067,8 @@ function _is_page_instr(instr) {
|
|
|
985
1067
|
function _get_part(parent) {
|
|
986
1068
|
if (!parent) return null;
|
|
987
1069
|
if (parent.part) return parent.part;
|
|
988
|
-
if (parent.pkg && parent.pkg.mainDocumentPart)
|
|
1070
|
+
if (parent.pkg && parent.pkg.mainDocumentPart)
|
|
1071
|
+
return parent.pkg.mainDocumentPart;
|
|
989
1072
|
if (parent._parent) return _get_part(parent._parent);
|
|
990
1073
|
return null;
|
|
991
1074
|
}
|
|
@@ -1000,7 +1083,12 @@ function* iter_paragraph_content(paragraph) {
|
|
|
1000
1083
|
const rPrChange = findChild(rPr, QN_W_RPRCHANGE);
|
|
1001
1084
|
if (rPrChange) {
|
|
1002
1085
|
c_id = rPrChange.getAttribute(QN_W_ID);
|
|
1003
|
-
yield {
|
|
1086
|
+
yield {
|
|
1087
|
+
type: "fmt_start",
|
|
1088
|
+
id: c_id,
|
|
1089
|
+
author: rPrChange.getAttribute(QN_W_AUTHOR) || void 0,
|
|
1090
|
+
date: rPrChange.getAttribute(QN_W_DATE) || void 0
|
|
1091
|
+
};
|
|
1004
1092
|
}
|
|
1005
1093
|
}
|
|
1006
1094
|
for (let i = 0; i < r_element.childNodes.length; i++) {
|
|
@@ -1025,12 +1113,14 @@ function* iter_paragraph_content(paragraph) {
|
|
|
1025
1113
|
if (_is_page_instr(current_instr)) hide_result = true;
|
|
1026
1114
|
else {
|
|
1027
1115
|
const parts = current_instr.trim().split(/\s+/);
|
|
1028
|
-
if (parts.length > 1 && parts[0] === "REF")
|
|
1116
|
+
if (parts.length > 1 && parts[0] === "REF")
|
|
1117
|
+
yield { type: "xref_start", id: parts[1] };
|
|
1029
1118
|
}
|
|
1030
1119
|
} else if (fld_type === "end") {
|
|
1031
1120
|
if (!hide_result) {
|
|
1032
1121
|
const parts = current_instr.trim().split(/\s+/);
|
|
1033
|
-
if (parts.length > 1 && parts[0] === "REF")
|
|
1122
|
+
if (parts.length > 1 && parts[0] === "REF")
|
|
1123
|
+
yield { type: "xref_end", id: parts[1] };
|
|
1034
1124
|
}
|
|
1035
1125
|
in_complex_field = false;
|
|
1036
1126
|
current_instr = "";
|
|
@@ -1051,16 +1141,28 @@ function* iter_paragraph_content(paragraph) {
|
|
|
1051
1141
|
if (tag === QN_W_R) yield* process_run_element(child);
|
|
1052
1142
|
else if (tag === QN_W_INS) {
|
|
1053
1143
|
const i_id = child.getAttribute(QN_W_ID);
|
|
1054
|
-
yield {
|
|
1144
|
+
yield {
|
|
1145
|
+
type: "ins_start",
|
|
1146
|
+
id: i_id,
|
|
1147
|
+
author: child.getAttribute(QN_W_AUTHOR) || void 0,
|
|
1148
|
+
date: child.getAttribute(QN_W_DATE) || void 0
|
|
1149
|
+
};
|
|
1055
1150
|
yield* traverse_node(child);
|
|
1056
1151
|
yield { type: "ins_end", id: i_id };
|
|
1057
1152
|
} else if (tag === QN_W_DEL) {
|
|
1058
1153
|
const d_id = child.getAttribute(QN_W_ID);
|
|
1059
|
-
yield {
|
|
1154
|
+
yield {
|
|
1155
|
+
type: "del_start",
|
|
1156
|
+
id: d_id,
|
|
1157
|
+
author: child.getAttribute(QN_W_AUTHOR) || void 0,
|
|
1158
|
+
date: child.getAttribute(QN_W_DATE) || void 0
|
|
1159
|
+
};
|
|
1060
1160
|
yield* traverse_node(child);
|
|
1061
1161
|
yield { type: "del_end", id: d_id };
|
|
1062
|
-
} else if (tag === QN_W_COMMENTRANGESTART)
|
|
1063
|
-
|
|
1162
|
+
} else if (tag === QN_W_COMMENTRANGESTART)
|
|
1163
|
+
yield { type: "start", id: child.getAttribute(QN_W_ID) };
|
|
1164
|
+
else if (tag === QN_W_COMMENTRANGEEND)
|
|
1165
|
+
yield { type: "end", id: child.getAttribute(QN_W_ID) };
|
|
1064
1166
|
else if (tag === QN_W_HYPERLINK) {
|
|
1065
1167
|
const rId = child.getAttribute(QN_R_ID) || child.getAttribute("id");
|
|
1066
1168
|
let url = "";
|
|
@@ -1081,7 +1183,8 @@ function* iter_paragraph_content(paragraph) {
|
|
|
1081
1183
|
if (target) yield { type: "xref_end", id: target };
|
|
1082
1184
|
} else if (tag === QN_W_BOOKMARKSTART) {
|
|
1083
1185
|
const b_name = child.getAttribute(QN_W_NAME);
|
|
1084
|
-
if (b_name && (!b_name.startsWith("_") || b_name.startsWith("_Ref")))
|
|
1186
|
+
if (b_name && (!b_name.startsWith("_") || b_name.startsWith("_Ref")))
|
|
1187
|
+
yield { type: "bookmark", id: b_name };
|
|
1085
1188
|
} else if (tag === QN_W_SDT || tag === QN_W_SMARTTAG || tag === QN_W_SDTCONTENT) {
|
|
1086
1189
|
yield* traverse_node(child);
|
|
1087
1190
|
}
|
|
@@ -1893,13 +1996,18 @@ function _words_to_chars(text1, text2) {
|
|
|
1893
1996
|
}
|
|
1894
1997
|
function generate_edits_from_text(original_text, modified_text) {
|
|
1895
1998
|
const dmp = new diff_match_patch.diff_match_patch();
|
|
1896
|
-
|
|
1999
|
+
dmp.Diff_Timeout = 2;
|
|
2000
|
+
const [chars1, chars2, token_array] = _words_to_chars(
|
|
2001
|
+
original_text,
|
|
2002
|
+
modified_text
|
|
2003
|
+
);
|
|
1897
2004
|
const diffs = dmp.diff_main(chars1, chars2, false);
|
|
1898
2005
|
dmp.diff_cleanupSemantic(diffs);
|
|
1899
2006
|
for (let i = 0; i < diffs.length; i++) {
|
|
1900
2007
|
const chars = diffs[i][1];
|
|
1901
2008
|
let text = "";
|
|
1902
|
-
for (let j = 0; j < chars.length; j++)
|
|
2009
|
+
for (let j = 0; j < chars.length; j++)
|
|
2010
|
+
text += token_array[chars.charCodeAt(j)];
|
|
1903
2011
|
diffs[i][1] = text;
|
|
1904
2012
|
}
|
|
1905
2013
|
const edits = [];
|
|
@@ -1909,7 +2017,13 @@ function generate_edits_from_text(original_text, modified_text) {
|
|
|
1909
2017
|
if (op === 0) {
|
|
1910
2018
|
if (pending_delete) {
|
|
1911
2019
|
const [idx, del_txt] = pending_delete;
|
|
1912
|
-
edits.push({
|
|
2020
|
+
edits.push({
|
|
2021
|
+
type: "modify",
|
|
2022
|
+
target_text: del_txt,
|
|
2023
|
+
new_text: "",
|
|
2024
|
+
comment: "Diff: Text deleted",
|
|
2025
|
+
_match_start_index: idx
|
|
2026
|
+
});
|
|
1913
2027
|
pending_delete = null;
|
|
1914
2028
|
}
|
|
1915
2029
|
current_original_index += text.length;
|
|
@@ -1919,21 +2033,40 @@ function generate_edits_from_text(original_text, modified_text) {
|
|
|
1919
2033
|
} else if (op === 1) {
|
|
1920
2034
|
if (pending_delete) {
|
|
1921
2035
|
const [idx, del_txt] = pending_delete;
|
|
1922
|
-
edits.push({
|
|
2036
|
+
edits.push({
|
|
2037
|
+
type: "modify",
|
|
2038
|
+
target_text: del_txt,
|
|
2039
|
+
new_text: text,
|
|
2040
|
+
comment: "Diff: Replacement",
|
|
2041
|
+
_match_start_index: idx
|
|
2042
|
+
});
|
|
1923
2043
|
pending_delete = null;
|
|
1924
2044
|
} else {
|
|
1925
|
-
edits.push({
|
|
2045
|
+
edits.push({
|
|
2046
|
+
type: "modify",
|
|
2047
|
+
target_text: "",
|
|
2048
|
+
new_text: text,
|
|
2049
|
+
comment: "Diff: Text inserted",
|
|
2050
|
+
_match_start_index: current_original_index
|
|
2051
|
+
});
|
|
1926
2052
|
}
|
|
1927
2053
|
}
|
|
1928
2054
|
}
|
|
1929
2055
|
if (pending_delete) {
|
|
1930
2056
|
const [idx, del_txt] = pending_delete;
|
|
1931
|
-
edits.push({
|
|
2057
|
+
edits.push({
|
|
2058
|
+
type: "modify",
|
|
2059
|
+
target_text: del_txt,
|
|
2060
|
+
new_text: "",
|
|
2061
|
+
comment: "Diff: Text deleted",
|
|
2062
|
+
_match_start_index: idx
|
|
2063
|
+
});
|
|
1932
2064
|
}
|
|
1933
2065
|
return edits;
|
|
1934
2066
|
}
|
|
1935
2067
|
function create_unified_diff(original_text, modified_text, context_lines = 3) {
|
|
1936
2068
|
const dmp = new diff_match_patch.diff_match_patch();
|
|
2069
|
+
dmp.Diff_Timeout = 2;
|
|
1937
2070
|
const a = dmp.diff_linesToChars_(original_text, modified_text);
|
|
1938
2071
|
const diffs = dmp.diff_main(a.chars1, a.chars2, false);
|
|
1939
2072
|
dmp.diff_charsToLines_(diffs, a.lineArray);
|
|
@@ -1976,159 +2109,614 @@ function create_unified_diff(original_text, modified_text, context_lines = 3) {
|
|
|
1976
2109
|
if (output.length === 2) return "";
|
|
1977
2110
|
return output.join("\n");
|
|
1978
2111
|
}
|
|
1979
|
-
|
|
1980
|
-
|
|
1981
|
-
|
|
1982
|
-
|
|
1983
|
-
|
|
2112
|
+
/**
 * Renders the word-level edits between two texts as a patch-like listing.
 *
 * The output starts with `--- <original_path>` / `+++ <modified_path>` and a
 * blank line. Each edit returned by `generate_edits_from_text` then becomes
 * one "@@ Word Patch @@" hunk: a line of leading context, an optional
 * `- removed` line, an optional `+ added` line, a line of trailing context,
 * and a blank line.
 *
 * @param {string} original_text - Text before the change.
 * @param {string} modified_text - Text after the change.
 * @param {string} [original_path="Original"] - Label for the `---` header.
 * @param {string} [modified_path="Modified"] - Label for the `+++` header.
 * @returns {string} The hunks joined with "\n".
 */
function create_word_patch_diff(original_text, modified_text, original_path = "Original", modified_path = "Modified") {
  // Number of characters of original text shown on each side of a change.
  const CONTEXT_SIZE = 40;
  // Context is printed on a single line: newlines become spaces, CRs vanish.
  const flatten = (snippet) => snippet.replace(/\n/g, " ").replace(/\r/g, "");

  const edits = generate_edits_from_text(original_text, modified_text);
  const lines = [`--- ${original_path}`, `+++ ${modified_path}`, ""];

  for (const edit of edits) {
    const edit_start = edit._match_start_index || 0;
    const before = edit.target_text || "";
    const after = edit.new_text || "";

    // Drop the prefix/suffix both sides share so only the real change shows.
    const [prefix_len, suffix_len] = trim_common_context(before, after);
    const removed = before.substring(prefix_len, before.length - suffix_len);
    const added = after.substring(prefix_len, after.length - suffix_len);

    const from = edit_start + prefix_len;
    const to = from + removed.length;
    const lead_from = Math.max(0, from - CONTEXT_SIZE);
    const trail_to = Math.min(original_text.length, to + CONTEXT_SIZE);

    // An ellipsis marks context that was cut off from the rest of the text.
    const lead = (lead_from > 0 ? "..." : "") + original_text.substring(lead_from, from);
    const trail = original_text.substring(to, trail_to) + (trail_to < original_text.length ? "..." : "");

    lines.push("@@ Word Patch @@", ` ${flatten(lead)}`);
    if (removed) lines.push(`- ${removed}`);
    if (added) lines.push(`+ ${added}`);
    lines.push(` ${flatten(trail)}`, "");
  }

  return lines.join("\n");
}
|
|
1986
|
-
|
|
1987
|
-
|
|
1988
|
-
|
|
1989
|
-
|
|
1990
|
-
|
|
1991
|
-
|
|
2148
|
+
|
|
2149
|
+
// src/markup.ts
|
|
2150
|
+
var AMBIGUITY_EXAMPLES_CAP = 5;
|
|
2151
|
+
var AMBIGUITY_CONTEXT_CHARS = 50;
|
|
2152
|
+
/**
 * Tells whether `text` is wrapped in exactly one pair of `marker` that may be
 * peeled off as markdown emphasis.
 *
 * Returns false when the text does not both start and end with the marker,
 * when nothing lies between the two markers, when the marker occurs again
 * inside, when the inner text has no ASCII letter, or when the marker is
 * "__" and the inner text consists solely of word characters.
 *
 * @param {string} text - Candidate text, markers included.
 * @param {string} marker - Emphasis marker to test.
 * @returns {boolean} True when the surrounding pair can be stripped.
 */
function _should_strip_markers(text, marker) {
  const width = marker.length;
  const is_wrapped =
    text.startsWith(marker) && text.endsWith(marker) && text.length >= width * 2;
  if (!is_wrapped) return false;

  const inner = text.substring(width, text.length - width);
  if (inner === "" || inner.includes(marker)) return false;

  // Must contain at least one ASCII letter.
  if (!/[a-zA-Z]/.test(inner)) return false;

  // "__" around a run of word characters is left alone.
  if (marker === "__" && /^\w+$/.test(inner)) return false;

  // No extra handling is needed for "_": an inner "_" was rejected by the
  // includes(marker) check, and digits/underscores-only text was rejected by
  // the letter check.
  return true;
}
|
|
2166
|
+
/**
 * Splits `text` into `[prefix_markup, clean_text, suffix_markup]`.
 *
 * The markers "**", "__", "_" and "*" are tried in that order; the first one
 * accepted by `_should_strip_markers` is removed from both ends (a single
 * layer only). When none qualifies, the text is returned unchanged with empty
 * prefix and suffix.
 *
 * @param {string} text - Text that may be wrapped in emphasis markers.
 * @returns {[string, string, string]} Leading marker, inner text, trailing marker.
 */
function _strip_balanced_markers(text) {
  const wrapper = ["**", "__", "_", "*"].find((candidate) =>
    _should_strip_markers(text, candidate)
  );
  if (wrapper === undefined) return ["", text, ""];

  const inner = text.substring(wrapper.length, text.length - wrapper.length);
  return [wrapper, inner, wrapper];
}
|
|
2184
|
+
/**
 * Replaces typographic quotes with their straight ASCII counterparts:
 * curly double quotes become `"` and curly single quotes become `'`.
 *
 * @param {string} text - Input text.
 * @returns {string} Text containing only straight quotes.
 */
function _replace_smart_quotes(text) {
  const substitutions = [
    [/“/g, '"'],
    [/”/g, '"'],
    [/‘/g, "'"],
    [/’/g, "'"],
  ];
  return substitutions.reduce(
    (result, [curly, straight]) => result.replace(curly, straight),
    text
  );
}
|
|
2187
|
+
/**
 * Widens the range `[start, end)` of `text` so that it does not cut an
 * emphasis pair in half.
 *
 * For each of the markers "**", "__", "_" and "*" (in that order, two passes),
 * the occurrences inside the current range are counted. When the count is
 * odd, the range grows by one marker: to the right if the text directly after
 * the range starts with the marker, otherwise to the left if the text directly
 * before the range ends with it.
 *
 * @param {string} text - Full text the range refers to.
 * @param {number} start - Inclusive start offset.
 * @param {number} end - Exclusive end offset.
 * @returns {[number, number]} The possibly widened `[start, end]`.
 */
function _find_safe_boundaries(text, start, end) {
  let lo = start;
  let hi = end;

  // Non-overlapping occurrences of `needle` in `haystack`.
  const occurrences = (haystack, needle) => haystack.split(needle).length - 1;

  for (let pass = 0; pass < 2; pass += 1) {
    for (const marker of ["**", "__", "_", "*"]) {
      const is_balanced = occurrences(text.substring(lo, hi), marker) % 2 === 0;
      if (is_balanced) continue;

      // Prefer taking the closing marker on the right, then the opening one
      // on the left.
      if (text.startsWith(marker, hi)) {
        hi += marker.length;
      } else if (text.endsWith(marker, lo)) {
        lo -= marker.length;
      }
    }
  }

  return [lo, hi];
}
|
|
2071
|
-
|
|
2072
|
-
|
|
2073
|
-
|
|
2074
|
-
|
|
2075
|
-
|
|
2076
|
-
|
|
2077
|
-
|
|
2078
|
-
|
|
2079
|
-
|
|
2080
|
-
|
|
2081
|
-
|
|
2082
|
-
|
|
2083
|
-
|
|
2084
|
-
|
|
2085
|
-
for (const part of this.doc.pkg.parts) {
|
|
2086
|
-
if (part === this.doc.part || part.contentType.includes("wordprocessingml") && part.contentType.endsWith("+xml")) {
|
|
2087
|
-
if (!part._element.hasAttribute("xmlns:w16du")) {
|
|
2088
|
-
part._element.setAttribute("xmlns:w16du", w16du_ns);
|
|
2089
|
-
}
|
|
2214
|
+
/**
 * Shrinks the range `[start, end)` of `text` by dropping a dangling emphasis
 * marker at either edge.
 *
 * The markers "**", "__", "*" and "_" are checked in that order, first at the
 * leading edge and then at the trailing edge. A marker is trimmed only when
 * the range contains it an odd number of times and removing the edge
 * occurrence leaves an even number.
 *
 * @param {string} text - Full text the range refers to.
 * @param {number} start - Inclusive start offset.
 * @param {number} end - Exclusive end offset.
 * @returns {[number, number]} The possibly narrowed `[start, end]`.
 */
function _refine_match_boundaries(text, start, end) {
  const candidates = ["**", "__", "*", "_"];
  // True when `segment` holds an odd number of non-overlapping `marker`s.
  const is_odd = (segment, marker) => (segment.split(marker).length - 1) % 2 === 1;

  let span_text = text.substring(start, end);
  let lo = start;
  let hi = end;

  // Leading edge.
  for (const marker of candidates) {
    if (!span_text.startsWith(marker)) continue;
    const without = span_text.substring(marker.length);
    if (is_odd(span_text, marker) && !is_odd(without, marker)) {
      lo += marker.length;
      span_text = without;
    }
  }

  // Trailing edge.
  for (const marker of candidates) {
    if (!span_text.endsWith(marker)) continue;
    const without = span_text.substring(0, span_text.length - marker.length);
    if (is_odd(span_text, marker) && !is_odd(without, marker)) {
      hi -= marker.length;
      span_text = without;
    }
  }

  return [lo, hi];
}
|
|
2247
|
+
/**
 * Builds the source of a regular expression that matches `target_text`
 * loosely inside markdown-like text.
 *
 * The target is normalized with `_replace_smart_quotes` and then split into
 * tokens. Literal stretches are regex-escaped; between every token the
 * pattern tolerates any run of `*` / `_` emphasis characters. Individual
 * tokens are relaxed as follows:
 *   - a run of underscores matches any run of underscores (`_+`);
 *   - whitespace without a newline matches any whitespace (`\s+`);
 *   - whitespace containing a newline matches any mix of whitespace and list
 *     markers (`*`, `+`, `-`, `>`, or `1.`-style numbers), each marker being
 *     followed by whitespace;
 *   - a straight quote also matches its curly variants;
 *   - `. , ; : /` match themselves.
 * An optional list marker is also accepted in front of the whole match.
 *
 * @param {string} target_text - Text to search for.
 * @returns {string} Regex source, suitable for `new RegExp(...)`.
 */
function _make_fuzzy_regex(target_text) {
  const normalized = _replace_smart_quotes(target_text);

  const token_pattern = /(_+)|(\s+)|(['"])|([.,;:\/])/g;
  const md_noise = "[*_]*";
  const start_list_marker = "(?:[ \\t]*(?:[*+\\->]|\\d+\\.)\\s+)?";
  // Whitespace and list markers, one unit at a time. This accepts the same
  // strings as the nested form `(?:(?:\s*M\s+|\s*\n\s*)|\s+)+`, but each
  // character can only be consumed one way, so a long run of blank lines
  // followed by a mismatch no longer backtracks exponentially.
  const multiline_gap = "(?:\\s|(?:[*+\\->]|\\d+\\.)\\s)+";

  const escapeRegExp = (str) => str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

  const parts = [start_list_marker, md_noise];
  let last_idx = 0;

  for (const match of normalized.matchAll(token_pattern)) {
    const literal = normalized.substring(last_idx, match.index);
    if (literal) {
      parts.push(escapeRegExp(literal));
      parts.push(md_noise);
    }

    const [, g_underscore, g_space, g_quote, g_punct] = match;
    if (g_underscore) {
      parts.push("_+");
    } else if (g_space) {
      parts.push(g_space.includes("\n") ? multiline_gap : "\\s+");
    } else if (g_quote) {
      // Smart quotes were normalized above, so only straight quotes show up
      // here; let each match its curly variants in the searched text.
      parts.push(g_quote === "'" ? "[\u2018\u2019']" : '["\u201C\u201D]');
    } else if (g_punct) {
      parts.push(escapeRegExp(g_punct));
    }
    parts.push(md_noise);
    last_idx = match.index + match[0].length;
  }

  const remaining = normalized.substring(last_idx);
  if (remaining) parts.push(escapeRegExp(remaining));
  return parts.join("");
}
|
|
2290
|
+
/**
 * Locates `target` inside `text`, trying progressively looser strategies:
 *   1. exact substring search,
 *   2. substring search after `_replace_smart_quotes` on both sides,
 *   3. the fuzzy pattern produced by `_make_fuzzy_regex`.
 * The winning range is passed through `_find_safe_boundaries` (and, for the
 * fuzzy case, `_refine_match_boundaries` first).
 *
 * @param {string} text - Haystack.
 * @param {string} target - Needle; an empty/falsy value never matches.
 * @returns {number[]} `[start, end]`, or `[-1, -1]` when nothing matches.
 */
function _find_match_in_text(text, target) {
  if (!target) {
    return [-1, -1];
  }

  const exactAt = text.indexOf(target);
  if (exactAt !== -1) {
    return _find_safe_boundaries(text, exactAt, exactAt + target.length);
  }

  // NOTE(review): offsets found in the normalised text are applied to the
  // original text, which presumes normalisation keeps string length — confirm.
  const normText = _replace_smart_quotes(text);
  const normTarget = _replace_smart_quotes(target);
  const normAt = normText.indexOf(normTarget);
  if (normAt !== -1) {
    return _find_safe_boundaries(text, normAt, normAt + normTarget.length);
  }

  try {
    const hit = new RegExp(_make_fuzzy_regex(target)).exec(text);
    if (hit) {
      const rawStart = hit.index;
      const rawEnd = rawStart + hit[0].length;
      const [refinedStart, refinedEnd] = _refine_match_boundaries(text, rawStart, rawEnd);
      return _find_safe_boundaries(text, refinedStart, refinedEnd);
    }
  } catch (e) {
    // Best-effort: any failure in the fuzzy stage is reported as "no match".
  }
  return [-1, -1];
}
|
|
2316
|
+
/**
 * Renders one edit as CriticMarkup.
 *
 * Markers split off `target_text` by `_strip_balanced_markers` are kept
 * outside the CriticMarkup braces. When `new_text` is wrapped in the same
 * markers they are removed from it too, so they are not emitted twice.
 *
 * Output shape: `prefix` + body + `suffix` + optional `{>>meta<<}` where body is
 *   - `{==target==}` when `highlight_only` is set,
 *   - `{--target--}`, `{++new++}` or `{--target--}{++new++}` otherwise
 *     (nothing when both sides are empty).
 *
 * @param {string} target_text - Text being replaced.
 * @param {string} new_text - Replacement text (may be empty).
 * @param {string} [comment] - Optional comment placed in the meta block.
 * @param {number} edit_index - Index reported as `[Edit:N]`.
 * @param {boolean} include_index - Whether to emit `[Edit:N]`.
 * @param {boolean} highlight_only - Emit a highlight instead of a change.
 * @returns {string} The CriticMarkup fragment.
 */
function _build_critic_markup(target_text, new_text, comment, edit_index, include_index, highlight_only) {
  const [prefix_markup, clean_target, suffix_markup] = _strip_balanced_markers(target_text);

  let clean_new = new_text;
  const new_is_wrapped =
    prefix_markup &&
    new_text &&
    new_text.startsWith(prefix_markup) &&
    new_text.endsWith(suffix_markup);
  if (new_is_wrapped) {
    // Strip each marker by its OWN length; the previous code used the prefix
    // length for both ends, which mis-trims when the two lengths differ.
    const wrapper_len = prefix_markup.length + suffix_markup.length;
    if (new_text.length > wrapper_len) {
      clean_new = new_text.substring(
        prefix_markup.length,
        new_text.length - suffix_markup.length
      );
    }
  }

  const parts = [prefix_markup];
  if (highlight_only) {
    parts.push(`{==${clean_target}==}`);
  } else {
    const has_target = Boolean(clean_target);
    const has_new = Boolean(clean_new);
    if (has_target && has_new) {
      parts.push(`{--${clean_target}--}{++${clean_new}++}`);
    } else if (has_target) {
      parts.push(`{--${clean_target}--}`);
    } else if (has_new) {
      parts.push(`{++${clean_new}++}`);
    }
  }
  parts.push(suffix_markup);

  const meta_parts = [];
  if (comment) meta_parts.push(comment);
  if (include_index) meta_parts.push(`[Edit:${edit_index}]`);
  if (meta_parts.length > 0) {
    parts.push(`{>>${meta_parts.join(" ")}<<}`);
  }
  return parts.join("");
}
|
|
2346
|
+
/**
 * Applies a list of edits to markdown text, rendering each as CriticMarkup.
 *
 * Edits with no `target_text`, or whose target cannot be located by
 * `_find_match_in_text`, are skipped. When two edits match overlapping
 * ranges, the one appearing earlier in `edits` wins. Accepted edits are
 * applied from the end of the text backwards so earlier offsets stay valid.
 *
 * @param {string} markdown_text - Source text.
 * @param {Array<object>} edits - Objects with `target_text`, `new_text`, `comment`.
 * @param {boolean} [include_index=false] - Emit `[Edit:N]` in the meta block.
 * @param {boolean} [highlight_only=false] - Highlight targets instead of changing them.
 * @returns {string} Text with CriticMarkup inserted.
 */
function apply_edits_to_markdown(markdown_text, edits, include_index = false, highlight_only = false) {
  if (!edits || edits.length === 0) return markdown_text;

  // Pass 1: locate each edit; `located` is naturally ordered by edit index.
  const located = [];
  for (let idx = 0; idx < edits.length; idx++) {
    const edit = edits[idx];
    const target = edit.target_text || "";
    if (!target) continue;
    const [start, end] = _find_match_in_text(markdown_text, target);
    if (start === -1) continue;
    located.push({ start, end, matched: markdown_text.substring(start, end), edit, idx });
  }

  // Pass 2: drop edits that collide with an already accepted range.
  const accepted = [];
  for (const candidate of located) {
    const collides = accepted.some(
      (taken) => candidate.start < taken.end && candidate.end > taken.start
    );
    if (!collides) accepted.push(candidate);
  }

  // Pass 3: splice back-to-front.
  accepted.sort((a, b) => b.start - a.start);
  let result = markdown_text;
  for (const { start, end, matched, edit, idx } of accepted) {
    const new_txt = edit.new_text || "";
    const [prefix_len, suffix_len] = trim_common_context(matched, new_txt);

    // Text shared by both sides stays outside the markup.
    const kept_head = prefix_len > 0 ? matched.substring(0, prefix_len) : "";
    const kept_tail = suffix_len > 0 ? matched.substring(matched.length - suffix_len) : "";
    const changed_target = matched.substring(prefix_len, matched.length - suffix_len);
    const changed_new = new_txt.substring(prefix_len, new_txt.length - suffix_len);

    const markup = _build_critic_markup(
      changed_target,
      changed_new,
      edit.comment,
      idx,
      include_index,
      highlight_only
    );
    result = result.substring(0, start) + kept_head + markup + kept_tail + result.substring(end);
  }
  return result;
}
|
|
2406
|
+
/**
 * Formats the multi-line error shown when an edit's target matches more than
 * once. Lists up to `AMBIGUITY_EXAMPLES_CAP` occurrences, each with up to
 * `AMBIGUITY_CONTEXT_CHARS` characters of context on either side; newlines in
 * the excerpts are flattened to spaces and long matches are elided.
 *
 * @param {number} edit_index - Index printed in the message as given.
 * @param {string} target_text - The ambiguous target (not used in the output).
 * @param {string} haystack - Text the matches were found in.
 * @param {Array<number[]>} match_positions - `[start, end]` pairs.
 * @returns {string} The formatted message.
 * @throws {Error} When fewer than two match positions are supplied.
 */
function format_ambiguity_error(edit_index, target_text, haystack, match_positions) {
  const total = match_positions.length;
  if (total < 2) {
    throw new Error(
      `format_ambiguity_error requires at least 2 matches, got ${total}`
    );
  }

  const flatten = (s) => s.replace(/\n/g, " ");
  const shown = match_positions.slice(0, AMBIGUITY_EXAMPLES_CAP);

  const examples = shown.map(([start, end], i) => {
    const pre_start = Math.max(0, start - AMBIGUITY_CONTEXT_CHARS);
    const post_end = Math.min(haystack.length, end + AMBIGUITY_CONTEXT_CHARS);

    let match_text = flatten(haystack.substring(start, end));
    if (match_text.length > 50) {
      match_text = match_text.substring(0, 25) + "..." + match_text.substring(match_text.length - 20);
    }

    const lead = pre_start > 0 ? "..." : "";
    const trail = post_end < haystack.length ? "..." : "";
    const before = flatten(haystack.substring(pre_start, start));
    const after = flatten(haystack.substring(end, post_end));
    return ` ${i + 1}. "${lead}${before}[${match_text}]${after}${trail}"`;
  });

  const lines = [
    `- Edit ${edit_index} Failed: Ambiguous match. Target text appears ${total} times. First ${shown.length} occurrences:`,
    ...examples
  ];
  const hidden = total - shown.length;
  if (hidden > 0) {
    lines.push(` ... and ${hidden} more occurrence(s) not shown.`);
  }
  lines.push(
    " Please provide more surrounding context in your target_text to uniquely identify the location."
  );
  return lines.join("\n");
}
|
|
2442
|
+
|
|
2443
|
+
// src/engine.ts
|
|
2444
|
+
/**
 * Returns the closest following sibling whose `nodeType` is 1 (an element),
 * skipping any other node kinds, or `null` when there is none.
 */
function getNextElement(el) {
  for (let node = el.nextSibling; node; node = node.nextSibling) {
    if (node.nodeType === 1) return node;
  }
  return null;
}
|
|
2452
|
+
/**
 * Returns the closest preceding sibling whose `nodeType` is 1 (an element),
 * skipping any other node kinds, or `null` when there is none.
 */
function getPreviousElement(el) {
  for (let node = el.previousSibling; node; node = node.previousSibling) {
    if (node.nodeType === 1) return node;
  }
  return null;
}
|
|
2460
|
+
/**
 * Inserts `newNode` immediately after `refNode` under the same parent.
 * Does nothing when `refNode` has no parent.
 */
function insertAfter(newNode, refNode) {
  const parent = refNode.parentNode;
  if (!parent) return;
  parent.insertBefore(newNode, refNode.nextSibling);
}
|
|
2465
|
+
/**
 * Error raised for a failed batch; carries every individual failure message.
 * The `message` is a fixed header followed by the failures, one per line, and
 * the raw list stays available on `errors`.
 */
var BatchValidationError = class extends Error {
  errors;
  constructor(errors) {
    super(`Batch validation failed:\n${errors.join("\n")}`);
    this.name = "BatchValidationError";
    this.errors = errors;
  }
};
|
|
2473
|
+
/**
 * Statically validates the text of each edit before it is applied.
 *
 * For every edit (reported 1-based) the following are rejected:
 *   - CriticMarkup tags written by hand in `new_text`;
 *   - inserting or deleting footnote/endnote markers (`[^fn-…]`, `[^en-…]`);
 *   - inserting or deleting hyperlinks, or changing links when the target
 *     holds more than one;
 *   - inserting, deleting or altering cross-reference markers (`[~…~](#…)`);
 *   - adding anchor markers (`{#…}`) not already present in the target;
 *   - headings deeper than six levels in a `modify` edit;
 *   - any mention of the read-only structural appendix.
 *
 * @param {Array<object>} edits - Objects with `target_text`, `new_text`, `type`.
 * @returns {string[]} One message per violation; empty when all edits pass.
 */
function validate_edit_strings(edits) {
  const errors = [];
  const countOf = (list, item) => list.filter((x) => x === item).length;
  const sameList = (a, b) => JSON.stringify(a) === JSON.stringify(b);
  const CRITIC_TAGS = ["{++", "{--", "{>>", "{=="];
  const READONLY_MARKERS = ["READONLY_BOUNDARY_START", "# Document Structure (Read-Only)"];

  for (let i = 0; i < edits.length; i++) {
    const edit = edits[i];
    const t_text = edit.target_text || "";
    const n_text = edit.new_text || "";
    const eitherHas = (needle) => t_text.includes(needle) || n_text.includes(needle);
    const fail = (reason) => {
      errors.push(`- Edit ${i + 1} Failed: ${reason}`);
    };

    // Hand-written CriticMarkup in the replacement text.
    if (CRITIC_TAGS.some((tag) => n_text.includes(tag))) {
      fail(
        "Do not manually write CriticMarkup tags ({++, {--, {>>, {==) in `new_text`. The engine handles redlining automatically. To add a comment, use the `comment` parameter."
      );
    }

    // Footnote / endnote markers must survive unchanged.
    if (eitherHas("[^")) {
      const t_fns = (t_text.match(/\[\^(?:fn|en)-[^\]]+\]/g) || []).sort();
      const n_fns = (n_text.match(/\[\^(?:fn|en)-[^\]]+\]/g) || []).sort();
      if (!sameList(t_fns, n_fns)) {
        const inserted =
          n_fns.length > t_fns.length ||
          n_fns.some((f) => countOf(n_fns, f) > countOf(t_fns, f));
        fail(
          inserted
            ? "Cannot insert footnote/endnote markers via text replace. Markers like `[^fn-N]` are read-only projections. Use Word's References menu."
            : "Cannot delete footnote/endnote references via text replace. The marker corresponds to a structural XML element."
        );
      }
    }

    // Hyperlinks: count must match; multi-link targets must be untouched.
    if (eitherHas("](")) {
      const t_links = (t_text.match(/\[(?!~)[^\]]+\]\([^)]+\)/g) || []).sort();
      const n_links = (n_text.match(/\[(?!~)[^\]]+\]\([^)]+\)/g) || []).sort();
      if (n_links.length > t_links.length) {
        fail("Cannot insert hyperlinks via text replace. Use a dedicated structural operation.");
      } else if (n_links.length < t_links.length) {
        fail("Cannot delete hyperlinks via text replace. The marker corresponds to a structural XML element.");
      } else if (t_links.length > 1 && !sameList(t_links, n_links)) {
        fail("Can only edit or retarget one hyperlink per text replacement. Please split into multiple edits.");
      }
    }

    // Cross-references: must be identical, in order.
    if (eitherHas("[~")) {
      const t_xrefs = t_text.match(/\[~[^~]+~\]\(#[^\)]+\)/g) || [];
      const n_xrefs = n_text.match(/\[~[^~]+~\]\(#[^\)]+\)/g) || [];
      if (n_xrefs.length > t_xrefs.length) {
        fail("Cannot insert cross-references via text replace. Markers are read-only projections.");
      } else if (n_xrefs.length < t_xrefs.length) {
        fail("Cannot delete cross-references via text replace. The marker corresponds to a structural XML element.");
      } else if (!sameList(t_xrefs, n_xrefs)) {
        fail("Modifying or retargeting cross-reference markers is disallowed to prevent dependency corruption.");
      }
    }

    // Anchors: the replacement may not contain more of any anchor than the target.
    if (eitherHas("{#")) {
      const t_anchors = t_text.match(/\{#[^\}]+\}/g) || [];
      const n_anchors = n_text.match(/\{#[^\}]+\}/g) || [];
      if (n_anchors.some((a) => countOf(n_anchors, a) > countOf(t_anchors, a))) {
        fail("Cannot modify or insert internal anchor markers (`{#...}`). These represent structural XML bookmarks.");
      }
    }

    // Headings deeper than level 6 (reported once per edit).
    if (edit.type === "modify" && n_text) {
      for (const line of n_text.split(/[\r\n]+/)) {
        const stripped = line.trimStart();
        if (!stripped.startsWith("#######")) continue;
        const level = stripped.length - stripped.replace(/^#+/, "").length;
        const rest = stripped.substring(level);
        if (rest === "" || rest.startsWith(" ")) {
          fail(`Heading level ${level} is not supported (maximum is 6).`);
          break;
        }
      }
    }

    // Read-only structural appendix.
    if (READONLY_MARKERS.some((marker) => eitherHas(marker))) {
      fail("Modification targets the read-only boundary (Structural Appendix). This section cannot be edited.");
    }
  }
  return errors;
}
|
|
2576
|
+
var RedlineEngine = class {
|
|
2577
|
+
doc;
|
|
2578
|
+
author;
|
|
2579
|
+
timestamp;
|
|
2580
|
+
current_id;
|
|
2581
|
+
mapper;
|
|
2582
|
+
comments_manager;
|
|
2583
|
+
clean_mapper = null;
|
|
2584
|
+
skipped_details = [];
|
|
2585
|
+
constructor(doc, author = "Adeu AI (TS)") {
|
|
2586
|
+
this.doc = doc;
|
|
2587
|
+
this.author = author;
|
|
2588
|
+
this.timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/\.\d{3}Z$/, "Z");
|
|
2589
|
+
const w16du_ns = "http://schemas.microsoft.com/office/word/2023/wordml/word16du";
|
|
2590
|
+
for (const part of this.doc.pkg.parts) {
|
|
2591
|
+
if (part === this.doc.part || part.contentType.includes("wordprocessingml") && part.contentType.endsWith("+xml")) {
|
|
2592
|
+
if (!part._element.hasAttribute("xmlns:w16du")) {
|
|
2593
|
+
part._element.setAttribute("xmlns:w16du", w16du_ns);
|
|
2594
|
+
}
|
|
2595
|
+
}
|
|
2596
|
+
}
|
|
2597
|
+
this.current_id = this._scan_existing_ids();
|
|
2598
|
+
this.mapper = new DocumentMapper(this.doc);
|
|
2599
|
+
this.comments_manager = new CommentsManager(this.doc);
|
|
2600
|
+
}
|
|
2601
|
+
_scan_existing_ids() {
|
|
2602
|
+
let maxId = 0;
|
|
2603
|
+
for (const tag of ["w:ins", "w:del"]) {
|
|
2604
|
+
const elements = findAllDescendants(this.doc.element, tag);
|
|
2605
|
+
for (const el of elements) {
|
|
2606
|
+
const val = parseInt(el.getAttribute("w:id") || "0", 10);
|
|
2607
|
+
if (!isNaN(val) && val > maxId) maxId = val;
|
|
2608
|
+
}
|
|
2609
|
+
}
|
|
2610
|
+
return maxId;
|
|
2611
|
+
}
|
|
2612
|
+
accept_all_revisions() {
|
|
2613
|
+
const parts_to_process = [this.doc.element];
|
|
2614
|
+
for (const part of this.doc.pkg.parts) {
|
|
2615
|
+
if (part === this.doc.part) continue;
|
|
2616
|
+
if (part.contentType.includes("wordprocessingml") && part.contentType.endsWith("+xml")) {
|
|
2617
|
+
parts_to_process.push(part._element);
|
|
2618
|
+
}
|
|
2619
|
+
}
|
|
2620
|
+
for (const root_element of parts_to_process) {
|
|
2621
|
+
const insNodes = findAllDescendants(root_element, "w:ins");
|
|
2622
|
+
for (const ins of insNodes) {
|
|
2623
|
+
this._clean_wrapping_comments(ins);
|
|
2624
|
+
const parent = ins.parentNode;
|
|
2625
|
+
if (!parent) continue;
|
|
2626
|
+
if (parent.tagName === "w:trPr") {
|
|
2627
|
+
parent.removeChild(ins);
|
|
2628
|
+
continue;
|
|
2629
|
+
}
|
|
2630
|
+
while (ins.firstChild) {
|
|
2631
|
+
parent.insertBefore(ins.firstChild, ins);
|
|
2632
|
+
}
|
|
2633
|
+
parent.removeChild(ins);
|
|
2634
|
+
}
|
|
2635
|
+
const pNodes = findAllDescendants(root_element, "w:p");
|
|
2636
|
+
for (const p of pNodes) {
|
|
2637
|
+
const pPr = findChild(p, "w:pPr");
|
|
2638
|
+
if (pPr) {
|
|
2639
|
+
const rPr = findChild(pPr, "w:rPr");
|
|
2640
|
+
const delMark = rPr ? findChild(rPr, "w:del") : null;
|
|
2641
|
+
if (rPr && delMark) {
|
|
2642
|
+
let has_content = false;
|
|
2643
|
+
for (const tag of ["w:t", "w:tab", "w:br"]) {
|
|
2644
|
+
for (const child of findAllDescendants(p, tag)) {
|
|
2645
|
+
if (tag === "w:t" && !child.textContent) continue;
|
|
2646
|
+
let is_deleted = false;
|
|
2647
|
+
let curr = child.parentNode;
|
|
2648
|
+
while (curr && curr !== p) {
|
|
2649
|
+
if (curr.tagName === "w:del") {
|
|
2650
|
+
is_deleted = true;
|
|
2651
|
+
break;
|
|
2652
|
+
}
|
|
2653
|
+
curr = curr.parentNode;
|
|
2654
|
+
}
|
|
2655
|
+
if (!is_deleted) {
|
|
2656
|
+
has_content = true;
|
|
2657
|
+
break;
|
|
2658
|
+
}
|
|
2659
|
+
}
|
|
2660
|
+
if (has_content) {
|
|
2661
|
+
break;
|
|
2662
|
+
}
|
|
2663
|
+
}
|
|
2664
|
+
if (has_content) {
|
|
2665
|
+
rPr.removeChild(delMark);
|
|
2666
|
+
} else {
|
|
2667
|
+
this._clean_wrapping_comments(p);
|
|
2668
|
+
this._delete_comments_in_element(p);
|
|
2669
|
+
if (p.parentNode) {
|
|
2670
|
+
p.parentNode.removeChild(p);
|
|
2671
|
+
}
|
|
2672
|
+
}
|
|
2673
|
+
}
|
|
2674
|
+
}
|
|
2675
|
+
}
|
|
2676
|
+
const delNodes = findAllDescendants(root_element, "w:del");
|
|
2677
|
+
for (const d of delNodes) {
|
|
2678
|
+
this._clean_wrapping_comments(d);
|
|
2679
|
+
this._delete_comments_in_element(d);
|
|
2680
|
+
const parent = d.parentNode;
|
|
2681
|
+
if (parent) {
|
|
2682
|
+
if (parent.tagName === "w:trPr") {
|
|
2683
|
+
const row = parent.parentNode;
|
|
2684
|
+
if (row && row.parentNode) {
|
|
2685
|
+
row.parentNode.removeChild(row);
|
|
2686
|
+
}
|
|
2687
|
+
} else {
|
|
2688
|
+
parent.removeChild(d);
|
|
2689
|
+
}
|
|
2690
|
+
}
|
|
2691
|
+
}
|
|
2692
|
+
}
|
|
2693
|
+
const comment_ids = /* @__PURE__ */ new Set();
|
|
2694
|
+
for (const tag of [
|
|
2695
|
+
"w:commentRangeStart",
|
|
2696
|
+
"w:commentRangeEnd",
|
|
2697
|
+
"w:commentReference"
|
|
2698
|
+
]) {
|
|
2699
|
+
for (const node of findAllDescendants(this.doc.element, tag)) {
|
|
2700
|
+
const cid = node.getAttribute("w:id");
|
|
2701
|
+
if (cid) comment_ids.add(cid);
|
|
2702
|
+
}
|
|
2703
|
+
}
|
|
2704
|
+
const comments_part = this.doc.pkg.parts.find(
|
|
2705
|
+
(p) => p.contentType === "application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml"
|
|
2706
|
+
);
|
|
2707
|
+
if (comments_part) {
|
|
2708
|
+
for (const c of findAllDescendants(comments_part._element, "w:comment")) {
|
|
2709
|
+
const cid = c.getAttribute("w:id");
|
|
2710
|
+
if (cid) comment_ids.add(cid);
|
|
2711
|
+
}
|
|
2712
|
+
}
|
|
2713
|
+
for (const cid of comment_ids) {
|
|
2714
|
+
this.comments_manager.deleteComment(cid);
|
|
2715
|
+
}
|
|
2716
|
+
}
|
|
2717
|
+
_getNextId() {
|
|
2718
|
+
this.current_id++;
|
|
2719
|
+
return this.current_id.toString();
|
|
2132
2720
|
}
|
|
2133
2721
|
_create_track_change_tag(tagName, author = "", reuseId = null) {
|
|
2134
2722
|
const xmlDoc = this.doc.part._element.ownerDocument;
|
|
@@ -2140,11 +2728,267 @@ var RedlineEngine = class {
|
|
|
2140
2728
|
tag.setAttribute("w16du:dateUtc", this.timestamp);
|
|
2141
2729
|
return tag;
|
|
2142
2730
|
}
|
|
2143
|
-
_set_text_content(element, text) {
|
|
2144
|
-
element.textContent = text;
|
|
2145
|
-
if (text.trim() !== text) {
|
|
2146
|
-
element.setAttribute("xml:space", "preserve");
|
|
2731
|
+
_set_text_content(element, text) {
|
|
2732
|
+
element.textContent = text;
|
|
2733
|
+
if (text.trim() !== text) {
|
|
2734
|
+
element.setAttribute("xml:space", "preserve");
|
|
2735
|
+
}
|
|
2736
|
+
}
|
|
2737
|
+
/**
|
|
2738
|
+
* Attaches a comment that wraps a contiguous range within a single paragraph.
|
|
2739
|
+
* start_element and end_element must both be direct children of parent_element
|
|
2740
|
+
* and start_element must come before (or equal) end_element in document order.
|
|
2741
|
+
* Ported from Python `RedlineEngine._attach_comment`.
|
|
2742
|
+
*/
|
|
2743
|
+
_attach_comment(parent_element, start_element, end_element, text) {
|
|
2744
|
+
if (!text) return;
|
|
2745
|
+
const comment_id = this.comments_manager.addComment(this.author, text);
|
|
2746
|
+
const xmlDoc = parent_element.ownerDocument;
|
|
2747
|
+
const range_start = xmlDoc.createElement("w:commentRangeStart");
|
|
2748
|
+
range_start.setAttribute("w:id", comment_id);
|
|
2749
|
+
const range_end = xmlDoc.createElement("w:commentRangeEnd");
|
|
2750
|
+
range_end.setAttribute("w:id", comment_id);
|
|
2751
|
+
const ref_run = xmlDoc.createElement("w:r");
|
|
2752
|
+
const rPr = xmlDoc.createElement("w:rPr");
|
|
2753
|
+
const rStyle = xmlDoc.createElement("w:rStyle");
|
|
2754
|
+
rStyle.setAttribute("w:val", "CommentReference");
|
|
2755
|
+
rPr.appendChild(rStyle);
|
|
2756
|
+
ref_run.appendChild(rPr);
|
|
2757
|
+
const ref = xmlDoc.createElement("w:commentReference");
|
|
2758
|
+
ref.setAttribute("w:id", comment_id);
|
|
2759
|
+
ref_run.appendChild(ref);
|
|
2760
|
+
parent_element.insertBefore(range_start, start_element);
|
|
2761
|
+
const after_end = end_element.nextSibling;
|
|
2762
|
+
if (after_end) {
|
|
2763
|
+
parent_element.insertBefore(range_end, after_end);
|
|
2764
|
+
parent_element.insertBefore(ref_run, range_end.nextSibling);
|
|
2765
|
+
} else {
|
|
2766
|
+
parent_element.appendChild(range_end);
|
|
2767
|
+
parent_element.appendChild(ref_run);
|
|
2768
|
+
}
|
|
2769
|
+
}
|
|
2770
|
+
/**
|
|
2771
|
+
* Attaches a comment that spans across two different paragraphs (or other block
|
|
2772
|
+
* containers). start_element lives inside start_p, end_element lives inside end_p,
|
|
2773
|
+
* and the comment is open from start_element through end_element.
|
|
2774
|
+
* Ported from Python `RedlineEngine._attach_comment_spanning`.
|
|
2775
|
+
*/
|
|
2776
|
+
_attach_comment_spanning(start_p, start_el, end_p, end_el, text) {
|
|
2777
|
+
if (!text) return;
|
|
2778
|
+
const comment_id = this.comments_manager.addComment(this.author, text);
|
|
2779
|
+
const xmlDocStart = start_p.ownerDocument;
|
|
2780
|
+
const xmlDocEnd = end_p.ownerDocument;
|
|
2781
|
+
const range_start = xmlDocStart.createElement("w:commentRangeStart");
|
|
2782
|
+
range_start.setAttribute("w:id", comment_id);
|
|
2783
|
+
const range_end = xmlDocEnd.createElement("w:commentRangeEnd");
|
|
2784
|
+
range_end.setAttribute("w:id", comment_id);
|
|
2785
|
+
const ref_run = xmlDocEnd.createElement("w:r");
|
|
2786
|
+
const rPr = xmlDocEnd.createElement("w:rPr");
|
|
2787
|
+
const rStyle = xmlDocEnd.createElement("w:rStyle");
|
|
2788
|
+
rStyle.setAttribute("w:val", "CommentReference");
|
|
2789
|
+
rPr.appendChild(rStyle);
|
|
2790
|
+
ref_run.appendChild(rPr);
|
|
2791
|
+
const ref = xmlDocEnd.createElement("w:commentReference");
|
|
2792
|
+
ref.setAttribute("w:id", comment_id);
|
|
2793
|
+
ref_run.appendChild(ref);
|
|
2794
|
+
start_p.insertBefore(range_start, start_el);
|
|
2795
|
+
const after_end = end_el.nextSibling;
|
|
2796
|
+
if (after_end) {
|
|
2797
|
+
end_p.insertBefore(range_end, after_end);
|
|
2798
|
+
end_p.insertBefore(ref_run, range_end.nextSibling);
|
|
2799
|
+
} else {
|
|
2800
|
+
end_p.appendChild(range_end);
|
|
2801
|
+
end_p.appendChild(ref_run);
|
|
2802
|
+
}
|
|
2803
|
+
}
|
|
2804
|
+
/**
|
|
2805
|
+
* Inserts `text` as one or more tracked paragraphs anchored relative to
|
|
2806
|
+
* either an existing run or a paragraph. Returns:
|
|
2807
|
+
* { first_node, last_p, last_ins, used_block_mode }
|
|
2808
|
+
* where:
|
|
2809
|
+
* - first_node: the first <w:ins> (for inline mode) OR the first new <w:p>
|
|
2810
|
+
* (for block mode). The caller uses this for splicing into the DOM and
|
|
2811
|
+
* for anchoring comments.
|
|
2812
|
+
* - last_p: the last new <w:p> created, if any. null when entirely inline.
|
|
2813
|
+
* - last_ins: the last <w:ins> created (inside the last new <w:p>, or the
|
|
2814
|
+
* sole inline ins). Used as the comment's end anchor.
|
|
2815
|
+
* - used_block_mode: true when the first line carried a heading/list style
|
|
2816
|
+
* marker and we created a new paragraph for it (rather than inlining it).
|
|
2817
|
+
*
|
|
2818
|
+
* Multi-paragraph rules (only when text contains '\n'):
|
|
2819
|
+
* - Each additional line becomes a new <w:p>, inserted after the anchor
|
|
2820
|
+
* paragraph in document order.
|
|
2821
|
+
* - Each new <w:p> gets a copy of the anchor paragraph's <w:pPr> (so list
|
|
2822
|
+
* numbering / indentation are preserved) unless the line itself starts
|
|
2823
|
+
* with a markdown heading or list marker, which overrides the style.
|
|
2824
|
+
* - Each new <w:p> carries a tracked paragraph-break marker
|
|
2825
|
+
* (<w:pPr><w:rPr><w:ins/></w:rPr></w:pPr>) so Word natively tracks the
|
|
2826
|
+
* paragraph break.
|
|
2827
|
+
* - Each new <w:p>'s content is wrapped in a <w:ins>, with inline bold/
|
|
2828
|
+
* italic markdown parsed via _parse_inline_markdown.
|
|
2829
|
+
*
|
|
2830
|
+
* The first line:
|
|
2831
|
+
* - If it carries a heading / list marker AND we have a paragraph anchor,
|
|
2832
|
+
* we drop into "block mode": no inline <w:ins>; the first line itself
|
|
2833
|
+
* becomes the first new <w:p>.
|
|
2834
|
+
* - Otherwise we emit a single inline <w:ins> for the first line (current
|
|
2835
|
+
* behaviour) and treat the remaining lines as block extensions.
|
|
2836
|
+
*
|
|
2837
|
+
* Does NOT attach comments; callers handle that.
|
|
2838
|
+
*/
|
|
2839
|
+
_track_insert_multiline(text, anchor_run, anchor_paragraph, reuse_id) {
|
|
2840
|
+
if (!text) {
|
|
2841
|
+
return {
|
|
2842
|
+
first_node: null,
|
|
2843
|
+
last_p: null,
|
|
2844
|
+
last_ins: null,
|
|
2845
|
+
used_block_mode: false
|
|
2846
|
+
};
|
|
2847
|
+
}
|
|
2848
|
+
const xmlDoc = this.doc.part._element.ownerDocument;
|
|
2849
|
+
const lines = text.split(/[\r\n]+/);
|
|
2850
|
+
let current_p = null;
|
|
2851
|
+
if (anchor_paragraph !== null) {
|
|
2852
|
+
current_p = anchor_paragraph._element;
|
|
2853
|
+
} else if (anchor_run !== null) {
|
|
2854
|
+
let walker = anchor_run._element;
|
|
2855
|
+
while (walker && walker.tagName !== "w:p") {
|
|
2856
|
+
walker = walker.parentNode;
|
|
2857
|
+
}
|
|
2858
|
+
current_p = walker;
|
|
2859
|
+
}
|
|
2860
|
+
while (lines.length > 1 && lines[lines.length - 1] === "") {
|
|
2861
|
+
lines.pop();
|
|
2862
|
+
}
|
|
2863
|
+
if (lines.length === 0) {
|
|
2864
|
+
return {
|
|
2865
|
+
first_node: null,
|
|
2866
|
+
last_p: null,
|
|
2867
|
+
last_ins: null,
|
|
2868
|
+
used_block_mode: false
|
|
2869
|
+
};
|
|
2870
|
+
}
|
|
2871
|
+
const [first_clean, first_style] = this._parse_markdown_style(lines[0]);
|
|
2872
|
+
const have_paragraph_context = current_p !== null;
|
|
2873
|
+
const block_mode = first_style !== null && have_paragraph_context;
|
|
2874
|
+
let first_node = null;
|
|
2875
|
+
let inline_ins = null;
|
|
2876
|
+
if (!block_mode) {
|
|
2877
|
+
inline_ins = this._build_tracked_ins_for_line(
|
|
2878
|
+
first_clean === lines[0] ? lines[0] : lines[0],
|
|
2879
|
+
anchor_run,
|
|
2880
|
+
reuse_id,
|
|
2881
|
+
xmlDoc
|
|
2882
|
+
);
|
|
2883
|
+
first_node = inline_ins;
|
|
2884
|
+
}
|
|
2885
|
+
const remaining_lines = block_mode ? lines : lines.slice(1);
|
|
2886
|
+
if (remaining_lines.length === 0) {
|
|
2887
|
+
return {
|
|
2888
|
+
first_node,
|
|
2889
|
+
last_p: null,
|
|
2890
|
+
last_ins: inline_ins,
|
|
2891
|
+
used_block_mode: false
|
|
2892
|
+
};
|
|
2893
|
+
}
|
|
2894
|
+
if (!current_p) {
|
|
2895
|
+
return {
|
|
2896
|
+
first_node,
|
|
2897
|
+
last_p: null,
|
|
2898
|
+
last_ins: inline_ins,
|
|
2899
|
+
used_block_mode: false
|
|
2900
|
+
};
|
|
2901
|
+
}
|
|
2902
|
+
const parent_body = current_p.parentNode;
|
|
2903
|
+
if (!parent_body) {
|
|
2904
|
+
return {
|
|
2905
|
+
first_node,
|
|
2906
|
+
last_p: null,
|
|
2907
|
+
last_ins: inline_ins,
|
|
2908
|
+
used_block_mode: false
|
|
2909
|
+
};
|
|
2910
|
+
}
|
|
2911
|
+
const insertAfterEl = (newNode, ref) => {
|
|
2912
|
+
parent_body.insertBefore(newNode, ref.nextSibling);
|
|
2913
|
+
};
|
|
2914
|
+
let last_p = null;
|
|
2915
|
+
let last_ins = null;
|
|
2916
|
+
let after = current_p;
|
|
2917
|
+
for (let i = 0; i < remaining_lines.length; i++) {
|
|
2918
|
+
const raw_line = remaining_lines[i];
|
|
2919
|
+
const [clean_text, style_name] = this._parse_markdown_style(raw_line);
|
|
2920
|
+
const new_p = xmlDoc.createElement("w:p");
|
|
2921
|
+
if (style_name) {
|
|
2922
|
+
this._set_paragraph_style(new_p, style_name);
|
|
2923
|
+
} else {
|
|
2924
|
+
const existing_pPr = findChild(current_p, "w:pPr");
|
|
2925
|
+
if (existing_pPr) {
|
|
2926
|
+
new_p.appendChild(existing_pPr.cloneNode(true));
|
|
2927
|
+
}
|
|
2928
|
+
}
|
|
2929
|
+
let pPr = findChild(new_p, "w:pPr");
|
|
2930
|
+
if (!pPr) {
|
|
2931
|
+
pPr = xmlDoc.createElement("w:pPr");
|
|
2932
|
+
new_p.insertBefore(pPr, new_p.firstChild);
|
|
2933
|
+
}
|
|
2934
|
+
let rPr = findChild(pPr, "w:rPr");
|
|
2935
|
+
if (!rPr) {
|
|
2936
|
+
rPr = xmlDoc.createElement("w:rPr");
|
|
2937
|
+
pPr.appendChild(rPr);
|
|
2938
|
+
}
|
|
2939
|
+
const ins_mark = this._create_track_change_tag("w:ins", "", reuse_id);
|
|
2940
|
+
rPr.appendChild(ins_mark);
|
|
2941
|
+
const content_ins = this._build_tracked_ins_for_line(
|
|
2942
|
+
clean_text,
|
|
2943
|
+
anchor_run,
|
|
2944
|
+
reuse_id,
|
|
2945
|
+
xmlDoc
|
|
2946
|
+
);
|
|
2947
|
+
if (content_ins) {
|
|
2948
|
+
new_p.appendChild(content_ins);
|
|
2949
|
+
}
|
|
2950
|
+
insertAfterEl(new_p, after);
|
|
2951
|
+
after = new_p;
|
|
2952
|
+
last_p = new_p;
|
|
2953
|
+
last_ins = content_ins;
|
|
2954
|
+
if (!first_node) {
|
|
2955
|
+
first_node = new_p;
|
|
2956
|
+
}
|
|
2957
|
+
}
|
|
2958
|
+
return { first_node, last_p, last_ins, used_block_mode: block_mode };
|
|
2959
|
+
}
|
|
2960
|
+
/**
|
|
2961
|
+
* Builds a single tracked-insert wrapper (<w:ins>) containing one or more
|
|
2962
|
+
* <w:r> elements representing the inline markdown segments of `line_text`.
|
|
2963
|
+
* Returns null if line_text is empty.
|
|
2964
|
+
*/
|
|
2965
|
+
_build_tracked_ins_for_line(line_text, anchor_run, reuse_id, xmlDoc) {
|
|
2966
|
+
if (!line_text && line_text !== "") return null;
|
|
2967
|
+
const ins = this._create_track_change_tag("w:ins", "", reuse_id);
|
|
2968
|
+
const segments = this._parse_inline_markdown(line_text);
|
|
2969
|
+
if (segments.length === 0) {
|
|
2970
|
+
return null;
|
|
2971
|
+
}
|
|
2972
|
+
for (const [segText, segProps] of segments) {
|
|
2973
|
+
const r = xmlDoc.createElement("w:r");
|
|
2974
|
+
if (anchor_run && anchor_run._element) {
|
|
2975
|
+
const anchor_rPr = findChild(anchor_run._element, "w:rPr");
|
|
2976
|
+
if (anchor_rPr) {
|
|
2977
|
+
const clone = anchor_rPr.cloneNode(true);
|
|
2978
|
+
for (const tag of ["w:vanish", "w:strike", "w:dstrike"]) {
|
|
2979
|
+
const found = findChild(clone, tag);
|
|
2980
|
+
if (found) clone.removeChild(found);
|
|
2981
|
+
}
|
|
2982
|
+
r.appendChild(clone);
|
|
2983
|
+
}
|
|
2984
|
+
}
|
|
2985
|
+
this._apply_run_props(r, segProps, false);
|
|
2986
|
+
const t = xmlDoc.createElement("w:t");
|
|
2987
|
+
this._set_text_content(t, segText);
|
|
2988
|
+
r.appendChild(t);
|
|
2989
|
+
ins.appendChild(r);
|
|
2147
2990
|
}
|
|
2991
|
+
return ins;
|
|
2148
2992
|
}
|
|
2149
2993
|
_parse_markdown_style(text) {
|
|
2150
2994
|
const stripped_text = text.trimStart();
|
|
@@ -2224,6 +3068,172 @@ var RedlineEngine = class {
|
|
|
2224
3068
|
}
|
|
2225
3069
|
}
|
|
2226
3070
|
}
|
|
3071
|
+
/**
|
|
3072
|
+
* Replaces (or creates) a paragraph's <w:pPr> with a single <w:pStyle> entry
|
|
3073
|
+
* pointing at `style_name`. Strips any existing pPr to avoid layering a new
|
|
3074
|
+
* heading style on top of a previous list/heading configuration.
|
|
3075
|
+
*
|
|
3076
|
+
* In Python, the style id is resolved via doc.styles[style_name].style_id and
|
|
3077
|
+
* falls back to stripping spaces. Node has no equivalent style cache exposed
|
|
3078
|
+
* on `doc`, so we always use the simple "strip spaces" fallback: "Heading 1"
|
|
3079
|
+
* becomes the style id "Heading1", "List Number" becomes "ListNumber", etc.
|
|
3080
|
+
* This matches python-docx's default style-id convention for the built-in
|
|
3081
|
+
* paragraph styles and is what Word writes by default.
|
|
3082
|
+
*/
|
|
3083
|
+
_set_paragraph_style(p_element, style_name) {
|
|
3084
|
+
const xmlDoc = p_element.ownerDocument;
|
|
3085
|
+
const existing_pPr = findChild(p_element, "w:pPr");
|
|
3086
|
+
if (existing_pPr) {
|
|
3087
|
+
p_element.removeChild(existing_pPr);
|
|
3088
|
+
}
|
|
3089
|
+
const pPr = xmlDoc.createElement("w:pPr");
|
|
3090
|
+
const pStyle = xmlDoc.createElement("w:pStyle");
|
|
3091
|
+
const style_id = style_name.replace(/\s+/g, "");
|
|
3092
|
+
pStyle.setAttribute("w:val", style_id);
|
|
3093
|
+
pPr.appendChild(pStyle);
|
|
3094
|
+
p_element.insertBefore(pPr, p_element.firstChild);
|
|
3095
|
+
}
|
|
3096
|
+
_anchor_reply_comment(parent_id, new_id) {
|
|
3097
|
+
const docEl = this.doc.part._element.ownerDocument;
|
|
3098
|
+
const starts = findAllDescendants(
|
|
3099
|
+
this.doc.element,
|
|
3100
|
+
"w:commentRangeStart"
|
|
3101
|
+
).filter((n) => n.getAttribute("w:id") === parent_id);
|
|
3102
|
+
if (starts.length === 0) return;
|
|
3103
|
+
const parent_start = starts[0];
|
|
3104
|
+
const new_start = docEl.createElement("w:commentRangeStart");
|
|
3105
|
+
new_start.setAttribute("w:id", new_id);
|
|
3106
|
+
insertAfter(new_start, parent_start);
|
|
3107
|
+
const ends = findAllDescendants(
|
|
3108
|
+
this.doc.element,
|
|
3109
|
+
"w:commentRangeEnd"
|
|
3110
|
+
).filter((n) => n.getAttribute("w:id") === parent_id);
|
|
3111
|
+
if (ends.length === 0) return;
|
|
3112
|
+
const parent_end = ends[0];
|
|
3113
|
+
const parent_refs = findAllDescendants(
|
|
3114
|
+
this.doc.element,
|
|
3115
|
+
"w:commentReference"
|
|
3116
|
+
).filter((n) => n.getAttribute("w:id") === parent_id);
|
|
3117
|
+
let insertion_point = parent_end;
|
|
3118
|
+
if (parent_refs.length > 0) {
|
|
3119
|
+
const ref_el = parent_refs[0];
|
|
3120
|
+
if (ref_el.parentNode && ref_el.parentNode.tagName === "w:r") {
|
|
3121
|
+
insertion_point = ref_el.parentNode;
|
|
3122
|
+
}
|
|
3123
|
+
}
|
|
3124
|
+
const new_end = docEl.createElement("w:commentRangeEnd");
|
|
3125
|
+
new_end.setAttribute("w:id", new_id);
|
|
3126
|
+
insertAfter(new_end, insertion_point);
|
|
3127
|
+
const ref_run = docEl.createElement("w:r");
|
|
3128
|
+
const rPr = docEl.createElement("w:rPr");
|
|
3129
|
+
const rStyle = docEl.createElement("w:rStyle");
|
|
3130
|
+
rStyle.setAttribute("w:val", "CommentReference");
|
|
3131
|
+
rPr.appendChild(rStyle);
|
|
3132
|
+
ref_run.appendChild(rPr);
|
|
3133
|
+
const ref = docEl.createElement("w:commentReference");
|
|
3134
|
+
ref.setAttribute("w:id", new_id);
|
|
3135
|
+
ref_run.appendChild(ref);
|
|
3136
|
+
insertAfter(ref_run, new_end);
|
|
3137
|
+
}
|
|
3138
|
+
_clean_wrapping_comments(element) {
|
|
3139
|
+
let first_node = element;
|
|
3140
|
+
while (true) {
|
|
3141
|
+
const prev2 = getPreviousElement(first_node);
|
|
3142
|
+
if (prev2 && (prev2.tagName === "w:ins" || prev2.tagName === "w:del")) {
|
|
3143
|
+
first_node = prev2;
|
|
3144
|
+
} else {
|
|
3145
|
+
break;
|
|
3146
|
+
}
|
|
3147
|
+
}
|
|
3148
|
+
let last_node = element;
|
|
3149
|
+
while (true) {
|
|
3150
|
+
const nxt2 = getNextElement(last_node);
|
|
3151
|
+
if (nxt2 && (nxt2.tagName === "w:ins" || nxt2.tagName === "w:del")) {
|
|
3152
|
+
last_node = nxt2;
|
|
3153
|
+
} else {
|
|
3154
|
+
break;
|
|
3155
|
+
}
|
|
3156
|
+
}
|
|
3157
|
+
const starts_to_remove = [];
|
|
3158
|
+
let prev = getPreviousElement(first_node);
|
|
3159
|
+
while (prev) {
|
|
3160
|
+
if (prev.tagName === "w:commentRangeStart") {
|
|
3161
|
+
starts_to_remove.push(prev);
|
|
3162
|
+
prev = getPreviousElement(prev);
|
|
3163
|
+
} else if (prev.tagName === "w:rPr" || prev.tagName === "w:pPr") {
|
|
3164
|
+
prev = getPreviousElement(prev);
|
|
3165
|
+
} else {
|
|
3166
|
+
break;
|
|
3167
|
+
}
|
|
3168
|
+
}
|
|
3169
|
+
const ends_to_remove = [];
|
|
3170
|
+
let nxt = getNextElement(last_node);
|
|
3171
|
+
while (nxt) {
|
|
3172
|
+
if (nxt.tagName === "w:commentRangeEnd") {
|
|
3173
|
+
ends_to_remove.push(nxt);
|
|
3174
|
+
nxt = getNextElement(nxt);
|
|
3175
|
+
} else if (nxt.tagName === "w:r" && findAllDescendants(nxt, "w:commentReference").length > 0) {
|
|
3176
|
+
ends_to_remove.push(nxt);
|
|
3177
|
+
nxt = getNextElement(nxt);
|
|
3178
|
+
} else if (nxt.tagName === "w:commentReference") {
|
|
3179
|
+
ends_to_remove.push(nxt);
|
|
3180
|
+
nxt = getNextElement(nxt);
|
|
3181
|
+
} else {
|
|
3182
|
+
break;
|
|
3183
|
+
}
|
|
3184
|
+
}
|
|
3185
|
+
const end_ids = /* @__PURE__ */ new Set();
|
|
3186
|
+
for (const e of ends_to_remove) {
|
|
3187
|
+
if (e.tagName === "w:commentRangeEnd") {
|
|
3188
|
+
const eid = e.getAttribute("w:id");
|
|
3189
|
+
if (eid) end_ids.add(eid);
|
|
3190
|
+
} else {
|
|
3191
|
+
let ref = findAllDescendants(e, "w:commentReference")[0];
|
|
3192
|
+
if (!ref && e.tagName === "w:commentReference") ref = e;
|
|
3193
|
+
if (ref) {
|
|
3194
|
+
const eid = ref.getAttribute("w:id");
|
|
3195
|
+
if (eid) end_ids.add(eid);
|
|
3196
|
+
}
|
|
3197
|
+
}
|
|
3198
|
+
}
|
|
3199
|
+
for (const s of starts_to_remove) {
|
|
3200
|
+
const c_id = s.getAttribute("w:id");
|
|
3201
|
+
if (c_id && end_ids.has(c_id)) {
|
|
3202
|
+
this.comments_manager.deleteComment(c_id);
|
|
3203
|
+
if (s.parentNode) s.parentNode.removeChild(s);
|
|
3204
|
+
for (const e of ends_to_remove) {
|
|
3205
|
+
let e_id = null;
|
|
3206
|
+
if (e.tagName === "w:commentRangeEnd") {
|
|
3207
|
+
e_id = e.getAttribute("w:id");
|
|
3208
|
+
} else {
|
|
3209
|
+
let ref = findAllDescendants(e, "w:commentReference")[0];
|
|
3210
|
+
if (!ref && e.tagName === "w:commentReference") ref = e;
|
|
3211
|
+
if (ref) e_id = ref.getAttribute("w:id");
|
|
3212
|
+
}
|
|
3213
|
+
if (e_id === c_id && e.parentNode) {
|
|
3214
|
+
e.parentNode.removeChild(e);
|
|
3215
|
+
}
|
|
3216
|
+
}
|
|
3217
|
+
}
|
|
3218
|
+
}
|
|
3219
|
+
}
|
|
3220
|
+
_delete_comments_in_element(element) {
|
|
3221
|
+
const refs = findAllDescendants(element, "w:commentReference");
|
|
3222
|
+
for (const ref of refs) {
|
|
3223
|
+
const c_id = ref.getAttribute("w:id");
|
|
3224
|
+
if (c_id) {
|
|
3225
|
+
this.comments_manager.deleteComment(c_id);
|
|
3226
|
+
for (const tag of ["w:commentRangeStart", "w:commentRangeEnd"]) {
|
|
3227
|
+
const nodes = findAllDescendants(this.doc.element, tag);
|
|
3228
|
+
for (const node of nodes) {
|
|
3229
|
+
if (node.getAttribute("w:id") === c_id && node.parentNode) {
|
|
3230
|
+
node.parentNode.removeChild(node);
|
|
3231
|
+
}
|
|
3232
|
+
}
|
|
3233
|
+
}
|
|
3234
|
+
}
|
|
3235
|
+
}
|
|
3236
|
+
}
|
|
2227
3237
|
validate_edits(edits) {
|
|
2228
3238
|
const errors = [];
|
|
2229
3239
|
if (!this.mapper.full_text) this.mapper["_build_map"]();
|
|
@@ -2234,23 +3244,41 @@ var RedlineEngine = class {
|
|
|
2234
3244
|
let matches = this.mapper.find_all_match_indices(edit.target_text);
|
|
2235
3245
|
let activeText = this.mapper.full_text;
|
|
2236
3246
|
if (matches.length === 0) {
|
|
2237
|
-
if (!this.clean_mapper)
|
|
3247
|
+
if (!this.clean_mapper)
|
|
3248
|
+
this.clean_mapper = new DocumentMapper(this.doc, true);
|
|
2238
3249
|
matches = this.clean_mapper.find_all_match_indices(edit.target_text);
|
|
2239
3250
|
if (matches.length > 0) activeText = this.clean_mapper.full_text;
|
|
2240
3251
|
}
|
|
2241
3252
|
if (matches.length === 0) {
|
|
2242
|
-
errors.push(
|
|
2243
|
-
|
|
3253
|
+
errors.push(
|
|
3254
|
+
`- Edit ${i + 1} Failed: Target text not found in document:
|
|
3255
|
+
"${edit.target_text}"`
|
|
3256
|
+
);
|
|
2244
3257
|
} else if (matches.length > 1) {
|
|
2245
|
-
|
|
2246
|
-
|
|
3258
|
+
const positions = matches.map(([start, length]) => [
|
|
3259
|
+
start,
|
|
3260
|
+
start + length
|
|
3261
|
+
]);
|
|
3262
|
+
errors.push(
|
|
3263
|
+
format_ambiguity_error(
|
|
3264
|
+
i + 1,
|
|
3265
|
+
edit.target_text,
|
|
3266
|
+
activeText,
|
|
3267
|
+
positions
|
|
3268
|
+
)
|
|
3269
|
+
);
|
|
2247
3270
|
}
|
|
2248
3271
|
for (const [start, length] of matches) {
|
|
2249
|
-
const spans = this.mapper.spans.filter(
|
|
3272
|
+
const spans = this.mapper.spans.filter(
|
|
3273
|
+
(s) => s.end > start && s.start < start + length
|
|
3274
|
+
);
|
|
2250
3275
|
const nestedAuthors = /* @__PURE__ */ new Set();
|
|
2251
3276
|
for (const s of spans) {
|
|
2252
3277
|
if (s.ins_id) {
|
|
2253
|
-
const insNodes = findAllDescendants(
|
|
3278
|
+
const insNodes = findAllDescendants(
|
|
3279
|
+
this.doc.element,
|
|
3280
|
+
"w:ins"
|
|
3281
|
+
).filter((n) => n.getAttribute("w:id") === s.ins_id);
|
|
2254
3282
|
if (insNodes.length > 0) {
|
|
2255
3283
|
const auth = insNodes[0].getAttribute("w:author");
|
|
2256
3284
|
if (auth && auth !== this.author) nestedAuthors.add(auth);
|
|
@@ -2258,7 +3286,46 @@ Provide more context.`);
|
|
|
2258
3286
|
}
|
|
2259
3287
|
}
|
|
2260
3288
|
if (nestedAuthors.size > 0) {
|
|
2261
|
-
errors.push(
|
|
3289
|
+
errors.push(
|
|
3290
|
+
`- Edit ${i + 1} Failed: Modification targets an active insertion from another author (${Array.from(nestedAuthors).join(", ")}). Accept that change first or scope your edit outside of it.`
|
|
3291
|
+
);
|
|
3292
|
+
}
|
|
3293
|
+
}
|
|
3294
|
+
}
|
|
3295
|
+
return errors;
|
|
3296
|
+
}
|
|
3297
|
+
validate_review_actions(actions) {
|
|
3298
|
+
const errors = [];
|
|
3299
|
+
for (let i = 0; i < actions.length; i++) {
|
|
3300
|
+
const action = actions[i];
|
|
3301
|
+
const type = action.type;
|
|
3302
|
+
if (type === "reply") {
|
|
3303
|
+
const cid = action.target_id.replace("Com:", "");
|
|
3304
|
+
let found = false;
|
|
3305
|
+
const part = this.doc.pkg.parts.find(
|
|
3306
|
+
(p) => p.contentType === "application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml"
|
|
3307
|
+
);
|
|
3308
|
+
if (part) {
|
|
3309
|
+
const comments = findAllDescendants(part._element, "w:comment");
|
|
3310
|
+
found = comments.some((c) => c.getAttribute("w:id") === cid);
|
|
3311
|
+
}
|
|
3312
|
+
if (!found) {
|
|
3313
|
+
errors.push(
|
|
3314
|
+
`- Action ${i + 1} Failed: Target comment ID ${action.target_id} not found.`
|
|
3315
|
+
);
|
|
3316
|
+
}
|
|
3317
|
+
} else if (type === "accept" || type === "reject") {
|
|
3318
|
+
const target_id = action.target_id.replace("Chg:", "");
|
|
3319
|
+
const all_ins = findAllDescendants(this.doc.element, "w:ins").filter(
|
|
3320
|
+
(n) => n.getAttribute("w:id") === target_id
|
|
3321
|
+
);
|
|
3322
|
+
const all_del = findAllDescendants(this.doc.element, "w:del").filter(
|
|
3323
|
+
(n) => n.getAttribute("w:id") === target_id
|
|
3324
|
+
);
|
|
3325
|
+
if (all_ins.length === 0 && all_del.length === 0) {
|
|
3326
|
+
errors.push(
|
|
3327
|
+
`- Action ${i + 1} Failed: Target ID ${action.target_id} not found.`
|
|
3328
|
+
);
|
|
2262
3329
|
}
|
|
2263
3330
|
}
|
|
2264
3331
|
}
|
|
@@ -2266,8 +3333,22 @@ Provide more context.`);
|
|
|
2266
3333
|
}
|
|
2267
3334
|
process_batch(changes) {
|
|
2268
3335
|
this.skipped_details = [];
|
|
2269
|
-
const actions = changes.filter(
|
|
2270
|
-
|
|
3336
|
+
const actions = changes.filter(
|
|
3337
|
+
(c) => ["accept", "reject", "reply"].includes(c.type)
|
|
3338
|
+
);
|
|
3339
|
+
const edits = changes.filter(
|
|
3340
|
+
(c) => !["accept", "reject", "reply"].includes(c.type)
|
|
3341
|
+
);
|
|
3342
|
+
const all_errors = [];
|
|
3343
|
+
if (actions.length > 0) {
|
|
3344
|
+
all_errors.push(...this.validate_review_actions(actions));
|
|
3345
|
+
}
|
|
3346
|
+
if (edits.length > 0) {
|
|
3347
|
+
all_errors.push(...this.validate_edits(edits));
|
|
3348
|
+
}
|
|
3349
|
+
if (all_errors.length > 0) {
|
|
3350
|
+
throw new BatchValidationError(all_errors);
|
|
3351
|
+
}
|
|
2271
3352
|
let applied_actions = 0, skipped_actions = 0;
|
|
2272
3353
|
if (actions.length > 0) {
|
|
2273
3354
|
const res = this.apply_review_actions(actions);
|
|
@@ -2278,10 +3359,6 @@ Provide more context.`);
|
|
|
2278
3359
|
if (this.clean_mapper) this.clean_mapper["_build_map"]();
|
|
2279
3360
|
}
|
|
2280
3361
|
}
|
|
2281
|
-
if (edits.length > 0) {
|
|
2282
|
-
const errors = this.validate_edits(edits);
|
|
2283
|
-
if (errors.length > 0) throw new BatchValidationError(errors);
|
|
2284
|
-
}
|
|
2285
3362
|
let applied_edits = 0, skipped_edits = 0;
|
|
2286
3363
|
if (edits.length > 0) {
|
|
2287
3364
|
const res = this.apply_edits(edits);
|
|
@@ -2310,7 +3387,9 @@ Provide more context.`);
|
|
|
2310
3387
|
resolved_edits.push([edit, null]);
|
|
2311
3388
|
} else {
|
|
2312
3389
|
skipped++;
|
|
2313
|
-
this.skipped_details.push(
|
|
3390
|
+
this.skipped_details.push(
|
|
3391
|
+
`- Failed to locate row target: '${(edit.target_text || "").substring(0, 40)}...'`
|
|
3392
|
+
);
|
|
2314
3393
|
}
|
|
2315
3394
|
} else {
|
|
2316
3395
|
const resolved = this._pre_resolve_heuristic_edit(edit);
|
|
@@ -2322,19 +3401,27 @@ Provide more context.`);
|
|
|
2322
3401
|
}
|
|
2323
3402
|
} else {
|
|
2324
3403
|
skipped++;
|
|
2325
|
-
this.skipped_details.push(
|
|
3404
|
+
this.skipped_details.push(
|
|
3405
|
+
`- Failed to apply edit targeting: '${(edit.target_text || "insertion").substring(0, 40)}...'`
|
|
3406
|
+
);
|
|
2326
3407
|
}
|
|
2327
3408
|
}
|
|
2328
3409
|
}
|
|
2329
|
-
resolved_edits.sort(
|
|
3410
|
+
resolved_edits.sort(
|
|
3411
|
+
(a, b) => (b[0]._match_start_index || 0) - (a[0]._match_start_index || 0)
|
|
3412
|
+
);
|
|
2330
3413
|
const occupied_ranges = [];
|
|
2331
3414
|
for (const [edit, orig_new] of resolved_edits) {
|
|
2332
3415
|
const start = edit._match_start_index || 0;
|
|
2333
3416
|
const end = start + (edit.target_text ? edit.target_text.length : 0);
|
|
2334
|
-
const overlaps = occupied_ranges.some(
|
|
3417
|
+
const overlaps = occupied_ranges.some(
|
|
3418
|
+
([occ_start, occ_end]) => start < occ_end && end > occ_start
|
|
3419
|
+
);
|
|
2335
3420
|
if (overlaps) {
|
|
2336
3421
|
skipped++;
|
|
2337
|
-
this.skipped_details.push(
|
|
3422
|
+
this.skipped_details.push(
|
|
3423
|
+
`- Skipped overlapping edit targeting: '${(edit.target_text || "insertion").substring(0, 40)}...'`
|
|
3424
|
+
);
|
|
2338
3425
|
continue;
|
|
2339
3426
|
}
|
|
2340
3427
|
let success = false;
|
|
@@ -2348,7 +3435,9 @@ Provide more context.`);
|
|
|
2348
3435
|
occupied_ranges.push([start, end]);
|
|
2349
3436
|
} else {
|
|
2350
3437
|
skipped++;
|
|
2351
|
-
this.skipped_details.push(
|
|
3438
|
+
this.skipped_details.push(
|
|
3439
|
+
`- Failed to apply edit targeting: '${(edit.target_text || "insertion").substring(0, 40)}...'`
|
|
3440
|
+
);
|
|
2352
3441
|
}
|
|
2353
3442
|
}
|
|
2354
3443
|
return [applied, skipped];
|
|
@@ -2360,17 +3449,28 @@ Provide more context.`);
|
|
|
2360
3449
|
const type = action.type;
|
|
2361
3450
|
if (type === "reply") {
|
|
2362
3451
|
const cid = action.target_id.replace("Com:", "");
|
|
2363
|
-
this.comments_manager.addComment(
|
|
3452
|
+
const new_id = this.comments_manager.addComment(
|
|
3453
|
+
this.author,
|
|
3454
|
+
action.text,
|
|
3455
|
+
cid
|
|
3456
|
+
);
|
|
3457
|
+
this._anchor_reply_comment(cid, new_id);
|
|
2364
3458
|
applied++;
|
|
2365
3459
|
continue;
|
|
2366
3460
|
}
|
|
2367
3461
|
const target_id = action.target_id.replace("Chg:", "");
|
|
2368
|
-
const all_ins = findAllDescendants(this.doc.element, "w:ins").filter(
|
|
2369
|
-
|
|
3462
|
+
const all_ins = findAllDescendants(this.doc.element, "w:ins").filter(
|
|
3463
|
+
(n) => n.getAttribute("w:id") === target_id
|
|
3464
|
+
);
|
|
3465
|
+
const all_del = findAllDescendants(this.doc.element, "w:del").filter(
|
|
3466
|
+
(n) => n.getAttribute("w:id") === target_id
|
|
3467
|
+
);
|
|
2370
3468
|
const all_nodes = [...all_ins, ...all_del];
|
|
2371
3469
|
if (all_nodes.length === 0) {
|
|
2372
3470
|
skipped++;
|
|
2373
|
-
this.skipped_details.push(
|
|
3471
|
+
this.skipped_details.push(
|
|
3472
|
+
`- Failed to apply action: Target ID ${action.target_id} not found.`
|
|
3473
|
+
);
|
|
2374
3474
|
continue;
|
|
2375
3475
|
}
|
|
2376
3476
|
for (const node of all_nodes) {
|
|
@@ -2379,12 +3479,16 @@ Provide more context.`);
|
|
|
2379
3479
|
const is_trPr = parent_tag === "w:trPr";
|
|
2380
3480
|
if (type === "accept") {
|
|
2381
3481
|
if (is_ins) {
|
|
3482
|
+
this._clean_wrapping_comments(node);
|
|
2382
3483
|
if (is_trPr) node.parentNode?.removeChild(node);
|
|
2383
3484
|
else {
|
|
2384
|
-
while (node.firstChild)
|
|
3485
|
+
while (node.firstChild)
|
|
3486
|
+
node.parentNode?.insertBefore(node.firstChild, node);
|
|
2385
3487
|
node.parentNode?.removeChild(node);
|
|
2386
3488
|
}
|
|
2387
3489
|
} else {
|
|
3490
|
+
this._clean_wrapping_comments(node);
|
|
3491
|
+
this._delete_comments_in_element(node);
|
|
2388
3492
|
if (is_trPr) {
|
|
2389
3493
|
const tr = node.parentNode?.parentNode;
|
|
2390
3494
|
tr?.parentNode?.removeChild(tr);
|
|
@@ -2394,21 +3498,28 @@ Provide more context.`);
|
|
|
2394
3498
|
}
|
|
2395
3499
|
} else if (type === "reject") {
|
|
2396
3500
|
if (is_ins) {
|
|
3501
|
+
this._clean_wrapping_comments(node);
|
|
3502
|
+
this._delete_comments_in_element(node);
|
|
2397
3503
|
if (is_trPr) {
|
|
2398
3504
|
const tr = node.parentNode?.parentNode;
|
|
2399
3505
|
tr?.parentNode?.removeChild(tr);
|
|
2400
3506
|
} else node.parentNode?.removeChild(node);
|
|
2401
3507
|
} else {
|
|
3508
|
+
this._clean_wrapping_comments(node);
|
|
2402
3509
|
if (is_trPr) node.parentNode?.removeChild(node);
|
|
2403
3510
|
else {
|
|
2404
|
-
const delTexts = Array.from(
|
|
3511
|
+
const delTexts = Array.from(
|
|
3512
|
+
node.getElementsByTagName("w:delText")
|
|
3513
|
+
);
|
|
2405
3514
|
for (const dt of delTexts) {
|
|
2406
3515
|
const t = dt.ownerDocument.createElement("w:t");
|
|
2407
3516
|
t.textContent = dt.textContent;
|
|
2408
|
-
if (dt.hasAttribute("xml:space"))
|
|
3517
|
+
if (dt.hasAttribute("xml:space"))
|
|
3518
|
+
t.setAttribute("xml:space", "preserve");
|
|
2409
3519
|
dt.parentNode?.replaceChild(t, dt);
|
|
2410
3520
|
}
|
|
2411
|
-
while (node.firstChild)
|
|
3521
|
+
while (node.firstChild)
|
|
3522
|
+
node.parentNode?.insertBefore(node.firstChild, node);
|
|
2412
3523
|
node.parentNode?.removeChild(node);
|
|
2413
3524
|
}
|
|
2414
3525
|
}
|
|
@@ -2420,7 +3531,10 @@ Provide more context.`);
|
|
|
2420
3531
|
}
|
|
2421
3532
|
_apply_table_edit(edit, rebuild_map) {
|
|
2422
3533
|
const start_idx = edit._match_start_index || 0;
|
|
2423
|
-
const [anchor_run, anchor_para] = this.mapper.get_insertion_anchor(
|
|
3534
|
+
const [anchor_run, anchor_para] = this.mapper.get_insertion_anchor(
|
|
3535
|
+
start_idx,
|
|
3536
|
+
rebuild_map
|
|
3537
|
+
);
|
|
2424
3538
|
let target_element = null;
|
|
2425
3539
|
if (anchor_run) target_element = anchor_run._element;
|
|
2426
3540
|
else if (anchor_para) target_element = anchor_para._element;
|
|
@@ -2447,7 +3561,8 @@ Provide more context.`);
|
|
|
2447
3561
|
const r = tr.ownerDocument.createElement("w:r");
|
|
2448
3562
|
const t = tr.ownerDocument.createElement("w:t");
|
|
2449
3563
|
t.textContent = cellText;
|
|
2450
|
-
if (cellText.trim() !== cellText)
|
|
3564
|
+
if (cellText.trim() !== cellText)
|
|
3565
|
+
t.setAttribute("xml:space", "preserve");
|
|
2451
3566
|
r.appendChild(t);
|
|
2452
3567
|
p.appendChild(r);
|
|
2453
3568
|
tc.appendChild(p);
|
|
@@ -2464,14 +3579,20 @@ Provide more context.`);
|
|
|
2464
3579
|
let [start_idx, match_len] = this.mapper.find_match_index(edit.target_text);
|
|
2465
3580
|
let use_clean_map = false;
|
|
2466
3581
|
if (start_idx === -1) {
|
|
2467
|
-
if (!this.clean_mapper)
|
|
2468
|
-
|
|
3582
|
+
if (!this.clean_mapper)
|
|
3583
|
+
this.clean_mapper = new DocumentMapper(this.doc, true);
|
|
3584
|
+
[start_idx, match_len] = this.clean_mapper.find_match_index(
|
|
3585
|
+
edit.target_text
|
|
3586
|
+
);
|
|
2469
3587
|
if (start_idx !== -1) use_clean_map = true;
|
|
2470
3588
|
else return null;
|
|
2471
3589
|
}
|
|
2472
3590
|
const active_mapper = use_clean_map ? this.clean_mapper : this.mapper;
|
|
2473
3591
|
const effective_new_text = edit.new_text || "";
|
|
2474
|
-
const actual_doc_text = this.mapper.full_text.substring(
|
|
3592
|
+
const actual_doc_text = this.mapper.full_text.substring(
|
|
3593
|
+
start_idx,
|
|
3594
|
+
start_idx + match_len
|
|
3595
|
+
);
|
|
2475
3596
|
if (actual_doc_text === effective_new_text || edit.target_text === effective_new_text) {
|
|
2476
3597
|
return {
|
|
2477
3598
|
type: "modify",
|
|
@@ -2492,330 +3613,337 @@ Provide more context.`);
|
|
|
2492
3613
|
final_new = effective_new_text.substring(actual_doc_text.length);
|
|
2493
3614
|
effective_start_idx = start_idx + match_len;
|
|
2494
3615
|
} else {
|
|
2495
|
-
const [prefix_len, suffix_len] = trim_common_context(
|
|
3616
|
+
const [prefix_len, suffix_len] = trim_common_context(
|
|
3617
|
+
actual_doc_text,
|
|
3618
|
+
effective_new_text
|
|
3619
|
+
);
|
|
2496
3620
|
const t_end = actual_doc_text.length - suffix_len;
|
|
2497
3621
|
const n_end = effective_new_text.length - suffix_len;
|
|
2498
3622
|
final_target = actual_doc_text.substring(prefix_len, t_end);
|
|
2499
3623
|
final_new = effective_new_text.substring(prefix_len, n_end);
|
|
2500
3624
|
effective_start_idx = start_idx + prefix_len;
|
|
2501
|
-
if (!final_target && final_new) effective_op = "INSERTION";
|
|
2502
|
-
else if (final_target && !final_new) effective_op = "DELETION";
|
|
2503
|
-
else if (final_target && final_new) effective_op = "MODIFICATION";
|
|
2504
|
-
else effective_op = "COMMENT_ONLY";
|
|
2505
|
-
}
|
|
2506
|
-
return {
|
|
2507
|
-
type: "modify",
|
|
2508
|
-
target_text: final_target,
|
|
2509
|
-
new_text: final_new,
|
|
2510
|
-
comment: edit.comment,
|
|
2511
|
-
_match_start_index: effective_start_idx,
|
|
2512
|
-
_internal_op: effective_op,
|
|
2513
|
-
_active_mapper_ref: active_mapper
|
|
2514
|
-
};
|
|
2515
|
-
}
|
|
2516
|
-
_apply_single_edit_indexed(edit, orig_new, rebuild_map) {
|
|
2517
|
-
let op = edit._internal_op;
|
|
2518
|
-
const active_mapper = edit._active_mapper_ref || this.mapper;
|
|
2519
|
-
const start_idx = edit._match_start_index || 0;
|
|
2520
|
-
const length = edit.target_text ? edit.target_text.length : 0;
|
|
2521
|
-
const del_id = ["DELETION", "MODIFICATION"].includes(op) ? this._getNextId() : null;
|
|
2522
|
-
const ins_id = ["INSERTION", "MODIFICATION"].includes(op) ? this._getNextId() : null;
|
|
2523
|
-
if (op === "COMMENT_ONLY") {
|
|
2524
|
-
return true;
|
|
2525
|
-
}
|
|
2526
|
-
if (op === "INSERTION") {
|
|
2527
|
-
const [anchor_run, anchor_para] = active_mapper.get_insertion_anchor(start_idx, rebuild_map);
|
|
2528
|
-
if (!anchor_run && !anchor_para) return false;
|
|
2529
|
-
const xmlDoc = this.doc.part._element.ownerDocument;
|
|
2530
|
-
const ins = this._create_track_change_tag("w:ins", "", ins_id);
|
|
2531
|
-
const segments = this._parse_inline_markdown(edit.new_text || "");
|
|
2532
|
-
for (const [segText, segProps] of segments) {
|
|
2533
|
-
const r = xmlDoc.createElement("w:r");
|
|
2534
|
-
this._apply_run_props(r, segProps, false);
|
|
2535
|
-
const t = xmlDoc.createElement("w:t");
|
|
2536
|
-
this._set_text_content(t, segText);
|
|
2537
|
-
r.appendChild(t);
|
|
2538
|
-
ins.appendChild(r);
|
|
2539
|
-
}
|
|
2540
|
-
if (anchor_run) {
|
|
2541
|
-
insertAfter(ins, anchor_run._element);
|
|
2542
|
-
} else if (anchor_para) {
|
|
2543
|
-
anchor_para._element.appendChild(ins);
|
|
2544
|
-
}
|
|
2545
|
-
return true;
|
|
2546
|
-
}
|
|
2547
|
-
const target_runs = active_mapper.find_target_runs_by_index(start_idx, length, rebuild_map);
|
|
2548
|
-
if (target_runs.length === 0) return false;
|
|
2549
|
-
let last_del = null;
|
|
2550
|
-
for (const run of target_runs) {
|
|
2551
|
-
const del_tag = this._create_track_change_tag("w:del", "", del_id);
|
|
2552
|
-
const new_run = run._element.cloneNode(true);
|
|
2553
|
-
const tNodes = Array.from(new_run.getElementsByTagName("w:t"));
|
|
2554
|
-
tNodes.forEach((t) => {
|
|
2555
|
-
const delText = new_run.ownerDocument.createElement("w:delText");
|
|
2556
|
-
delText.textContent = t.textContent;
|
|
2557
|
-
if (t.hasAttribute("xml:space")) delText.setAttribute("xml:space", "preserve");
|
|
2558
|
-
new_run.replaceChild(delText, t);
|
|
2559
|
-
});
|
|
2560
|
-
del_tag.appendChild(new_run);
|
|
2561
|
-
run._element.parentNode?.replaceChild(del_tag, run._element);
|
|
2562
|
-
last_del = del_tag;
|
|
2563
|
-
}
|
|
2564
|
-
if (op === "MODIFICATION" && edit.new_text && last_del) {
|
|
2565
|
-
const xmlDoc = this.doc.part._element.ownerDocument;
|
|
2566
|
-
const ins = this._create_track_change_tag("w:ins", "", ins_id);
|
|
2567
|
-
const segments = this._parse_inline_markdown(edit.new_text);
|
|
2568
|
-
for (const [segText, segProps] of segments) {
|
|
2569
|
-
const r = xmlDoc.createElement("w:r");
|
|
2570
|
-
this._apply_run_props(r, segProps, false);
|
|
2571
|
-
const t = xmlDoc.createElement("w:t");
|
|
2572
|
-
this._set_text_content(t, segText);
|
|
2573
|
-
r.appendChild(t);
|
|
2574
|
-
ins.appendChild(r);
|
|
2575
|
-
}
|
|
2576
|
-
insertAfter(ins, last_del);
|
|
2577
|
-
}
|
|
2578
|
-
return true;
|
|
2579
|
-
}
|
|
2580
|
-
};
|
|
2581
|
-
|
|
2582
|
-
// src/markup.ts
|
|
2583
|
-
function _should_strip_markers(text, marker) {
|
|
2584
|
-
if (!text.startsWith(marker) || !text.endsWith(marker)) return false;
|
|
2585
|
-
if (text.length < marker.length * 2) return false;
|
|
2586
|
-
const inner = text.substring(marker.length, text.length - marker.length);
|
|
2587
|
-
if (!inner) return false;
|
|
2588
|
-
if (inner.includes(marker)) return false;
|
|
2589
|
-
if (!/[a-zA-Z]/.test(inner)) return false;
|
|
2590
|
-
if (marker === "__" && /^\w+$/.test(inner)) return false;
|
|
2591
|
-
if (marker === "_") {
|
|
2592
|
-
if (inner.includes("_")) return false;
|
|
2593
|
-
if (/^[0-9_]+$/.test(inner)) return false;
|
|
2594
|
-
}
|
|
2595
|
-
return true;
|
|
2596
|
-
}
|
|
2597
|
-
function _strip_balanced_markers(text) {
|
|
2598
|
-
let prefix_markup = "";
|
|
2599
|
-
let suffix_markup = "";
|
|
2600
|
-
let clean_text = text;
|
|
2601
|
-
const markers = ["**", "__", "_", "*"];
|
|
2602
|
-
for (const marker of markers) {
|
|
2603
|
-
if (_should_strip_markers(clean_text, marker)) {
|
|
2604
|
-
prefix_markup += marker;
|
|
2605
|
-
suffix_markup = marker + suffix_markup;
|
|
2606
|
-
clean_text = clean_text.substring(marker.length, clean_text.length - marker.length);
|
|
2607
|
-
break;
|
|
2608
|
-
}
|
|
2609
|
-
}
|
|
2610
|
-
return [prefix_markup, clean_text, suffix_markup];
|
|
2611
|
-
}
|
|
2612
|
-
function _replace_smart_quotes(text) {
|
|
2613
|
-
return text.replace(/“/g, '"').replace(/”/g, '"').replace(/‘/g, "'").replace(/’/g, "'");
|
|
2614
|
-
}
|
|
2615
|
-
function _find_safe_boundaries(text, start, end) {
|
|
2616
|
-
let new_start = start;
|
|
2617
|
-
let new_end = end;
|
|
2618
|
-
const expand_if_unbalanced = (marker) => {
|
|
2619
|
-
const current_match = text.substring(new_start, new_end);
|
|
2620
|
-
const count = (current_match.match(new RegExp(marker.replace(/\*/g, "\\*"), "g")) || []).length;
|
|
2621
|
-
if (count % 2 !== 0) {
|
|
2622
|
-
const suffix = text.substring(new_end);
|
|
2623
|
-
if (suffix.startsWith(marker)) {
|
|
2624
|
-
new_end += marker.length;
|
|
2625
|
-
return;
|
|
2626
|
-
}
|
|
2627
|
-
const prefix = text.substring(0, new_start);
|
|
2628
|
-
if (prefix.endsWith(marker)) {
|
|
2629
|
-
new_start -= marker.length;
|
|
2630
|
-
return;
|
|
2631
|
-
}
|
|
2632
|
-
}
|
|
2633
|
-
};
|
|
2634
|
-
for (let i = 0; i < 2; i++) {
|
|
2635
|
-
expand_if_unbalanced("**");
|
|
2636
|
-
expand_if_unbalanced("__");
|
|
2637
|
-
expand_if_unbalanced("_");
|
|
2638
|
-
expand_if_unbalanced("*");
|
|
2639
|
-
}
|
|
2640
|
-
return [new_start, new_end];
|
|
2641
|
-
}
|
|
2642
|
-
function _refine_match_boundaries(text, start, end) {
|
|
2643
|
-
const markers = ["**", "__", "*", "_"];
|
|
2644
|
-
let current_text = text.substring(start, end);
|
|
2645
|
-
let best_start = start;
|
|
2646
|
-
let best_end = end;
|
|
2647
|
-
const countMarker = (str, mk) => (str.match(new RegExp(mk.replace(/\*/g, "\\*"), "g")) || []).length;
|
|
2648
|
-
for (const marker of markers) {
|
|
2649
|
-
if (current_text.startsWith(marker)) {
|
|
2650
|
-
const current_score = countMarker(current_text, marker) % 2;
|
|
2651
|
-
const trimmed_text = current_text.substring(marker.length);
|
|
2652
|
-
const trimmed_score = countMarker(trimmed_text, marker) % 2;
|
|
2653
|
-
if (current_score === 1 && trimmed_score === 0) {
|
|
2654
|
-
best_start += marker.length;
|
|
2655
|
-
current_text = trimmed_text;
|
|
2656
|
-
}
|
|
2657
|
-
}
|
|
2658
|
-
}
|
|
2659
|
-
for (const marker of markers) {
|
|
2660
|
-
if (current_text.endsWith(marker)) {
|
|
2661
|
-
const current_score = countMarker(current_text, marker) % 2;
|
|
2662
|
-
const trimmed_text = current_text.substring(0, current_text.length - marker.length);
|
|
2663
|
-
const trimmed_score = countMarker(trimmed_text, marker) % 2;
|
|
2664
|
-
if (current_score === 1 && trimmed_score === 0) {
|
|
2665
|
-
best_end -= marker.length;
|
|
2666
|
-
current_text = trimmed_text;
|
|
2667
|
-
}
|
|
2668
|
-
}
|
|
2669
|
-
}
|
|
2670
|
-
return [best_start, best_end];
|
|
2671
|
-
}
|
|
2672
|
-
function _make_fuzzy_regex(target_text) {
|
|
2673
|
-
target_text = _replace_smart_quotes(target_text);
|
|
2674
|
-
const parts = [];
|
|
2675
|
-
const token_pattern = /(_+)|(\s+)|(['"])|([.,;:\/])/g;
|
|
2676
|
-
const md_noise = "[*_]*";
|
|
2677
|
-
const structural_noise = "(?:\\s*(?:[*+\\->]|\\d+\\.)\\s+|\\s*\\n\\s*)";
|
|
2678
|
-
const start_list_marker = "(?:[ \\t]*(?:[*+\\->]|\\d+\\.)\\s+)?";
|
|
2679
|
-
parts.push(start_list_marker);
|
|
2680
|
-
parts.push(md_noise);
|
|
2681
|
-
let last_idx = 0;
|
|
2682
|
-
let match;
|
|
2683
|
-
const escapeRegExp = (str) => str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
2684
|
-
while ((match = token_pattern.exec(target_text)) !== null) {
|
|
2685
|
-
const literal = target_text.substring(last_idx, match.index);
|
|
2686
|
-
if (literal) {
|
|
2687
|
-
parts.push(escapeRegExp(literal));
|
|
2688
|
-
parts.push(md_noise);
|
|
3625
|
+
if (!final_target && final_new) effective_op = "INSERTION";
|
|
3626
|
+
else if (final_target && !final_new) effective_op = "DELETION";
|
|
3627
|
+
else if (final_target && final_new) effective_op = "MODIFICATION";
|
|
3628
|
+
else effective_op = "COMMENT_ONLY";
|
|
2689
3629
|
}
|
|
2690
|
-
|
|
2691
|
-
|
|
2692
|
-
|
|
2693
|
-
|
|
2694
|
-
|
|
2695
|
-
|
|
2696
|
-
|
|
2697
|
-
|
|
2698
|
-
|
|
3630
|
+
return {
|
|
3631
|
+
type: "modify",
|
|
3632
|
+
target_text: final_target,
|
|
3633
|
+
new_text: final_new,
|
|
3634
|
+
comment: edit.comment,
|
|
3635
|
+
_match_start_index: effective_start_idx,
|
|
3636
|
+
_internal_op: effective_op,
|
|
3637
|
+
_active_mapper_ref: active_mapper
|
|
3638
|
+
};
|
|
3639
|
+
}
|
|
3640
|
+
_apply_single_edit_indexed(edit, orig_new, rebuild_map) {
|
|
3641
|
+
let op = edit._internal_op;
|
|
3642
|
+
const active_mapper = edit._active_mapper_ref || this.mapper;
|
|
3643
|
+
const start_idx = edit._match_start_index || 0;
|
|
3644
|
+
const length = edit.target_text ? edit.target_text.length : 0;
|
|
3645
|
+
const del_id = ["DELETION", "MODIFICATION"].includes(op) ? this._getNextId() : null;
|
|
3646
|
+
const ins_id = ["INSERTION", "MODIFICATION"].includes(op) ? this._getNextId() : null;
|
|
3647
|
+
if (op === "COMMENT_ONLY") {
|
|
3648
|
+
const target_runs2 = active_mapper.find_target_runs_by_index(
|
|
3649
|
+
start_idx,
|
|
3650
|
+
length,
|
|
3651
|
+
rebuild_map
|
|
3652
|
+
);
|
|
3653
|
+
if (target_runs2.length === 0) return false;
|
|
3654
|
+
if (!edit.comment) return true;
|
|
3655
|
+
const first_el = target_runs2[0]._element;
|
|
3656
|
+
const last_el = target_runs2[target_runs2.length - 1]._element;
|
|
3657
|
+
let start_p = first_el;
|
|
3658
|
+
while (start_p && start_p.tagName !== "w:p")
|
|
3659
|
+
start_p = start_p.parentNode;
|
|
3660
|
+
let end_p = last_el;
|
|
3661
|
+
while (end_p && end_p.tagName !== "w:p")
|
|
3662
|
+
end_p = end_p.parentNode;
|
|
3663
|
+
if (!start_p || !end_p) return false;
|
|
3664
|
+
const ascend_to_paragraph_child = (el, p) => {
|
|
3665
|
+
let cur = el;
|
|
3666
|
+
while (cur.parentNode && cur.parentNode !== p) {
|
|
3667
|
+
cur = cur.parentNode;
|
|
3668
|
+
}
|
|
3669
|
+
return cur;
|
|
3670
|
+
};
|
|
3671
|
+
const first_anchor = ascend_to_paragraph_child(first_el, start_p);
|
|
3672
|
+
const last_anchor = ascend_to_paragraph_child(last_el, end_p);
|
|
3673
|
+
if (start_p === end_p) {
|
|
3674
|
+
this._attach_comment(start_p, first_anchor, last_anchor, edit.comment);
|
|
2699
3675
|
} else {
|
|
2700
|
-
|
|
3676
|
+
this._attach_comment_spanning(
|
|
3677
|
+
start_p,
|
|
3678
|
+
first_anchor,
|
|
3679
|
+
end_p,
|
|
3680
|
+
last_anchor,
|
|
3681
|
+
edit.comment
|
|
3682
|
+
);
|
|
2701
3683
|
}
|
|
2702
|
-
|
|
2703
|
-
if (g_quote === "'") parts.push("[\u2018\u2019']");
|
|
2704
|
-
else parts.push('["\u201C\u201D]');
|
|
2705
|
-
} else if (g_punct) {
|
|
2706
|
-
parts.push(escapeRegExp(g_punct));
|
|
3684
|
+
return true;
|
|
2707
3685
|
}
|
|
2708
|
-
|
|
2709
|
-
|
|
2710
|
-
|
|
2711
|
-
|
|
2712
|
-
|
|
2713
|
-
|
|
2714
|
-
|
|
2715
|
-
|
|
2716
|
-
|
|
2717
|
-
|
|
2718
|
-
|
|
2719
|
-
|
|
2720
|
-
|
|
2721
|
-
|
|
2722
|
-
|
|
2723
|
-
|
|
2724
|
-
|
|
2725
|
-
|
|
2726
|
-
|
|
2727
|
-
|
|
2728
|
-
|
|
2729
|
-
|
|
2730
|
-
|
|
3686
|
+
if (op === "INSERTION") {
|
|
3687
|
+
const [anchor_run, anchor_para] = active_mapper.get_insertion_anchor(
|
|
3688
|
+
start_idx,
|
|
3689
|
+
rebuild_map
|
|
3690
|
+
);
|
|
3691
|
+
if (!anchor_run && !anchor_para) return false;
|
|
3692
|
+
const result = this._track_insert_multiline(
|
|
3693
|
+
edit.new_text || "",
|
|
3694
|
+
anchor_run,
|
|
3695
|
+
anchor_para,
|
|
3696
|
+
ins_id
|
|
3697
|
+
);
|
|
3698
|
+
if (!result.first_node) return false;
|
|
3699
|
+
const is_inline_first = result.first_node.tagName === "w:ins";
|
|
3700
|
+
if (is_inline_first) {
|
|
3701
|
+
if (anchor_run) {
|
|
3702
|
+
insertAfter(result.first_node, anchor_run._element);
|
|
3703
|
+
} else if (anchor_para) {
|
|
3704
|
+
anchor_para._element.appendChild(result.first_node);
|
|
3705
|
+
}
|
|
3706
|
+
}
|
|
3707
|
+
if (edit.comment) {
|
|
3708
|
+
const ascend_to_paragraph_child = (el, p) => {
|
|
3709
|
+
let cur = el;
|
|
3710
|
+
while (cur.parentNode && cur.parentNode !== p) {
|
|
3711
|
+
cur = cur.parentNode;
|
|
3712
|
+
}
|
|
3713
|
+
return cur;
|
|
3714
|
+
};
|
|
3715
|
+
if (result.last_p && result.last_ins) {
|
|
3716
|
+
let start_p = result.first_node;
|
|
3717
|
+
while (start_p && start_p.tagName !== "w:p")
|
|
3718
|
+
start_p = start_p.parentNode;
|
|
3719
|
+
if (start_p) {
|
|
3720
|
+
let first_anchor_target = result.first_node;
|
|
3721
|
+
if (result.first_node.tagName === "w:p") {
|
|
3722
|
+
first_anchor_target = findAllDescendants(result.first_node, "w:ins")[0] || result.first_node;
|
|
3723
|
+
}
|
|
3724
|
+
const start_anchor = ascend_to_paragraph_child(
|
|
3725
|
+
first_anchor_target,
|
|
3726
|
+
start_p
|
|
3727
|
+
);
|
|
3728
|
+
const end_anchor = ascend_to_paragraph_child(
|
|
3729
|
+
result.last_ins,
|
|
3730
|
+
result.last_p
|
|
3731
|
+
);
|
|
3732
|
+
this._attach_comment_spanning(
|
|
3733
|
+
start_p,
|
|
3734
|
+
start_anchor,
|
|
3735
|
+
result.last_p,
|
|
3736
|
+
end_anchor,
|
|
3737
|
+
edit.comment
|
|
3738
|
+
);
|
|
3739
|
+
}
|
|
3740
|
+
} else {
|
|
3741
|
+
let host_p = result.first_node;
|
|
3742
|
+
while (host_p && host_p.tagName !== "w:p")
|
|
3743
|
+
host_p = host_p.parentNode;
|
|
3744
|
+
if (host_p) {
|
|
3745
|
+
let first_anchor_target = result.first_node;
|
|
3746
|
+
if (result.first_node.tagName === "w:p") {
|
|
3747
|
+
first_anchor_target = findAllDescendants(result.first_node, "w:ins")[0] || result.first_node;
|
|
3748
|
+
}
|
|
3749
|
+
const anchor = ascend_to_paragraph_child(first_anchor_target, host_p);
|
|
3750
|
+
this._attach_comment(host_p, anchor, anchor, edit.comment);
|
|
3751
|
+
}
|
|
3752
|
+
}
|
|
3753
|
+
}
|
|
3754
|
+
return true;
|
|
2731
3755
|
}
|
|
2732
|
-
|
|
2733
|
-
|
|
2734
|
-
|
|
2735
|
-
|
|
2736
|
-
|
|
2737
|
-
|
|
2738
|
-
|
|
2739
|
-
|
|
2740
|
-
|
|
2741
|
-
|
|
2742
|
-
|
|
2743
|
-
|
|
3756
|
+
const target_runs = active_mapper.find_target_runs_by_index(
|
|
3757
|
+
start_idx,
|
|
3758
|
+
length,
|
|
3759
|
+
rebuild_map
|
|
3760
|
+
);
|
|
3761
|
+
const virtual_spans = active_mapper.get_virtual_spans_in_range(start_idx, length);
|
|
3762
|
+
if (target_runs.length === 0 && virtual_spans.length === 0) return false;
|
|
3763
|
+
const affected_ps = /* @__PURE__ */ new Set();
|
|
3764
|
+
for (const run of target_runs) {
|
|
3765
|
+
let p = run._element.parentNode;
|
|
3766
|
+
while (p && p.tagName !== "w:p") p = p.parentNode;
|
|
3767
|
+
if (p) affected_ps.add(p);
|
|
2744
3768
|
}
|
|
2745
|
-
|
|
2746
|
-
|
|
2747
|
-
|
|
2748
|
-
|
|
2749
|
-
|
|
2750
|
-
|
|
2751
|
-
|
|
2752
|
-
|
|
2753
|
-
|
|
2754
|
-
|
|
2755
|
-
|
|
2756
|
-
|
|
2757
|
-
|
|
2758
|
-
|
|
2759
|
-
|
|
2760
|
-
|
|
2761
|
-
|
|
2762
|
-
}
|
|
2763
|
-
return parts.join("");
|
|
2764
|
-
}
|
|
2765
|
-
function apply_edits_to_markdown(markdown_text, edits, include_index = false, highlight_only = false) {
|
|
2766
|
-
if (!edits || edits.length === 0) return markdown_text;
|
|
2767
|
-
const matched_edits = [];
|
|
2768
|
-
for (let idx = 0; idx < edits.length; idx++) {
|
|
2769
|
-
const edit = edits[idx];
|
|
2770
|
-
const target = edit.target_text || "";
|
|
2771
|
-
if (!target) {
|
|
2772
|
-
continue;
|
|
3769
|
+
let first_del = null;
|
|
3770
|
+
let last_del = null;
|
|
3771
|
+
for (const run of target_runs) {
|
|
3772
|
+
const del_tag = this._create_track_change_tag("w:del", "", del_id);
|
|
3773
|
+
const new_run = run._element.cloneNode(true);
|
|
3774
|
+
const tNodes = Array.from(new_run.getElementsByTagName("w:t"));
|
|
3775
|
+
tNodes.forEach((t) => {
|
|
3776
|
+
const delText = new_run.ownerDocument.createElement("w:delText");
|
|
3777
|
+
delText.textContent = t.textContent;
|
|
3778
|
+
if (t.hasAttribute("xml:space"))
|
|
3779
|
+
delText.setAttribute("xml:space", "preserve");
|
|
3780
|
+
new_run.replaceChild(delText, t);
|
|
3781
|
+
});
|
|
3782
|
+
del_tag.appendChild(new_run);
|
|
3783
|
+
run._element.parentNode?.replaceChild(del_tag, run._element);
|
|
3784
|
+
if (first_del === null) first_del = del_tag;
|
|
3785
|
+
last_del = del_tag;
|
|
2773
3786
|
}
|
|
2774
|
-
|
|
2775
|
-
|
|
2776
|
-
|
|
2777
|
-
|
|
2778
|
-
|
|
2779
|
-
|
|
2780
|
-
|
|
2781
|
-
|
|
2782
|
-
|
|
2783
|
-
|
|
2784
|
-
|
|
2785
|
-
|
|
2786
|
-
|
|
2787
|
-
|
|
3787
|
+
let ins_elem = null;
|
|
3788
|
+
let mod_last_p = null;
|
|
3789
|
+
let mod_last_ins = null;
|
|
3790
|
+
if (op === "MODIFICATION" && edit.new_text && last_del) {
|
|
3791
|
+
let mod_anchor_para_el = last_del;
|
|
3792
|
+
while (mod_anchor_para_el && mod_anchor_para_el.tagName !== "w:p") {
|
|
3793
|
+
mod_anchor_para_el = mod_anchor_para_el.parentNode;
|
|
3794
|
+
}
|
|
3795
|
+
const mod_anchor_para = mod_anchor_para_el ? new Paragraph(mod_anchor_para_el, null) : null;
|
|
3796
|
+
const style_source_run = target_runs.length > 0 ? target_runs[target_runs.length - 1] : null;
|
|
3797
|
+
const result = this._track_insert_multiline(
|
|
3798
|
+
edit.new_text,
|
|
3799
|
+
style_source_run,
|
|
3800
|
+
mod_anchor_para,
|
|
3801
|
+
ins_id
|
|
3802
|
+
);
|
|
3803
|
+
if (result.first_node) {
|
|
3804
|
+
const is_inline_first = result.first_node.tagName === "w:ins";
|
|
3805
|
+
if (is_inline_first) {
|
|
3806
|
+
insertAfter(result.first_node, last_del);
|
|
3807
|
+
ins_elem = result.first_node;
|
|
3808
|
+
} else {
|
|
3809
|
+
ins_elem = result.last_ins;
|
|
3810
|
+
}
|
|
3811
|
+
mod_last_p = result.last_p;
|
|
3812
|
+
mod_last_ins = result.last_ins;
|
|
3813
|
+
}
|
|
3814
|
+
}
|
|
3815
|
+
if (op === "DELETION" || op === "MODIFICATION") {
|
|
3816
|
+
if (op === "MODIFICATION" && target_runs.length === 0 && virtual_spans.length > 0 && edit.new_text) {
|
|
3817
|
+
const first_span = virtual_spans[0];
|
|
3818
|
+
if (first_span.paragraph) {
|
|
3819
|
+
const p1_el = first_span.paragraph._element;
|
|
3820
|
+
const last_runs = findAllDescendants(p1_el, "w:r");
|
|
3821
|
+
const anchor = last_runs.length > 0 ? new Run(last_runs[last_runs.length - 1], first_span.paragraph) : null;
|
|
3822
|
+
const result = this._track_insert_multiline(
|
|
3823
|
+
edit.new_text,
|
|
3824
|
+
anchor,
|
|
3825
|
+
first_span.paragraph,
|
|
3826
|
+
ins_id
|
|
3827
|
+
);
|
|
3828
|
+
if (result.first_node) {
|
|
3829
|
+
p1_el.appendChild(result.first_node);
|
|
3830
|
+
}
|
|
3831
|
+
}
|
|
3832
|
+
}
|
|
3833
|
+
for (const span of [...virtual_spans].reverse()) {
|
|
3834
|
+
if (span.paragraph) {
|
|
3835
|
+
const p1_element = span.paragraph._element;
|
|
3836
|
+
let p2_element = getNextElement(p1_element);
|
|
3837
|
+
while (p2_element && p2_element.tagName !== "w:p") {
|
|
3838
|
+
p2_element = getNextElement(p2_element);
|
|
3839
|
+
}
|
|
3840
|
+
if (p2_element && p2_element.tagName === "w:p") {
|
|
3841
|
+
let pPr = findChild(p1_element, "w:pPr");
|
|
3842
|
+
if (!pPr) {
|
|
3843
|
+
pPr = p1_element.ownerDocument.createElement("w:pPr");
|
|
3844
|
+
p1_element.insertBefore(pPr, p1_element.firstChild);
|
|
3845
|
+
}
|
|
3846
|
+
let rPr = findChild(pPr, "w:rPr");
|
|
3847
|
+
if (!rPr) {
|
|
3848
|
+
rPr = p1_element.ownerDocument.createElement("w:rPr");
|
|
3849
|
+
pPr.appendChild(rPr);
|
|
3850
|
+
}
|
|
3851
|
+
const del_mark = this._create_track_change_tag("w:del");
|
|
3852
|
+
rPr.appendChild(del_mark);
|
|
3853
|
+
const children = Array.from(p2_element.childNodes);
|
|
3854
|
+
for (const child of children) {
|
|
3855
|
+
if (child.nodeType === 1 && child.tagName === "w:pPr") {
|
|
3856
|
+
continue;
|
|
3857
|
+
}
|
|
3858
|
+
p1_element.appendChild(child);
|
|
3859
|
+
}
|
|
3860
|
+
if (p2_element.parentNode) {
|
|
3861
|
+
p2_element.parentNode.removeChild(p2_element);
|
|
3862
|
+
}
|
|
3863
|
+
}
|
|
3864
|
+
}
|
|
2788
3865
|
}
|
|
2789
3866
|
}
|
|
2790
|
-
if (
|
|
2791
|
-
|
|
2792
|
-
|
|
3867
|
+
if (edit.comment && first_del !== null) {
|
|
3868
|
+
let end_anchor_el;
|
|
3869
|
+
let end_p;
|
|
3870
|
+
if (mod_last_p && mod_last_ins) {
|
|
3871
|
+
end_anchor_el = mod_last_ins;
|
|
3872
|
+
end_p = mod_last_p;
|
|
3873
|
+
} else {
|
|
3874
|
+
const final_anchor = ins_elem !== null ? ins_elem : last_del;
|
|
3875
|
+
end_anchor_el = final_anchor;
|
|
3876
|
+
end_p = final_anchor;
|
|
3877
|
+
while (end_p && end_p.tagName !== "w:p")
|
|
3878
|
+
end_p = end_p.parentNode;
|
|
3879
|
+
}
|
|
3880
|
+
let start_p = first_del;
|
|
3881
|
+
while (start_p && start_p.tagName !== "w:p")
|
|
3882
|
+
start_p = start_p.parentNode;
|
|
3883
|
+
if (!start_p || !end_p) return true;
|
|
3884
|
+
const ascend_to_paragraph_child = (el, p) => {
|
|
3885
|
+
let cur = el;
|
|
3886
|
+
while (cur.parentNode && cur.parentNode !== p) {
|
|
3887
|
+
cur = cur.parentNode;
|
|
3888
|
+
}
|
|
3889
|
+
return cur;
|
|
3890
|
+
};
|
|
3891
|
+
const start_anchor = ascend_to_paragraph_child(first_del, start_p);
|
|
3892
|
+
const end_anchor = ascend_to_paragraph_child(end_anchor_el, end_p);
|
|
3893
|
+
if (start_p === end_p) {
|
|
3894
|
+
this._attach_comment(start_p, start_anchor, end_anchor, edit.comment);
|
|
3895
|
+
} else {
|
|
3896
|
+
this._attach_comment_spanning(
|
|
3897
|
+
start_p,
|
|
3898
|
+
start_anchor,
|
|
3899
|
+
end_p,
|
|
3900
|
+
end_anchor,
|
|
3901
|
+
edit.comment
|
|
3902
|
+
);
|
|
3903
|
+
}
|
|
3904
|
+
}
|
|
3905
|
+
for (const p_elem of affected_ps) {
|
|
3906
|
+
let has_visible = false;
|
|
3907
|
+
for (const tag of ["w:t", "w:tab", "w:br"]) {
|
|
3908
|
+
const nodes = findAllDescendants(p_elem, tag);
|
|
3909
|
+
for (const node of nodes) {
|
|
3910
|
+
let is_deleted = false;
|
|
3911
|
+
let curr = node.parentNode;
|
|
3912
|
+
while (curr && curr !== p_elem.parentNode) {
|
|
3913
|
+
if (curr.tagName === "w:del") {
|
|
3914
|
+
is_deleted = true;
|
|
3915
|
+
break;
|
|
3916
|
+
}
|
|
3917
|
+
curr = curr.parentNode;
|
|
3918
|
+
}
|
|
3919
|
+
if (!is_deleted) {
|
|
3920
|
+
if (tag === "w:t" && !node.textContent) continue;
|
|
3921
|
+
has_visible = true;
|
|
3922
|
+
break;
|
|
3923
|
+
}
|
|
3924
|
+
}
|
|
3925
|
+
if (has_visible) break;
|
|
3926
|
+
}
|
|
3927
|
+
if (!has_visible) {
|
|
3928
|
+
let pPr = findChild(p_elem, "w:pPr");
|
|
3929
|
+
if (!pPr) {
|
|
3930
|
+
pPr = p_elem.ownerDocument.createElement("w:pPr");
|
|
3931
|
+
p_elem.insertBefore(pPr, p_elem.firstChild);
|
|
3932
|
+
}
|
|
3933
|
+
let rPr = findChild(pPr, "w:rPr");
|
|
3934
|
+
if (!rPr) {
|
|
3935
|
+
rPr = p_elem.ownerDocument.createElement("w:rPr");
|
|
3936
|
+
pPr.appendChild(rPr);
|
|
3937
|
+
}
|
|
3938
|
+
if (!findChild(rPr, "w:del")) {
|
|
3939
|
+
const del_mark = this._create_track_change_tag("w:del");
|
|
3940
|
+
rPr.appendChild(del_mark);
|
|
3941
|
+
}
|
|
3942
|
+
}
|
|
2793
3943
|
}
|
|
3944
|
+
return true;
|
|
2794
3945
|
}
|
|
2795
|
-
|
|
2796
|
-
let result = markdown_text;
|
|
2797
|
-
for (const [start, end, actual_text, edit, orig_idx] of matched_edits_filtered) {
|
|
2798
|
-
const new_txt = edit.new_text || "";
|
|
2799
|
-
const [prefix_len, suffix_len] = trim_common_context(actual_text, new_txt);
|
|
2800
|
-
const unmodified_prefix = prefix_len > 0 ? actual_text.substring(0, prefix_len) : "";
|
|
2801
|
-
const unmodified_suffix = suffix_len > 0 ? actual_text.substring(actual_text.length - suffix_len) : "";
|
|
2802
|
-
const t_end = actual_text.length - suffix_len;
|
|
2803
|
-
const n_end = new_txt.length - suffix_len;
|
|
2804
|
-
const isolated_target = actual_text.substring(prefix_len, t_end);
|
|
2805
|
-
const isolated_new = new_txt.substring(prefix_len, n_end);
|
|
2806
|
-
const markup = _build_critic_markup(
|
|
2807
|
-
isolated_target,
|
|
2808
|
-
isolated_new,
|
|
2809
|
-
edit.comment,
|
|
2810
|
-
orig_idx,
|
|
2811
|
-
include_index,
|
|
2812
|
-
highlight_only
|
|
2813
|
-
);
|
|
2814
|
-
const full_replacement = unmodified_prefix + markup + unmodified_suffix;
|
|
2815
|
-
result = result.substring(0, start) + full_replacement + result.substring(end);
|
|
2816
|
-
}
|
|
2817
|
-
return result;
|
|
2818
|
-
}
|
|
3946
|
+
};
|
|
2819
3947
|
|
|
2820
3948
|
// src/pagination.ts
|
|
2821
3949
|
var PAGE_TARGET_CHARS = 19e3;
|
|
@@ -3608,7 +4736,12 @@ function extract_outline(doc, projected_body, body_pages, body_page_offsets, par
|
|
|
3608
4736
|
const level = _heading_level(paragraph);
|
|
3609
4737
|
const text = _heading_text(paragraph, comments_map);
|
|
3610
4738
|
const style = _determine_heading_style(paragraph);
|
|
3611
|
-
const owned_end = _find_owned_end(
|
|
4739
|
+
const owned_end = _find_owned_end(
|
|
4740
|
+
block_records,
|
|
4741
|
+
heading_indices,
|
|
4742
|
+
h_pos,
|
|
4743
|
+
level
|
|
4744
|
+
);
|
|
3612
4745
|
const owned_blocks = block_records.slice(rec_idx + 1, owned_end);
|
|
3613
4746
|
const has_table = _direct_has_table(block_records, rec_idx + 1, owned_end);
|
|
3614
4747
|
const footnote_ids = _collect_footnote_ids(owned_blocks);
|
|
@@ -3655,7 +4788,13 @@ function _walk_doc_body(doc, comments_map) {
|
|
|
3655
4788
|
const p_text = build_paragraph_text(item, comments_map, false);
|
|
3656
4789
|
const block_len = (prefix + p_text).length;
|
|
3657
4790
|
if (!is_first_block) cursor += 2;
|
|
3658
|
-
records.push({
|
|
4791
|
+
records.push({
|
|
4792
|
+
item,
|
|
4793
|
+
is_paragraph: true,
|
|
4794
|
+
is_table: false,
|
|
4795
|
+
start_offset: cursor,
|
|
4796
|
+
projected_length: block_len
|
|
4797
|
+
});
|
|
3659
4798
|
cursor += block_len;
|
|
3660
4799
|
is_first_block = false;
|
|
3661
4800
|
} else if (item instanceof Table) {
|
|
@@ -3663,7 +4802,13 @@ function _walk_doc_body(doc, comments_map) {
|
|
|
3663
4802
|
const block_len = table_text ? table_text.length : 0;
|
|
3664
4803
|
if (!is_first_block) cursor += 2;
|
|
3665
4804
|
const table_start = cursor;
|
|
3666
|
-
records.push({
|
|
4805
|
+
records.push({
|
|
4806
|
+
item,
|
|
4807
|
+
is_paragraph: false,
|
|
4808
|
+
is_table: true,
|
|
4809
|
+
start_offset: table_start,
|
|
4810
|
+
projected_length: block_len
|
|
4811
|
+
});
|
|
3667
4812
|
_record_table_inner_blocks_lite(item, table_start, records, comments_map);
|
|
3668
4813
|
cursor += block_len;
|
|
3669
4814
|
is_first_block = false;
|
|
@@ -3683,7 +4828,12 @@ function _compute_inner_block_offset(table, target_paragraph, table_start_offset
|
|
|
3683
4828
|
if (seen_cells.has(cell)) continue;
|
|
3684
4829
|
seen_cells.add(cell);
|
|
3685
4830
|
if (cells_in_row > 0) cursor += 3;
|
|
3686
|
-
const [new_cursor, found] = _walk_cell_for_offset(
|
|
4831
|
+
const [new_cursor, found] = _walk_cell_for_offset(
|
|
4832
|
+
cell,
|
|
4833
|
+
target_el,
|
|
4834
|
+
cursor,
|
|
4835
|
+
comments_map
|
|
4836
|
+
);
|
|
3687
4837
|
if (found) return new_cursor;
|
|
3688
4838
|
cursor = new_cursor;
|
|
3689
4839
|
cells_in_row++;
|
|
@@ -3703,9 +4853,15 @@ function _walk_cell_for_offset(cell, target_el, cell_start_cursor, comments_map)
|
|
|
3703
4853
|
const p_text = build_paragraph_text(inner_item, comments_map, false);
|
|
3704
4854
|
cursor += (prefix + p_text).length;
|
|
3705
4855
|
} else if (inner_item instanceof Table) {
|
|
3706
|
-
const nested_offset = _compute_inner_block_offset(
|
|
4856
|
+
const nested_offset = _compute_inner_block_offset(
|
|
4857
|
+
inner_item,
|
|
4858
|
+
new Paragraph(target_el, null),
|
|
4859
|
+
cursor,
|
|
4860
|
+
comments_map
|
|
4861
|
+
);
|
|
3707
4862
|
if (nested_offset !== cursor) {
|
|
3708
|
-
if (_element_is_descendant(target_el, inner_item._element))
|
|
4863
|
+
if (_element_is_descendant(target_el, inner_item._element))
|
|
4864
|
+
return [nested_offset, true];
|
|
3709
4865
|
}
|
|
3710
4866
|
const table_text = extract_table(inner_item, comments_map, false, 0);
|
|
3711
4867
|
cursor += table_text ? table_text.length : 0;
|
|
@@ -3730,11 +4886,33 @@ function _record_table_inner_blocks_lite(table, inherited_offset, records, comme
|
|
|
3730
4886
|
seen_cells.add(cell);
|
|
3731
4887
|
for (const inner_item of iter_block_items(cell)) {
|
|
3732
4888
|
if (inner_item instanceof Paragraph) {
|
|
3733
|
-
const true_offset = _is_heading(inner_item) ? _compute_inner_block_offset(
|
|
3734
|
-
|
|
4889
|
+
const true_offset = _is_heading(inner_item) ? _compute_inner_block_offset(
|
|
4890
|
+
table,
|
|
4891
|
+
inner_item,
|
|
4892
|
+
inherited_offset,
|
|
4893
|
+
comments_map
|
|
4894
|
+
) : inherited_offset;
|
|
4895
|
+
records.push({
|
|
4896
|
+
item: inner_item,
|
|
4897
|
+
is_paragraph: true,
|
|
4898
|
+
is_table: false,
|
|
4899
|
+
start_offset: true_offset,
|
|
4900
|
+
projected_length: 0
|
|
4901
|
+
});
|
|
3735
4902
|
} else if (inner_item instanceof Table) {
|
|
3736
|
-
records.push({
|
|
3737
|
-
|
|
4903
|
+
records.push({
|
|
4904
|
+
item: inner_item,
|
|
4905
|
+
is_paragraph: false,
|
|
4906
|
+
is_table: true,
|
|
4907
|
+
start_offset: inherited_offset,
|
|
4908
|
+
projected_length: 0
|
|
4909
|
+
});
|
|
4910
|
+
_record_table_inner_blocks_lite(
|
|
4911
|
+
inner_item,
|
|
4912
|
+
inherited_offset,
|
|
4913
|
+
records,
|
|
4914
|
+
comments_map
|
|
4915
|
+
);
|
|
3738
4916
|
}
|
|
3739
4917
|
}
|
|
3740
4918
|
}
|
|
@@ -3755,7 +4933,8 @@ ${header}`);
|
|
|
3755
4933
|
if (fn_text) blocks.push(fn_text);
|
|
3756
4934
|
} else if (item instanceof Paragraph) {
|
|
3757
4935
|
let prefix = get_paragraph_prefix(item);
|
|
3758
|
-
if (is_first_para && c_type === "FootnoteItem")
|
|
4936
|
+
if (is_first_para && c_type === "FootnoteItem")
|
|
4937
|
+
prefix = `[^${part.note_type}-${part.id}]: ${prefix}`;
|
|
3759
4938
|
const p_text = build_paragraph_text(item, comments_map, false);
|
|
3760
4939
|
blocks.push(prefix + p_text);
|
|
3761
4940
|
is_first_para = false;
|
|
@@ -3804,37 +4983,51 @@ function _strip_inline_formatting(text) {
|
|
|
3804
4983
|
return text;
|
|
3805
4984
|
}
|
|
3806
4985
|
/**
 * Describe why a paragraph counts as a heading.
 *
 * Returns the (case-normalised) style name when it is "Title" or starts with
 * "Heading"; otherwise "(outline_level)" when an outline level in 0..8 applies
 * (from the paragraph's own `w:outlineLvl`, else from the cached style entry);
 * otherwise the style name if it contains "Heading N" (N in 1..6); otherwise
 * "(heuristic)".
 *
 * @param {object} paragraph - Wrapper exposing `_element` and `_parent`.
 * @returns {string} A style name or one of the parenthesised markers above.
 */
function _determine_heading_style(paragraph) {
  const owner = paragraph._parent.part || paragraph._parent;
  const [style_cache, default_pstyle] = _get_style_cache(owner);
  const pPr = findChild(paragraph._element, "w:pPr");

  // Explicit paragraph style wins over the document default.
  const pStyle = pPr ? findChild(pPr, "w:pStyle") : null;
  const style_id = pStyle ? pStyle.getAttribute("w:val") || default_pstyle : default_pstyle;
  const cached_style = style_id && style_cache && style_cache[style_id] ? style_cache[style_id] : null;

  // Outline level: direct paragraph property first, cached style entry as fallback.
  let outline_level = null;
  const oLvl = pPr ? findChild(pPr, "w:outlineLvl") : null;
  const raw_level = oLvl ? oLvl.getAttribute("w:val") || "" : "";
  if (/^\d+$/.test(raw_level)) {
    outline_level = parseInt(raw_level, 10);
  }
  if (outline_level === null && cached_style) {
    outline_level = cached_style.outline_level;
  }

  // Canonicalise the casing of "heading ..." / "title" style names.
  let label = cached_style ? cached_style.name : style_id;
  if (label && typeof label === "string") {
    const lowered = label.toLowerCase();
    if (lowered.startsWith("heading")) {
      label = label.replace(/^heading/i, "Heading");
    } else if (lowered === "title") {
      label = "Title";
    }
  }

  const has_outline_level = outline_level !== null && outline_level >= 0 && outline_level <= 8;
  const is_named_heading = label && (label.startsWith("Heading") || label === "Title");

  if (is_named_heading) return label;
  if (has_outline_level) return "(outline_level)";
  if (label && /Heading[ ]?([1-6])(?![0-9])/.test(label)) return label;
  return "(heuristic)";
}
|
|
3834
5026
|
/**
 * Find where the section owned by a heading stops.
 *
 * Scans the headings after position `current_h_pos` and returns the block index
 * of the first one whose level is less than or equal to `current_level`.
 *
 * @param {Array<{item: object}>} block_records - All recorded blocks.
 * @param {number[]} heading_indices - Indices into `block_records` that are headings.
 * @param {number} current_h_pos - Position of the current heading within `heading_indices`.
 * @param {number} current_level - Level of the current heading.
 * @returns {number} Exclusive end index into `block_records`; `block_records.length`
 *   when no later heading closes the section.
 */
function _find_owned_end(block_records, heading_indices, current_h_pos, current_level) {
  const closing_idx = heading_indices.find(
    (record_idx, h_pos) =>
      h_pos > current_h_pos &&
      _heading_level(block_records[record_idx].item) <= current_level
  );
  return closing_idx === undefined ? block_records.length : closing_idx;
}
|
|
@@ -3965,7 +5158,7 @@ var SanitizeReport = class {
|
|
|
3965
5158
|
if (this.warnings.length > 0) {
|
|
3966
5159
|
lines.push(`Result: CLEAN WITH WARNINGS (${this.warnings.length} warning${this.warnings.length > 1 ? "s" : ""})`);
|
|
3967
5160
|
} else {
|
|
3968
|
-
lines.push(
|
|
5161
|
+
lines.push(`Result: CLEAN (${this.tracked_changes_found} changes resolved, ${this.comments_removed} comments removed)`);
|
|
3969
5162
|
}
|
|
3970
5163
|
lines.push(sep);
|
|
3971
5164
|
return lines.join("\n");
|
|
@@ -3984,6 +5177,83 @@ function findDescendantsByLocalName(element, localName) {
|
|
|
3984
5177
|
}
|
|
3985
5178
|
return result;
|
|
3986
5179
|
}
|
|
5180
|
+
/**
 * Merge adjacent `w:r` runs that carry identical run properties.
 *
 * Two neighbouring runs are merged when both contain only plain content
 * (`w:t`, `w:tab`, `w:br`, `w:cr`, `w:delText`, `w:rPr`) and their `w:rPr`
 * serialise to the same string. Runs nested in `w:ins`, `w:del`,
 * `w:hyperlink`, `w:sdt`, `w:smartTag`, `w:fldSimple` and `w:sdtContent`
 * are processed recursively.
 *
 * Text of the second run is concatenated into the first run's text element
 * only when that element is the run's last element child; otherwise the
 * children are appended as-is so the visible order (text, tabs, breaks) is
 * never changed.
 *
 * @param {{element: object}} doc - Document wrapper; `doc.element` is the DOM root searched for `w:p`.
 * @returns {string[]} One report line when at least one merge happened, else an empty array.
 */
function coalesce_runs(doc) {
  const TEXT_TAGS = ["w:t", "w:delText"];
  const SAFE_RUN_CHILD_TAGS = ["w:t", "w:tab", "w:br", "w:cr", "w:delText", "w:rPr"];
  const NESTED_CONTAINER_TAGS = ["w:ins", "w:del", "w:hyperlink", "w:sdt", "w:smartTag", "w:fldSimple", "w:sdtContent"];
  let count = 0;

  const isElement = (node) => node.nodeType === 1;
  const isTextElement = (node) => TEXT_TAGS.includes(node.tagName);

  // NOTE(review): relies on the DOM implementation's toString() producing XML -- confirm.
  function areRunsIdentical(rPr1, rPr2) {
    const xml1 = rPr1 ? rPr1.toString() : "";
    const xml2 = rPr2 ? rPr2.toString() : "";
    return xml1 === xml2;
  }

  // A run holding anything beyond plain text/tab/break content is left untouched.
  function hasSpecialContent(run) {
    return Array.from(run.childNodes).some(
      (child) => isElement(child) && !SAFE_RUN_CHILD_TAGS.includes(child.tagName)
    );
  }

  // Move the content of `source` to the end of `target`, preserving document order.
  function mergeRunInto(target, source) {
    // `tail_text` is the merge target: the text element that currently ENDS the run.
    // It is null when the run ends with a tab/break, so later text is never pulled
    // in front of that tab/break.
    let tail_text = null;
    for (let c = 0; c < target.childNodes.length; c++) {
      const child = target.childNodes[c];
      if (isElement(child)) tail_text = isTextElement(child) ? child : null;
    }
    const incoming = Array.from(source.childNodes).filter(isElement);
    for (const child of incoming) {
      if (child.tagName === "w:rPr") continue;
      if (tail_text && isTextElement(child) && tail_text.tagName === child.tagName) {
        const combined = (tail_text.textContent || "") + (child.textContent || "");
        tail_text.textContent = combined;
        // Leading/trailing whitespace is only kept when xml:space="preserve" is set.
        if (combined.trim() !== combined) {
          tail_text.setAttribute("xml:space", "preserve");
        }
      } else {
        target.appendChild(child);
        tail_text = isTextElement(child) ? child : null;
      }
    }
  }

  function coalesceContainer(container) {
    const children = Array.from(container.childNodes).filter(isElement);
    let i = 0;
    while (i < children.length) {
      const curr = children[i];
      const nxt = children[i + 1];
      const mergeable =
        nxt !== undefined &&
        curr.tagName === "w:r" &&
        nxt.tagName === "w:r" &&
        !hasSpecialContent(curr) &&
        !hasSpecialContent(nxt) &&
        areRunsIdentical(findChild(curr, "w:rPr"), findChild(nxt, "w:rPr"));
      if (mergeable) {
        mergeRunInto(curr, nxt);
        container.removeChild(nxt);
        children.splice(i + 1, 1);
        count++;
        // Stay on `curr`: the run that followed `nxt` may be mergeable too.
        continue;
      }
      if (NESTED_CONTAINER_TAGS.includes(curr.tagName)) {
        coalesceContainer(curr);
      }
      i++;
    }
  }

  const paragraphs = findAllDescendants(doc.element, "w:p");
  for (const p of paragraphs) coalesceContainer(p);
  return count ? [`Adjacent identical runs coalesced: ${count}`] : [];
}
|
|
3987
5257
|
function strip_rsid(doc) {
|
|
3988
5258
|
let count = 0;
|
|
3989
5259
|
const rsidAttrs = ["w:rsidR", "w:rsidRPr", "w:rsidRDefault", "w:rsidP", "w:rsidDel", "w:rsidSect", "w:rsidTr"];
|
|
@@ -4339,6 +5609,7 @@ async function finalize_document(doc, options) {
|
|
|
4339
5609
|
report.add_transform_lines(strip_proof_errors(doc));
|
|
4340
5610
|
report.add_transform_lines(strip_empty_properties(doc));
|
|
4341
5611
|
report.add_transform_lines(strip_hidden_text(doc));
|
|
5612
|
+
report.add_transform_lines(coalesce_runs(doc));
|
|
4342
5613
|
report.add_transform_lines(scrub_doc_properties(doc));
|
|
4343
5614
|
report.add_transform_lines(scrub_timestamps(doc));
|
|
4344
5615
|
report.add_transform_lines(strip_custom_xml(doc));
|
|
@@ -4366,6 +5637,26 @@ async function finalize_document(doc, options) {
|
|
|
4366
5637
|
if (options.export_pdf) {
|
|
4367
5638
|
report.warnings.push("PDF export requires the Python/Word COM environment and is skipped in this zero-dependency Node agent.");
|
|
4368
5639
|
}
|
|
5640
|
+
for (const part of doc.pkg.parts) {
|
|
5641
|
+
if (part === doc.part || part.contentType.includes("wordprocessingml") && part.contentType.endsWith("+xml")) {
|
|
5642
|
+
if (part._element.hasAttribute("xmlns:w16du")) {
|
|
5643
|
+
let hasW16du = false;
|
|
5644
|
+
if (Array.from(part._element.attributes || []).some((a) => a.name.startsWith("w16du:") && a.name !== "xmlns:w16du")) {
|
|
5645
|
+
hasW16du = true;
|
|
5646
|
+
}
|
|
5647
|
+
if (!hasW16du) {
|
|
5648
|
+
const allNodes = findAllDescendants(part._element, "*");
|
|
5649
|
+
for (const n of allNodes) {
|
|
5650
|
+
if (n.tagName.startsWith("w16du:") || Array.from(n.attributes || []).some((a) => a.name.startsWith("w16du:"))) {
|
|
5651
|
+
hasW16du = true;
|
|
5652
|
+
break;
|
|
5653
|
+
}
|
|
5654
|
+
}
|
|
5655
|
+
}
|
|
5656
|
+
if (!hasW16du) part._element.removeAttribute("xmlns:w16du");
|
|
5657
|
+
}
|
|
5658
|
+
}
|
|
5659
|
+
}
|
|
4369
5660
|
if (report.warnings.length > 0) report.status = "clean_with_warnings";
|
|
4370
5661
|
const outBuffer = await doc.save();
|
|
4371
5662
|
return { reportText: report.render(), outBuffer };
|
|
@@ -4382,6 +5673,7 @@ export {
|
|
|
4382
5673
|
RedlineEngine,
|
|
4383
5674
|
apply_edits_to_markdown,
|
|
4384
5675
|
create_unified_diff,
|
|
5676
|
+
create_word_patch_diff,
|
|
4385
5677
|
extractTextFromBuffer,
|
|
4386
5678
|
extract_outline,
|
|
4387
5679
|
finalize_document,
|