@adeu/core 1.6.8 → 1.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +58 -0
- package/dist/index.cjs +1833 -540
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +75 -1
- package/dist/index.d.ts +75 -1
- package/dist/index.js +1832 -540
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/src/consistency.test.ts +134 -0
- package/src/diff.test.ts +13 -1
- package/src/diff.ts +189 -70
- package/src/docx/bridge.ts +99 -57
- package/src/docx/dom.ts +66 -7
- package/src/engine.bugs.test.ts +481 -0
- package/src/engine.ts +1346 -192
- package/src/index.ts +1 -1
- package/src/markup.ts +160 -53
- package/src/outline.ts +199 -69
- package/src/sanitize/core.ts +26 -0
- package/src/sanitize/report.ts +1 -1
- package/src/sanitize/sanitize.test.ts +47 -2
- package/src/sanitize/transforms.ts +87 -0
- package/src/utils/docx.ts +282 -157
package/dist/index.cjs
CHANGED
|
@@ -36,6 +36,7 @@ __export(index_exports, {
|
|
|
36
36
|
RedlineEngine: () => RedlineEngine,
|
|
37
37
|
apply_edits_to_markdown: () => apply_edits_to_markdown,
|
|
38
38
|
create_unified_diff: () => create_unified_diff,
|
|
39
|
+
create_word_patch_diff: () => create_word_patch_diff,
|
|
39
40
|
extractTextFromBuffer: () => extractTextFromBuffer,
|
|
40
41
|
extract_outline: () => extract_outline,
|
|
41
42
|
finalize_document: () => finalize_document,
|
|
@@ -78,7 +79,38 @@ function parseXml(xmlString) {
|
|
|
78
79
|
return new import_xmldom.DOMParser().parseFromString(xmlString, "text/xml");
|
|
79
80
|
}
|
|
80
81
|
function serializeXml(node) {
|
|
81
|
-
|
|
82
|
+
let xml = new import_xmldom.XMLSerializer().serializeToString(node);
|
|
83
|
+
const rootTagRegex = /<([a-zA-Z0-9_:]+)(\s+[^>]+?)(>|\/>)/;
|
|
84
|
+
const match = rootTagRegex.exec(xml);
|
|
85
|
+
if (match && !match[1].startsWith("?")) {
|
|
86
|
+
const index = match.index;
|
|
87
|
+
const textBefore = xml.substring(0, index);
|
|
88
|
+
const isRoot = !textBefore.includes("<") || textBefore.trim().startsWith("<?xml") && (textBefore.match(/</g) || []).length === 1;
|
|
89
|
+
if (isRoot) {
|
|
90
|
+
const fullTag = match[0];
|
|
91
|
+
const elemStart = `<${match[1]}`;
|
|
92
|
+
const attrsStr = match[2];
|
|
93
|
+
const tagEnd = match[3];
|
|
94
|
+
const attrRegex = /([a-zA-Z0-9_:]+)\s*=\s*(["'])(.*?)\2/g;
|
|
95
|
+
const attrs = [];
|
|
96
|
+
let m;
|
|
97
|
+
while ((m = attrRegex.exec(attrsStr)) !== null) {
|
|
98
|
+
attrs.push(m[0].trim());
|
|
99
|
+
}
|
|
100
|
+
attrs.sort((a, b) => {
|
|
101
|
+
const aName = a.split("=")[0].trim();
|
|
102
|
+
const bName = b.split("=")[0].trim();
|
|
103
|
+
const aIsXmlns = aName.startsWith("xmlns");
|
|
104
|
+
const bIsXmlns = bName.startsWith("xmlns");
|
|
105
|
+
if (aIsXmlns && !bIsXmlns) return -1;
|
|
106
|
+
if (!aIsXmlns && bIsXmlns) return 1;
|
|
107
|
+
return aName < bName ? -1 : aName > bName ? 1 : 0;
|
|
108
|
+
});
|
|
109
|
+
const newTag = attrs.length > 0 ? `${elemStart} ${attrs.join(" ")}${tagEnd}` : `${elemStart}${tagEnd}`;
|
|
110
|
+
xml = xml.substring(0, index) + newTag + xml.substring(index + fullTag.length);
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
return xml;
|
|
82
114
|
}
|
|
83
115
|
|
|
84
116
|
// src/docx/bridge.ts
|
|
@@ -106,12 +138,16 @@ var Part = class {
|
|
|
106
138
|
contentType;
|
|
107
139
|
rels = /* @__PURE__ */ new Map();
|
|
108
140
|
_element;
|
|
141
|
+
package;
|
|
109
142
|
addRelationship(id, type, target, isExternal = false) {
|
|
110
143
|
this.rels.set(id, new Relationship(id, type, target, isExternal));
|
|
111
144
|
if (this.partname.endsWith(".rels")) {
|
|
112
145
|
const doc = this._element.ownerDocument;
|
|
113
146
|
if (doc) {
|
|
114
|
-
const relEl = doc.createElementNS(
|
|
147
|
+
const relEl = doc.createElementNS(
|
|
148
|
+
"http://schemas.openxmlformats.org/package/2006/relationships",
|
|
149
|
+
"Relationship"
|
|
150
|
+
);
|
|
115
151
|
relEl.setAttribute("Id", id);
|
|
116
152
|
relEl.setAttribute("Type", type);
|
|
117
153
|
relEl.setAttribute("Target", target);
|
|
@@ -130,7 +166,9 @@ var DocxPackage = class {
|
|
|
130
166
|
mainDocumentPart;
|
|
131
167
|
getPartByPath(path) {
|
|
132
168
|
const searchPath = path.startsWith("/") ? path.substring(1) : path;
|
|
133
|
-
return this.parts.find(
|
|
169
|
+
return this.parts.find(
|
|
170
|
+
(p) => p.partname === searchPath || p.partname === "/" + searchPath
|
|
171
|
+
);
|
|
134
172
|
}
|
|
135
173
|
nextPartname(pattern) {
|
|
136
174
|
let i = 1;
|
|
@@ -142,7 +180,13 @@ var DocxPackage = class {
|
|
|
142
180
|
}
|
|
143
181
|
addPart(partname, contentType, xmlString) {
|
|
144
182
|
const doc = parseXml(xmlString);
|
|
145
|
-
const part = new Part(
|
|
183
|
+
const part = new Part(
|
|
184
|
+
partname,
|
|
185
|
+
xmlString,
|
|
186
|
+
doc.documentElement,
|
|
187
|
+
contentType
|
|
188
|
+
);
|
|
189
|
+
part.package = this;
|
|
146
190
|
this.parts.push(part);
|
|
147
191
|
const ctPart = this.getPartByPath("[Content_Types].xml");
|
|
148
192
|
if (ctPart) {
|
|
@@ -164,7 +208,11 @@ var DocxPackage = class {
|
|
|
164
208
|
if (!relsPart) {
|
|
165
209
|
const xml = `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
|
166
210
|
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships"></Relationships>`;
|
|
167
|
-
relsPart = this.addPart(
|
|
211
|
+
relsPart = this.addPart(
|
|
212
|
+
relsPath,
|
|
213
|
+
"application/vnd.openxmlformats-package.relationships+xml",
|
|
214
|
+
xml
|
|
215
|
+
);
|
|
168
216
|
}
|
|
169
217
|
return relsPart;
|
|
170
218
|
}
|
|
@@ -176,7 +224,9 @@ var DocumentObject = class _DocumentObject {
|
|
|
176
224
|
}
|
|
177
225
|
pkg;
|
|
178
226
|
part;
|
|
179
|
-
settings = {
|
|
227
|
+
settings = {
|
|
228
|
+
oddAndEvenPagesHeaderFooter: false
|
|
229
|
+
};
|
|
180
230
|
// Simplification for the TS port: sections hold header/footer refs
|
|
181
231
|
sections = [];
|
|
182
232
|
get element() {
|
|
@@ -203,6 +253,7 @@ var DocumentObject = class _DocumentObject {
|
|
|
203
253
|
const doc = parseXml(text);
|
|
204
254
|
const cType = contentTypes["/" + path] || "application/xml";
|
|
205
255
|
const part = new Part("/" + path, text, doc.documentElement, cType);
|
|
256
|
+
part.package = pkg;
|
|
206
257
|
pkg.parts.push(part);
|
|
207
258
|
}
|
|
208
259
|
}
|
|
@@ -218,7 +269,10 @@ var DocumentObject = class _DocumentObject {
|
|
|
218
269
|
const type = rel.getAttribute("Type");
|
|
219
270
|
const targetMode = rel.getAttribute("TargetMode");
|
|
220
271
|
if (rId && target && type) {
|
|
221
|
-
mainPart.rels.set(
|
|
272
|
+
mainPart.rels.set(
|
|
273
|
+
rId,
|
|
274
|
+
new Relationship(rId, type, target, targetMode === "External")
|
|
275
|
+
);
|
|
222
276
|
}
|
|
223
277
|
}
|
|
224
278
|
}
|
|
@@ -784,7 +838,14 @@ function _get_style_cache(part) {
|
|
|
784
838
|
const is_default = s.getAttribute("w:default") === "1" || s.getAttribute("w:default") === "true";
|
|
785
839
|
if (s_type === "paragraph" && is_default) default_pstyle = s_id;
|
|
786
840
|
const name_el = findChild(s, "w:name");
|
|
787
|
-
|
|
841
|
+
let name = name_el ? name_el.getAttribute("w:val") : s_id;
|
|
842
|
+
if (name && typeof name === "string") {
|
|
843
|
+
if (name.toLowerCase().startsWith("heading")) {
|
|
844
|
+
name = name.replace(/^heading/i, "Heading");
|
|
845
|
+
} else if (name.toLowerCase() === "title") {
|
|
846
|
+
name = "Title";
|
|
847
|
+
}
|
|
848
|
+
}
|
|
788
849
|
const based_on_el = findChild(s, "w:basedOn");
|
|
789
850
|
const based_on = based_on_el ? based_on_el.getAttribute("w:val") : null;
|
|
790
851
|
let outline_lvl = null;
|
|
@@ -809,7 +870,8 @@ function _get_style_cache(part) {
|
|
|
809
870
|
}
|
|
810
871
|
const resolve_style = (s_id, visited) => {
|
|
811
872
|
if (cache[s_id]) return cache[s_id];
|
|
812
|
-
if (visited.has(s_id) || !raw_styles[s_id])
|
|
873
|
+
if (visited.has(s_id) || !raw_styles[s_id])
|
|
874
|
+
return { name: s_id, outline_level: null, bold: false };
|
|
813
875
|
visited.add(s_id);
|
|
814
876
|
const raw = raw_styles[s_id];
|
|
815
877
|
const based_on_id = raw.based_on;
|
|
@@ -836,7 +898,9 @@ function _detect_heading_level_from_name(name) {
|
|
|
836
898
|
}
|
|
837
899
|
function is_native_heading(paragraph, style_cache, default_pstyle) {
|
|
838
900
|
if (!style_cache) {
|
|
839
|
-
[style_cache, default_pstyle] = _get_style_cache(
|
|
901
|
+
[style_cache, default_pstyle] = _get_style_cache(
|
|
902
|
+
paragraph._parent.part || paragraph._parent
|
|
903
|
+
);
|
|
840
904
|
}
|
|
841
905
|
const pPr = findChild(paragraph._element, QN_W_PPR);
|
|
842
906
|
if (pPr) {
|
|
@@ -858,7 +922,10 @@ function is_native_heading(paragraph, style_cache, default_pstyle) {
|
|
|
858
922
|
if (style_info && style_info.outline_level !== null && style_info.outline_level >= 0 && style_info.outline_level <= 8) {
|
|
859
923
|
return true;
|
|
860
924
|
}
|
|
861
|
-
|
|
925
|
+
let style_name = style_info ? style_info.name : style_id;
|
|
926
|
+
if (style_name && typeof style_name === "string" && style_name.toLowerCase().startsWith("heading")) {
|
|
927
|
+
style_name = style_name.replace(/^heading/i, "Heading");
|
|
928
|
+
}
|
|
862
929
|
if (style_name?.startsWith("Heading")) return true;
|
|
863
930
|
if (style_name === "Title") return true;
|
|
864
931
|
if (style_name && style_name !== "Normal") {
|
|
@@ -868,7 +935,9 @@ function is_native_heading(paragraph, style_cache, default_pstyle) {
|
|
|
868
935
|
}
|
|
869
936
|
function get_paragraph_prefix(paragraph, style_cache, default_pstyle) {
|
|
870
937
|
if (!style_cache) {
|
|
871
|
-
[style_cache, default_pstyle] = _get_style_cache(
|
|
938
|
+
[style_cache, default_pstyle] = _get_style_cache(
|
|
939
|
+
paragraph._parent.part || paragraph._parent
|
|
940
|
+
);
|
|
872
941
|
}
|
|
873
942
|
const pPr = findChild(paragraph._element, QN_W_PPR);
|
|
874
943
|
if (pPr) {
|
|
@@ -890,7 +959,10 @@ function get_paragraph_prefix(paragraph, style_cache, default_pstyle) {
|
|
|
890
959
|
if (style_info && style_info.outline_level !== null && style_info.outline_level >= 0 && style_info.outline_level <= 8) {
|
|
891
960
|
return "#".repeat(style_info.outline_level + 1) + " ";
|
|
892
961
|
}
|
|
893
|
-
|
|
962
|
+
let style_name = style_info ? style_info.name : style_id;
|
|
963
|
+
if (style_name && typeof style_name === "string" && style_name.toLowerCase().startsWith("heading")) {
|
|
964
|
+
style_name = style_name.replace(/^heading/i, "Heading");
|
|
965
|
+
}
|
|
894
966
|
if (style_name?.startsWith("Heading")) {
|
|
895
967
|
const match = style_name.replace("Heading", "").trim();
|
|
896
968
|
if (/^\d+$/.test(match)) return "#".repeat(parseInt(match, 10)) + " ";
|
|
@@ -957,9 +1029,11 @@ function get_run_style_markers(run, is_heading = null) {
|
|
|
957
1029
|
let is_italic = false;
|
|
958
1030
|
if (rPr) {
|
|
959
1031
|
const b = findChild(rPr, QN_W_B);
|
|
960
|
-
if (b && b.getAttribute(QN_W_VAL) !== "0" && b.getAttribute(QN_W_VAL) !== "false")
|
|
1032
|
+
if (b && b.getAttribute(QN_W_VAL) !== "0" && b.getAttribute(QN_W_VAL) !== "false")
|
|
1033
|
+
is_bold = true;
|
|
961
1034
|
const i = findChild(rPr, QN_W_I);
|
|
962
|
-
if (i && i.getAttribute(QN_W_VAL) !== "0" && i.getAttribute(QN_W_VAL) !== "false")
|
|
1035
|
+
if (i && i.getAttribute(QN_W_VAL) !== "0" && i.getAttribute(QN_W_VAL) !== "false")
|
|
1036
|
+
is_italic = true;
|
|
963
1037
|
}
|
|
964
1038
|
if (is_heading === null) {
|
|
965
1039
|
const parent = run._parent;
|
|
@@ -1004,7 +1078,8 @@ function* iter_block_items(parent) {
|
|
|
1004
1078
|
const tag = parent.note_type === "fn" ? "w:footnote" : "w:endnote";
|
|
1005
1079
|
const notes = findAllDescendants(parent_elm, tag);
|
|
1006
1080
|
for (const child of notes) {
|
|
1007
|
-
if (child.getAttribute("w:type") === "separator" || child.getAttribute("w:type") === "continuationSeparator")
|
|
1081
|
+
if (child.getAttribute("w:type") === "separator" || child.getAttribute("w:type") === "continuationSeparator")
|
|
1082
|
+
continue;
|
|
1008
1083
|
yield new FootnoteItem(child, parent, parent.note_type);
|
|
1009
1084
|
}
|
|
1010
1085
|
return;
|
|
@@ -1020,7 +1095,15 @@ function* iter_block_items(parent) {
|
|
|
1020
1095
|
}
|
|
1021
1096
|
}
|
|
1022
1097
|
function* iter_document_parts(doc) {
|
|
1098
|
+
const headers = doc.pkg.parts.filter(
|
|
1099
|
+
(p) => p.contentType === "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml"
|
|
1100
|
+
);
|
|
1101
|
+
for (const h of headers) yield h;
|
|
1023
1102
|
yield doc;
|
|
1103
|
+
const footers = doc.pkg.parts.filter(
|
|
1104
|
+
(p) => p.contentType === "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml"
|
|
1105
|
+
);
|
|
1106
|
+
for (const f of footers) yield f;
|
|
1024
1107
|
const fnPart = doc.pkg.getPartByPath("word/footnotes.xml");
|
|
1025
1108
|
const enPart = doc.pkg.getPartByPath("word/endnotes.xml");
|
|
1026
1109
|
if (fnPart) yield new NotesPart(fnPart, "fn");
|
|
@@ -1034,7 +1117,8 @@ function _is_page_instr(instr) {
|
|
|
1034
1117
|
function _get_part(parent) {
|
|
1035
1118
|
if (!parent) return null;
|
|
1036
1119
|
if (parent.part) return parent.part;
|
|
1037
|
-
if (parent.pkg && parent.pkg.mainDocumentPart)
|
|
1120
|
+
if (parent.pkg && parent.pkg.mainDocumentPart)
|
|
1121
|
+
return parent.pkg.mainDocumentPart;
|
|
1038
1122
|
if (parent._parent) return _get_part(parent._parent);
|
|
1039
1123
|
return null;
|
|
1040
1124
|
}
|
|
@@ -1049,7 +1133,12 @@ function* iter_paragraph_content(paragraph) {
|
|
|
1049
1133
|
const rPrChange = findChild(rPr, QN_W_RPRCHANGE);
|
|
1050
1134
|
if (rPrChange) {
|
|
1051
1135
|
c_id = rPrChange.getAttribute(QN_W_ID);
|
|
1052
|
-
yield {
|
|
1136
|
+
yield {
|
|
1137
|
+
type: "fmt_start",
|
|
1138
|
+
id: c_id,
|
|
1139
|
+
author: rPrChange.getAttribute(QN_W_AUTHOR) || void 0,
|
|
1140
|
+
date: rPrChange.getAttribute(QN_W_DATE) || void 0
|
|
1141
|
+
};
|
|
1053
1142
|
}
|
|
1054
1143
|
}
|
|
1055
1144
|
for (let i = 0; i < r_element.childNodes.length; i++) {
|
|
@@ -1074,12 +1163,14 @@ function* iter_paragraph_content(paragraph) {
|
|
|
1074
1163
|
if (_is_page_instr(current_instr)) hide_result = true;
|
|
1075
1164
|
else {
|
|
1076
1165
|
const parts = current_instr.trim().split(/\s+/);
|
|
1077
|
-
if (parts.length > 1 && parts[0] === "REF")
|
|
1166
|
+
if (parts.length > 1 && parts[0] === "REF")
|
|
1167
|
+
yield { type: "xref_start", id: parts[1] };
|
|
1078
1168
|
}
|
|
1079
1169
|
} else if (fld_type === "end") {
|
|
1080
1170
|
if (!hide_result) {
|
|
1081
1171
|
const parts = current_instr.trim().split(/\s+/);
|
|
1082
|
-
if (parts.length > 1 && parts[0] === "REF")
|
|
1172
|
+
if (parts.length > 1 && parts[0] === "REF")
|
|
1173
|
+
yield { type: "xref_end", id: parts[1] };
|
|
1083
1174
|
}
|
|
1084
1175
|
in_complex_field = false;
|
|
1085
1176
|
current_instr = "";
|
|
@@ -1100,16 +1191,28 @@ function* iter_paragraph_content(paragraph) {
|
|
|
1100
1191
|
if (tag === QN_W_R) yield* process_run_element(child);
|
|
1101
1192
|
else if (tag === QN_W_INS) {
|
|
1102
1193
|
const i_id = child.getAttribute(QN_W_ID);
|
|
1103
|
-
yield {
|
|
1194
|
+
yield {
|
|
1195
|
+
type: "ins_start",
|
|
1196
|
+
id: i_id,
|
|
1197
|
+
author: child.getAttribute(QN_W_AUTHOR) || void 0,
|
|
1198
|
+
date: child.getAttribute(QN_W_DATE) || void 0
|
|
1199
|
+
};
|
|
1104
1200
|
yield* traverse_node(child);
|
|
1105
1201
|
yield { type: "ins_end", id: i_id };
|
|
1106
1202
|
} else if (tag === QN_W_DEL) {
|
|
1107
1203
|
const d_id = child.getAttribute(QN_W_ID);
|
|
1108
|
-
yield {
|
|
1204
|
+
yield {
|
|
1205
|
+
type: "del_start",
|
|
1206
|
+
id: d_id,
|
|
1207
|
+
author: child.getAttribute(QN_W_AUTHOR) || void 0,
|
|
1208
|
+
date: child.getAttribute(QN_W_DATE) || void 0
|
|
1209
|
+
};
|
|
1109
1210
|
yield* traverse_node(child);
|
|
1110
1211
|
yield { type: "del_end", id: d_id };
|
|
1111
|
-
} else if (tag === QN_W_COMMENTRANGESTART)
|
|
1112
|
-
|
|
1212
|
+
} else if (tag === QN_W_COMMENTRANGESTART)
|
|
1213
|
+
yield { type: "start", id: child.getAttribute(QN_W_ID) };
|
|
1214
|
+
else if (tag === QN_W_COMMENTRANGEEND)
|
|
1215
|
+
yield { type: "end", id: child.getAttribute(QN_W_ID) };
|
|
1113
1216
|
else if (tag === QN_W_HYPERLINK) {
|
|
1114
1217
|
const rId = child.getAttribute(QN_R_ID) || child.getAttribute("id");
|
|
1115
1218
|
let url = "";
|
|
@@ -1130,7 +1233,8 @@ function* iter_paragraph_content(paragraph) {
|
|
|
1130
1233
|
if (target) yield { type: "xref_end", id: target };
|
|
1131
1234
|
} else if (tag === QN_W_BOOKMARKSTART) {
|
|
1132
1235
|
const b_name = child.getAttribute(QN_W_NAME);
|
|
1133
|
-
if (b_name && (!b_name.startsWith("_") || b_name.startsWith("_Ref")))
|
|
1236
|
+
if (b_name && (!b_name.startsWith("_") || b_name.startsWith("_Ref")))
|
|
1237
|
+
yield { type: "bookmark", id: b_name };
|
|
1134
1238
|
} else if (tag === QN_W_SDT || tag === QN_W_SMARTTAG || tag === QN_W_SDTCONTENT) {
|
|
1135
1239
|
yield* traverse_node(child);
|
|
1136
1240
|
}
|
|
@@ -1942,13 +2046,18 @@ function _words_to_chars(text1, text2) {
|
|
|
1942
2046
|
}
|
|
1943
2047
|
function generate_edits_from_text(original_text, modified_text) {
|
|
1944
2048
|
const dmp = new import_diff_match_patch.default.diff_match_patch();
|
|
1945
|
-
|
|
2049
|
+
dmp.Diff_Timeout = 2;
|
|
2050
|
+
const [chars1, chars2, token_array] = _words_to_chars(
|
|
2051
|
+
original_text,
|
|
2052
|
+
modified_text
|
|
2053
|
+
);
|
|
1946
2054
|
const diffs = dmp.diff_main(chars1, chars2, false);
|
|
1947
2055
|
dmp.diff_cleanupSemantic(diffs);
|
|
1948
2056
|
for (let i = 0; i < diffs.length; i++) {
|
|
1949
2057
|
const chars = diffs[i][1];
|
|
1950
2058
|
let text = "";
|
|
1951
|
-
for (let j = 0; j < chars.length; j++)
|
|
2059
|
+
for (let j = 0; j < chars.length; j++)
|
|
2060
|
+
text += token_array[chars.charCodeAt(j)];
|
|
1952
2061
|
diffs[i][1] = text;
|
|
1953
2062
|
}
|
|
1954
2063
|
const edits = [];
|
|
@@ -1958,7 +2067,13 @@ function generate_edits_from_text(original_text, modified_text) {
|
|
|
1958
2067
|
if (op === 0) {
|
|
1959
2068
|
if (pending_delete) {
|
|
1960
2069
|
const [idx, del_txt] = pending_delete;
|
|
1961
|
-
edits.push({
|
|
2070
|
+
edits.push({
|
|
2071
|
+
type: "modify",
|
|
2072
|
+
target_text: del_txt,
|
|
2073
|
+
new_text: "",
|
|
2074
|
+
comment: "Diff: Text deleted",
|
|
2075
|
+
_match_start_index: idx
|
|
2076
|
+
});
|
|
1962
2077
|
pending_delete = null;
|
|
1963
2078
|
}
|
|
1964
2079
|
current_original_index += text.length;
|
|
@@ -1968,21 +2083,40 @@ function generate_edits_from_text(original_text, modified_text) {
|
|
|
1968
2083
|
} else if (op === 1) {
|
|
1969
2084
|
if (pending_delete) {
|
|
1970
2085
|
const [idx, del_txt] = pending_delete;
|
|
1971
|
-
edits.push({
|
|
2086
|
+
edits.push({
|
|
2087
|
+
type: "modify",
|
|
2088
|
+
target_text: del_txt,
|
|
2089
|
+
new_text: text,
|
|
2090
|
+
comment: "Diff: Replacement",
|
|
2091
|
+
_match_start_index: idx
|
|
2092
|
+
});
|
|
1972
2093
|
pending_delete = null;
|
|
1973
2094
|
} else {
|
|
1974
|
-
edits.push({
|
|
2095
|
+
edits.push({
|
|
2096
|
+
type: "modify",
|
|
2097
|
+
target_text: "",
|
|
2098
|
+
new_text: text,
|
|
2099
|
+
comment: "Diff: Text inserted",
|
|
2100
|
+
_match_start_index: current_original_index
|
|
2101
|
+
});
|
|
1975
2102
|
}
|
|
1976
2103
|
}
|
|
1977
2104
|
}
|
|
1978
2105
|
if (pending_delete) {
|
|
1979
2106
|
const [idx, del_txt] = pending_delete;
|
|
1980
|
-
edits.push({
|
|
2107
|
+
edits.push({
|
|
2108
|
+
type: "modify",
|
|
2109
|
+
target_text: del_txt,
|
|
2110
|
+
new_text: "",
|
|
2111
|
+
comment: "Diff: Text deleted",
|
|
2112
|
+
_match_start_index: idx
|
|
2113
|
+
});
|
|
1981
2114
|
}
|
|
1982
2115
|
return edits;
|
|
1983
2116
|
}
|
|
1984
2117
|
function create_unified_diff(original_text, modified_text, context_lines = 3) {
|
|
1985
2118
|
const dmp = new import_diff_match_patch.default.diff_match_patch();
|
|
2119
|
+
dmp.Diff_Timeout = 2;
|
|
1986
2120
|
const a = dmp.diff_linesToChars_(original_text, modified_text);
|
|
1987
2121
|
const diffs = dmp.diff_main(a.chars1, a.chars2, false);
|
|
1988
2122
|
dmp.diff_charsToLines_(diffs, a.lineArray);
|
|
@@ -2025,159 +2159,614 @@ function create_unified_diff(original_text, modified_text, context_lines = 3) {
|
|
|
2025
2159
|
if (output.length === 2) return "";
|
|
2026
2160
|
return output.join("\n");
|
|
2027
2161
|
}
|
|
2028
|
-
|
|
2029
|
-
|
|
2030
|
-
|
|
2031
|
-
|
|
2032
|
-
|
|
2162
|
+
function create_word_patch_diff(original_text, modified_text, original_path = "Original", modified_path = "Modified") {
|
|
2163
|
+
const edits = generate_edits_from_text(original_text, modified_text);
|
|
2164
|
+
const output = [
|
|
2165
|
+
`--- ${original_path}`,
|
|
2166
|
+
`+++ ${modified_path}`,
|
|
2167
|
+
""
|
|
2168
|
+
];
|
|
2169
|
+
const CONTEXT_SIZE = 40;
|
|
2170
|
+
for (const edit of edits) {
|
|
2171
|
+
const raw_start = edit._match_start_index || 0;
|
|
2172
|
+
const raw_target = edit.target_text || "";
|
|
2173
|
+
const raw_new = edit.new_text || "";
|
|
2174
|
+
const [prefix_len, suffix_len] = trim_common_context(raw_target, raw_new);
|
|
2175
|
+
const target_end_in_target = raw_target.length - suffix_len;
|
|
2176
|
+
const new_end_in_new = raw_new.length - suffix_len;
|
|
2177
|
+
const display_target = raw_target.substring(prefix_len, target_end_in_target);
|
|
2178
|
+
const display_new = raw_new.substring(prefix_len, new_end_in_new);
|
|
2179
|
+
const change_start = raw_start + prefix_len;
|
|
2180
|
+
const change_end = change_start + display_target.length;
|
|
2181
|
+
let pre_start = Math.max(0, change_start - CONTEXT_SIZE);
|
|
2182
|
+
let pre_context = original_text.substring(pre_start, change_start);
|
|
2183
|
+
if (pre_start > 0) pre_context = "..." + pre_context;
|
|
2184
|
+
let post_end = Math.min(original_text.length, change_end + CONTEXT_SIZE);
|
|
2185
|
+
let post_context = original_text.substring(change_end, post_end);
|
|
2186
|
+
if (post_end < original_text.length) post_context = post_context + "...";
|
|
2187
|
+
pre_context = pre_context.replace(/\n/g, " ").replace(/\r/g, "");
|
|
2188
|
+
post_context = post_context.replace(/\n/g, " ").replace(/\r/g, "");
|
|
2189
|
+
output.push("@@ Word Patch @@");
|
|
2190
|
+
output.push(` ${pre_context}`);
|
|
2191
|
+
if (display_target) output.push(`- ${display_target}`);
|
|
2192
|
+
if (display_new) output.push(`+ ${display_new}`);
|
|
2193
|
+
output.push(` ${post_context}`);
|
|
2194
|
+
output.push("");
|
|
2033
2195
|
}
|
|
2196
|
+
return output.join("\n");
|
|
2034
2197
|
}
|
|
2035
|
-
|
|
2036
|
-
|
|
2037
|
-
|
|
2038
|
-
|
|
2039
|
-
|
|
2040
|
-
|
|
2198
|
+
|
|
2199
|
+
// src/markup.ts
|
|
2200
|
+
var AMBIGUITY_EXAMPLES_CAP = 5;
|
|
2201
|
+
var AMBIGUITY_CONTEXT_CHARS = 50;
|
|
2202
|
+
function _should_strip_markers(text, marker) {
|
|
2203
|
+
if (!text.startsWith(marker) || !text.endsWith(marker)) return false;
|
|
2204
|
+
if (text.length < marker.length * 2) return false;
|
|
2205
|
+
const inner = text.substring(marker.length, text.length - marker.length);
|
|
2206
|
+
if (!inner) return false;
|
|
2207
|
+
if (inner.includes(marker)) return false;
|
|
2208
|
+
if (!/[a-zA-Z]/.test(inner)) return false;
|
|
2209
|
+
if (marker === "__" && /^\w+$/.test(inner)) return false;
|
|
2210
|
+
if (marker === "_") {
|
|
2211
|
+
if (inner.includes("_")) return false;
|
|
2212
|
+
if (/^[0-9_]+$/.test(inner)) return false;
|
|
2041
2213
|
}
|
|
2042
|
-
|
|
2043
|
-
|
|
2044
|
-
|
|
2045
|
-
|
|
2046
|
-
|
|
2047
|
-
|
|
2048
|
-
|
|
2049
|
-
|
|
2050
|
-
|
|
2051
|
-
|
|
2052
|
-
|
|
2053
|
-
|
|
2054
|
-
|
|
2055
|
-
|
|
2056
|
-
|
|
2057
|
-
|
|
2058
|
-
} else {
|
|
2059
|
-
errors.push(`- Edit ${i + 1} Failed: Cannot delete footnote/endnote references via text replace. The marker corresponds to a structural XML element.`);
|
|
2060
|
-
}
|
|
2061
|
-
}
|
|
2062
|
-
}
|
|
2063
|
-
if (t_text.includes("](") || n_text.includes("](")) {
|
|
2064
|
-
const t_links = (t_text.match(/\[(?!~)[^\]]+\]\([^)]+\)/g) || []).sort();
|
|
2065
|
-
const n_links = (n_text.match(/\[(?!~)[^\]]+\]\([^)]+\)/g) || []).sort();
|
|
2066
|
-
if (t_links.length !== n_links.length) {
|
|
2067
|
-
if (n_links.length > t_links.length) {
|
|
2068
|
-
errors.push(`- Edit ${i + 1} Failed: Cannot insert hyperlinks via text replace. Use a dedicated structural operation.`);
|
|
2069
|
-
} else {
|
|
2070
|
-
errors.push(`- Edit ${i + 1} Failed: Cannot delete hyperlinks via text replace. The marker corresponds to a structural XML element.`);
|
|
2071
|
-
}
|
|
2072
|
-
} else if (t_links.length > 1 && JSON.stringify(t_links) !== JSON.stringify(n_links)) {
|
|
2073
|
-
errors.push(`- Edit ${i + 1} Failed: Can only edit or retarget one hyperlink per text replacement. Please split into multiple edits.`);
|
|
2074
|
-
}
|
|
2075
|
-
}
|
|
2076
|
-
if (t_text.includes("[~") || n_text.includes("[~")) {
|
|
2077
|
-
const t_xrefs = t_text.match(/\[~[^~]+~\]\(#[^\)]+\)/g) || [];
|
|
2078
|
-
const n_xrefs = n_text.match(/\[~[^~]+~\]\(#[^\)]+\)/g) || [];
|
|
2079
|
-
if (t_xrefs.length !== n_xrefs.length) {
|
|
2080
|
-
if (n_xrefs.length > t_xrefs.length) {
|
|
2081
|
-
errors.push(`- Edit ${i + 1} Failed: Cannot insert cross-references via text replace. Markers are read-only projections.`);
|
|
2082
|
-
} else {
|
|
2083
|
-
errors.push(`- Edit ${i + 1} Failed: Cannot delete cross-references via text replace. The marker corresponds to a structural XML element.`);
|
|
2084
|
-
}
|
|
2085
|
-
} else {
|
|
2086
|
-
if (JSON.stringify(t_xrefs) !== JSON.stringify(n_xrefs)) {
|
|
2087
|
-
errors.push(`- Edit ${i + 1} Failed: Modifying or retargeting cross-reference markers is disallowed to prevent dependency corruption.`);
|
|
2088
|
-
}
|
|
2089
|
-
}
|
|
2214
|
+
return true;
|
|
2215
|
+
}
|
|
2216
|
+
function _strip_balanced_markers(text) {
|
|
2217
|
+
let prefix_markup = "";
|
|
2218
|
+
let suffix_markup = "";
|
|
2219
|
+
let clean_text = text;
|
|
2220
|
+
const markers = ["**", "__", "_", "*"];
|
|
2221
|
+
for (const marker of markers) {
|
|
2222
|
+
if (_should_strip_markers(clean_text, marker)) {
|
|
2223
|
+
prefix_markup += marker;
|
|
2224
|
+
suffix_markup = marker + suffix_markup;
|
|
2225
|
+
clean_text = clean_text.substring(
|
|
2226
|
+
marker.length,
|
|
2227
|
+
clean_text.length - marker.length
|
|
2228
|
+
);
|
|
2229
|
+
break;
|
|
2090
2230
|
}
|
|
2091
|
-
|
|
2092
|
-
|
|
2093
|
-
|
|
2094
|
-
|
|
2095
|
-
|
|
2096
|
-
|
|
2097
|
-
|
|
2098
|
-
|
|
2231
|
+
}
|
|
2232
|
+
return [prefix_markup, clean_text, suffix_markup];
|
|
2233
|
+
}
|
|
2234
|
+
function _replace_smart_quotes(text) {
|
|
2235
|
+
return text.replace(/“/g, '"').replace(/”/g, '"').replace(/‘/g, "'").replace(/’/g, "'");
|
|
2236
|
+
}
|
|
2237
|
+
function _find_safe_boundaries(text, start, end) {
|
|
2238
|
+
let new_start = start;
|
|
2239
|
+
let new_end = end;
|
|
2240
|
+
const expand_if_unbalanced = (marker) => {
|
|
2241
|
+
const current_match = text.substring(new_start, new_end);
|
|
2242
|
+
const count = (current_match.match(new RegExp(marker.replace(/\*/g, "\\*"), "g")) || []).length;
|
|
2243
|
+
if (count % 2 !== 0) {
|
|
2244
|
+
const suffix = text.substring(new_end);
|
|
2245
|
+
if (suffix.startsWith(marker)) {
|
|
2246
|
+
new_end += marker.length;
|
|
2247
|
+
return;
|
|
2099
2248
|
}
|
|
2100
|
-
|
|
2101
|
-
|
|
2102
|
-
|
|
2103
|
-
|
|
2104
|
-
const stripped = line.trimStart();
|
|
2105
|
-
if (stripped.startsWith("#######")) {
|
|
2106
|
-
const level = stripped.length - stripped.replace(/^#+/, "").length;
|
|
2107
|
-
if (stripped.substring(level).startsWith(" ") || stripped.substring(level) === "") {
|
|
2108
|
-
errors.push(`- Edit ${i + 1} Failed: Heading level ${level} is not supported (maximum is 6).`);
|
|
2109
|
-
break;
|
|
2110
|
-
}
|
|
2111
|
-
}
|
|
2249
|
+
const prefix = text.substring(0, new_start);
|
|
2250
|
+
if (prefix.endsWith(marker)) {
|
|
2251
|
+
new_start -= marker.length;
|
|
2252
|
+
return;
|
|
2112
2253
|
}
|
|
2113
2254
|
}
|
|
2114
|
-
|
|
2115
|
-
|
|
2116
|
-
|
|
2255
|
+
};
|
|
2256
|
+
for (let i = 0; i < 2; i++) {
|
|
2257
|
+
expand_if_unbalanced("**");
|
|
2258
|
+
expand_if_unbalanced("__");
|
|
2259
|
+
expand_if_unbalanced("_");
|
|
2260
|
+
expand_if_unbalanced("*");
|
|
2117
2261
|
}
|
|
2118
|
-
return
|
|
2262
|
+
return [new_start, new_end];
|
|
2119
2263
|
}
|
|
2120
|
-
|
|
2121
|
-
|
|
2122
|
-
|
|
2123
|
-
|
|
2124
|
-
|
|
2125
|
-
|
|
2126
|
-
|
|
2127
|
-
|
|
2128
|
-
|
|
2129
|
-
|
|
2130
|
-
|
|
2131
|
-
|
|
2132
|
-
|
|
2133
|
-
|
|
2134
|
-
for (const part of this.doc.pkg.parts) {
|
|
2135
|
-
if (part === this.doc.part || part.contentType.includes("wordprocessingml") && part.contentType.endsWith("+xml")) {
|
|
2136
|
-
if (!part._element.hasAttribute("xmlns:w16du")) {
|
|
2137
|
-
part._element.setAttribute("xmlns:w16du", w16du_ns);
|
|
2138
|
-
}
|
|
2264
|
+
function _refine_match_boundaries(text, start, end) {
|
|
2265
|
+
const markers = ["**", "__", "*", "_"];
|
|
2266
|
+
let current_text = text.substring(start, end);
|
|
2267
|
+
let best_start = start;
|
|
2268
|
+
let best_end = end;
|
|
2269
|
+
const countMarker = (str, mk) => (str.match(new RegExp(mk.replace(/\*/g, "\\*"), "g")) || []).length;
|
|
2270
|
+
for (const marker of markers) {
|
|
2271
|
+
if (current_text.startsWith(marker)) {
|
|
2272
|
+
const current_score = countMarker(current_text, marker) % 2;
|
|
2273
|
+
const trimmed_text = current_text.substring(marker.length);
|
|
2274
|
+
const trimmed_score = countMarker(trimmed_text, marker) % 2;
|
|
2275
|
+
if (current_score === 1 && trimmed_score === 0) {
|
|
2276
|
+
best_start += marker.length;
|
|
2277
|
+
current_text = trimmed_text;
|
|
2139
2278
|
}
|
|
2140
2279
|
}
|
|
2141
|
-
this.current_id = this._scan_existing_ids();
|
|
2142
|
-
this.mapper = new DocumentMapper(this.doc);
|
|
2143
|
-
this.comments_manager = new CommentsManager(this.doc);
|
|
2144
2280
|
}
|
|
2145
|
-
|
|
2146
|
-
|
|
2147
|
-
|
|
2148
|
-
const
|
|
2149
|
-
|
|
2150
|
-
|
|
2151
|
-
|
|
2281
|
+
for (const marker of markers) {
|
|
2282
|
+
if (current_text.endsWith(marker)) {
|
|
2283
|
+
const current_score = countMarker(current_text, marker) % 2;
|
|
2284
|
+
const trimmed_text = current_text.substring(
|
|
2285
|
+
0,
|
|
2286
|
+
current_text.length - marker.length
|
|
2287
|
+
);
|
|
2288
|
+
const trimmed_score = countMarker(trimmed_text, marker) % 2;
|
|
2289
|
+
if (current_score === 1 && trimmed_score === 0) {
|
|
2290
|
+
best_end -= marker.length;
|
|
2291
|
+
current_text = trimmed_text;
|
|
2152
2292
|
}
|
|
2153
2293
|
}
|
|
2154
|
-
return maxId;
|
|
2155
2294
|
}
|
|
2156
|
-
|
|
2157
|
-
|
|
2158
|
-
|
|
2159
|
-
|
|
2160
|
-
|
|
2161
|
-
|
|
2162
|
-
|
|
2163
|
-
|
|
2164
|
-
|
|
2165
|
-
|
|
2295
|
+
return [best_start, best_end];
|
|
2296
|
+
}
|
|
2297
|
+
/**
 * Builds the source text of a tolerant regular expression for `target_text`.
 *
 * The target is first passed through `_replace_smart_quotes`, then split into
 * tokens. Between tokens the pattern allows runs of `*` / `_` (markdown
 * emphasis noise). Individual tokens are loosened as follows:
 *   - a run of underscores matches any run of underscores;
 *   - whitespace matches any whitespace; when it contains a newline it may
 *     also absorb list/quote markers or line breaks;
 *   - a straight quote also matches its curly variants;
 *   - everything else is matched literally (regex-escaped).
 * An optional leading list marker is accepted before the whole match.
 *
 * @param {string} target_text Text the caller wants to locate.
 * @returns {string} Pattern source suitable for `new RegExp(...)`.
 */
function _make_fuzzy_regex(target_text) {
  const MD_NOISE = "[*_]*";
  const STRUCTURAL_NOISE = "(?:\\s*(?:[*+\\->]|\\d+\\.)\\s+|\\s*\\n\\s*)";
  const START_LIST_MARKER = "(?:[ \\t]*(?:[*+\\->]|\\d+\\.)\\s+)?";
  const escape_literal = (chunk) => chunk.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

  const normalized = _replace_smart_quotes(target_text);
  const pieces = [START_LIST_MARKER, MD_NOISE];
  let cursor = 0;

  for (const token of normalized.matchAll(/(_+)|(\s+)|(['"])|([.,;:\/])/g)) {
    const [, underscores, whitespace, quote, punctuation] = token;

    // Plain text sitting between the previous token and this one.
    const before = normalized.substring(cursor, token.index);
    if (before) {
      pieces.push(escape_literal(before), MD_NOISE);
    }

    if (underscores) {
      pieces.push("_+");
    } else if (whitespace) {
      pieces.push(
        whitespace.includes("\n") ? `(?:${STRUCTURAL_NOISE}|\\s+)+` : "\\s+"
      );
    } else if (quote) {
      pieces.push(quote === "'" ? "[\u2018\u2019']" : '["\u201C\u201D]');
    } else if (punctuation) {
      pieces.push(escape_literal(punctuation));
    }

    pieces.push(MD_NOISE);
    cursor = token.index + token[0].length;
  }

  // Trailing literal text gets no noise allowance after it.
  const tail = normalized.substring(cursor);
  if (tail) {
    pieces.push(escape_literal(tail));
  }
  return pieces.join("");
}
|
|
2340
|
+
/**
 * Locates `target` inside `text`, trying progressively looser strategies:
 *   1. exact substring search;
 *   2. substring search after `_replace_smart_quotes` on both sides;
 *   3. the fuzzy pattern produced by `_make_fuzzy_regex`, whose raw match is
 *      first adjusted by `_refine_match_boundaries`.
 * Every successful hit is passed through `_find_safe_boundaries` before being
 * returned.
 *
 * @param {string} text Haystack.
 * @param {string} target Needle; an empty value never matches.
 * @returns {[number, number]} `[start, end]` offsets, or `[-1, -1]` if nothing matched.
 */
function _find_match_in_text(text, target) {
  if (!target) {
    return [-1, -1];
  }

  const exact_at = text.indexOf(target);
  if (exact_at !== -1) {
    return _find_safe_boundaries(text, exact_at, exact_at + target.length);
  }

  // NOTE(review): offsets found in the normalized text are applied to the
  // original text, which assumes normalization preserves length — confirm.
  const plain_text = _replace_smart_quotes(text);
  const plain_target = _replace_smart_quotes(target);
  const plain_at = plain_text.indexOf(plain_target);
  if (plain_at !== -1) {
    return _find_safe_boundaries(text, plain_at, plain_at + plain_target.length);
  }

  try {
    const hit = new RegExp(_make_fuzzy_regex(target)).exec(text);
    if (hit) {
      const [start, end] = _refine_match_boundaries(
        text,
        hit.index,
        hit.index + hit[0].length
      );
      return _find_safe_boundaries(text, start, end);
    }
  } catch (e) {
    // Best effort: any failure in the fuzzy stage is reported as "no match".
  }
  return [-1, -1];
}
|
|
2366
|
+
/**
 * Renders one edit as CriticMarkup.
 *
 * Markers split off the target by `_strip_balanced_markers` are kept outside
 * the CriticMarkup braces. If `new_text` is wrapped in the same prefix/suffix
 * and is longer than twice the prefix, the wrapper is removed from it as well.
 *
 * Output layout: prefix, then `{==target==}` (highlight mode) or a
 * deletion / insertion / both, then suffix, then an optional
 * `{>>comment [Edit:N]<<}` block.
 *
 * @param {string} target_text Matched text being replaced.
 * @param {string} new_text Replacement text (may be empty).
 * @param {string} comment Optional reviewer comment.
 * @param {number} edit_index Index printed when `include_index` is set.
 * @param {boolean} include_index Whether to append `[Edit:N]`.
 * @param {boolean} highlight_only Emit a highlight instead of a change.
 * @returns {string} The assembled markup.
 */
function _build_critic_markup(target_text, new_text, comment, edit_index, include_index, highlight_only) {
  const [prefix_markup, clean_target, suffix_markup] = _strip_balanced_markers(target_text);

  let clean_new = new_text;
  const shares_wrapper =
    prefix_markup &&
    new_text &&
    new_text.startsWith(prefix_markup) &&
    new_text.endsWith(suffix_markup);
  if (shares_wrapper) {
    const marker_len = prefix_markup.length;
    if (new_text.length > marker_len * 2) {
      clean_new = new_text.substring(marker_len, new_text.length - marker_len);
    }
  }

  let change = "";
  if (highlight_only) {
    change = `{==${clean_target}==}`;
  } else {
    if (clean_target) change += `{--${clean_target}--}`;
    if (clean_new) change += `{++${clean_new}++}`;
  }

  const meta = [];
  if (comment) meta.push(comment);
  if (include_index) meta.push(`[Edit:${edit_index}]`);
  const meta_markup = meta.length > 0 ? `{>>${meta.join(" ")}<<}` : "";

  return [prefix_markup, change, suffix_markup, meta_markup].join("");
}
|
|
2396
|
+
/**
 * Returns `markdown_text` with each locatable edit rendered as CriticMarkup.
 *
 * Works in three passes:
 *   1. find every edit's target in the ORIGINAL text (edits without a target
 *      or without a match are dropped);
 *   2. in submission order, keep an edit only if its range does not overlap
 *      one that was already kept;
 *   3. splice the markup in from the end of the text towards the start, so the
 *      offsets of earlier ranges stay valid.
 * Text shared by the matched span and the replacement (as reported by
 * `trim_common_context`) is left outside the markup.
 *
 * @param {string} markdown_text Source text.
 * @param {Array<object>} edits Objects with `target_text`, `new_text`, `comment`.
 * @param {boolean} [include_index=false] Append `[Edit:N]` using the edit's position in `edits`.
 * @param {boolean} [highlight_only=false] Emit highlights instead of changes.
 * @returns {string} The annotated text (the input itself when there are no edits).
 */
function apply_edits_to_markdown(markdown_text, edits, include_index = false, highlight_only = false) {
  if (!edits || edits.length === 0) return markdown_text;

  // Pass 1: locate targets. Entries are appended in ascending edit index.
  const located = [];
  for (let idx = 0; idx < edits.length; idx++) {
    const edit = edits[idx];
    const target = edit.target_text || "";
    if (!target) continue;
    const [start, end] = _find_match_in_text(markdown_text, target);
    if (start === -1) continue;
    located.push({
      start,
      end,
      matched: markdown_text.substring(start, end),
      edit,
      idx
    });
  }

  // Pass 2: first come, first served on overlapping ranges.
  const accepted = [];
  for (const candidate of located) {
    const collides = accepted.some(
      (taken) => candidate.start < taken.end && candidate.end > taken.start
    );
    if (!collides) accepted.push(candidate);
  }

  // Pass 3: apply right-to-left.
  accepted.sort((a, b) => b.start - a.start);
  let result = markdown_text;
  for (const { start, end, matched, edit, idx } of accepted) {
    const new_txt = edit.new_text || "";
    const [prefix_len, suffix_len] = trim_common_context(matched, new_txt);
    const kept_prefix = prefix_len > 0 ? matched.substring(0, prefix_len) : "";
    const kept_suffix =
      suffix_len > 0 ? matched.substring(matched.length - suffix_len) : "";
    const markup = _build_critic_markup(
      matched.substring(prefix_len, matched.length - suffix_len),
      new_txt.substring(prefix_len, new_txt.length - suffix_len),
      edit.comment,
      idx,
      include_index,
      highlight_only
    );
    result =
      result.substring(0, start) +
      (kept_prefix + markup + kept_suffix) +
      result.substring(end);
  }
  return result;
}
|
|
2456
|
+
/**
 * Formats the "ambiguous match" message for an edit whose target occurs more
 * than once in `haystack`.
 *
 * Up to `AMBIGUITY_EXAMPLES_CAP` occurrences are listed, each with up to
 * `AMBIGUITY_CONTEXT_CHARS` characters of context on either side and the match
 * itself in square brackets. Newlines are shown as spaces, matches longer than
 * 50 characters are abbreviated, and `...` marks context that was cut off.
 *
 * @param {number} edit_index Number printed after "Edit".
 * @param {string} target_text Unused by the formatter itself.
 * @param {string} haystack Text that was searched.
 * @param {Array<[number, number]>} match_positions `[start, end]` offsets of every match.
 * @returns {string} Multi-line message.
 * @throws {Error} When fewer than two matches are supplied.
 */
function format_ambiguity_error(edit_index, target_text, haystack, match_positions) {
  const total = match_positions.length;
  if (total < 2) {
    throw new Error(
      `format_ambiguity_error requires at least 2 matches, got ${total}`
    );
  }

  const one_line = (chunk) => chunk.replace(/\n/g, " ");
  const shown = match_positions.slice(0, AMBIGUITY_EXAMPLES_CAP);

  const examples = shown.map(([start, end], i) => {
    const window_start = Math.max(0, start - AMBIGUITY_CONTEXT_CHARS);
    const window_end = Math.min(haystack.length, end + AMBIGUITY_CONTEXT_CHARS);
    const lead = window_start > 0 ? "..." : "";
    const trail = window_end < haystack.length ? "..." : "";
    const pre_context = one_line(haystack.substring(window_start, start));
    const post_context = one_line(haystack.substring(end, window_end));

    let match_text = one_line(haystack.substring(start, end));
    if (match_text.length > 50) {
      match_text = match_text.substring(0, 25) + "..." + match_text.substring(match_text.length - 20);
    }
    return ` ${i + 1}. "${lead}${pre_context}[${match_text}]${post_context}${trail}"`;
  });

  const report = [
    `- Edit ${edit_index} Failed: Ambiguous match. Target text appears ${total} times. First ${shown.length} occurrences:`,
    ...examples
  ];
  const hidden = total - shown.length;
  if (hidden > 0) {
    report.push(` ... and ${hidden} more occurrence(s) not shown.`);
  }
  report.push(
    " Please provide more surrounding context in your target_text to uniquely identify the location."
  );
  return report.join("\n");
}
|
|
2492
|
+
|
|
2493
|
+
// src/engine.ts
|
|
2494
|
+
/**
 * Returns the closest following sibling of `el` that is an element node
 * (`nodeType === 1`), skipping text, comment and other node kinds.
 *
 * @param {Node} el Starting node.
 * @returns {Node|null} The next element sibling, or null when there is none.
 */
function getNextElement(el) {
  for (let node = el.nextSibling; node; node = node.nextSibling) {
    if (node.nodeType === 1) {
      return node;
    }
  }
  return null;
}
|
|
2502
|
+
/**
 * Returns the closest preceding sibling of `el` that is an element node
 * (`nodeType === 1`), skipping text, comment and other node kinds.
 *
 * @param {Node} el Starting node.
 * @returns {Node|null} The previous element sibling, or null when there is none.
 */
function getPreviousElement(el) {
  for (let node = el.previousSibling; node; node = node.previousSibling) {
    if (node.nodeType === 1) {
      return node;
    }
  }
  return null;
}
|
|
2510
|
+
/**
 * Inserts `newNode` immediately after `refNode` under the same parent.
 * Nothing happens when `refNode` is detached (has no parent).
 *
 * @param {Node} newNode Node to insert.
 * @param {Node} refNode Node after which `newNode` is placed.
 */
function insertAfter(newNode, refNode) {
  if (!refNode.parentNode) {
    return;
  }
  // The reference passed on is whatever follows refNode (possibly null).
  refNode.parentNode.insertBefore(newNode, refNode.nextSibling);
}
|
|
2515
|
+
/**
 * Error raised when a batch of edits fails validation. The individual
 * problem descriptions are kept on `errors`; the message lists them one per
 * line under a fixed heading.
 */
var BatchValidationError = class extends Error {
  /** Individual validation messages, as passed to the constructor. */
  errors;
  /**
   * @param {string[]} errors One message per failed check.
   */
  constructor(errors) {
    const listing = errors.join("\n");
    super(`Batch validation failed:\n${listing}`);
    this.errors = errors;
    this.name = "BatchValidationError";
  }
};
|
|
2523
|
+
/**
 * Checks the raw strings of each edit for constructs that a plain text
 * replacement must not introduce, remove or alter, and returns one message
 * per violation (an edit can produce several). Edits are numbered from 1 in
 * the messages.
 *
 * Checks, in the order their messages are emitted for an edit:
 *   1. CriticMarkup tags hand-written in `new_text`;
 *   2. footnote/endnote markers (`[^fn-…]`, `[^en-…]`) added or removed;
 *   3. hyperlinks added or removed, or several links changed at once;
 *   4. cross-reference markers (`[~…~](#…)`) added, removed or changed;
 *   5. anchor markers (`{#…}`) that occur more often in `new_text`;
 *   6. for `type === "modify"`: a heading line deeper than level 6;
 *   7. any mention of the read-only structural appendix.
 *
 * @param {Array<object>} edits Objects with `target_text`, `new_text`, `type`.
 * @returns {string[]} Violation messages; empty when everything is acceptable.
 */
function validate_edit_strings(edits) {
  const FOOTNOTE_RE = /\[\^(?:fn|en)-[^\]]+\]/g;
  const LINK_RE = /\[(?!~)[^\]]+\]\([^)]+\)/g;
  const XREF_RE = /\[~[^~]+~\]\(#[^\)]+\)/g;
  const ANCHOR_RE = /\{#[^\}]+\}/g;
  const CRITIC_TAGS = ["{++", "{--", "{>>", "{=="];
  const READONLY_MARKERS = [
    "READONLY_BOUNDARY_START",
    "# Document Structure (Read-Only)"
  ];

  const find_all = (text, re) => text.match(re) || [];
  const occurrences = (list, item) => list.filter((x) => x === item).length;
  const same_sequence = (a, b) => JSON.stringify(a) === JSON.stringify(b);
  // True when some marker appears more often in `after` than in `before`.
  const gained_any = (after, before) =>
    after.some((m) => occurrences(after, m) > occurrences(before, m));

  const errors = [];
  for (let i = 0; i < edits.length; i++) {
    const edit = edits[i];
    const before = edit.target_text || "";
    const after = edit.new_text || "";
    const either_has = (needle) => before.includes(needle) || after.includes(needle);

    if (CRITIC_TAGS.some((tag) => after.includes(tag))) {
      errors.push(
        `- Edit ${i + 1} Failed: Do not manually write CriticMarkup tags ({++, {--, {>>, {==) in \`new_text\`. The engine handles redlining automatically. To add a comment, use the \`comment\` parameter.`
      );
    }

    if (either_has("[^")) {
      const old_notes = find_all(before, FOOTNOTE_RE).sort();
      const new_notes = find_all(after, FOOTNOTE_RE).sort();
      if (!same_sequence(old_notes, new_notes)) {
        const inserting =
          new_notes.length > old_notes.length || gained_any(new_notes, old_notes);
        errors.push(
          inserting
            ? `- Edit ${i + 1} Failed: Cannot insert footnote/endnote markers via text replace. Markers like \`[^fn-N]\` are read-only projections. Use Word's References menu.`
            : `- Edit ${i + 1} Failed: Cannot delete footnote/endnote references via text replace. The marker corresponds to a structural XML element.`
        );
      }
    }

    if (either_has("](")) {
      const old_links = find_all(before, LINK_RE).sort();
      const new_links = find_all(after, LINK_RE).sort();
      if (new_links.length > old_links.length) {
        errors.push(
          `- Edit ${i + 1} Failed: Cannot insert hyperlinks via text replace. Use a dedicated structural operation.`
        );
      } else if (new_links.length < old_links.length) {
        errors.push(
          `- Edit ${i + 1} Failed: Cannot delete hyperlinks via text replace. The marker corresponds to a structural XML element.`
        );
      } else if (old_links.length > 1 && !same_sequence(old_links, new_links)) {
        errors.push(
          `- Edit ${i + 1} Failed: Can only edit or retarget one hyperlink per text replacement. Please split into multiple edits.`
        );
      }
    }

    if (either_has("[~")) {
      // Cross-references are compared in document order (not sorted).
      const old_xrefs = find_all(before, XREF_RE);
      const new_xrefs = find_all(after, XREF_RE);
      if (new_xrefs.length > old_xrefs.length) {
        errors.push(
          `- Edit ${i + 1} Failed: Cannot insert cross-references via text replace. Markers are read-only projections.`
        );
      } else if (new_xrefs.length < old_xrefs.length) {
        errors.push(
          `- Edit ${i + 1} Failed: Cannot delete cross-references via text replace. The marker corresponds to a structural XML element.`
        );
      } else if (!same_sequence(old_xrefs, new_xrefs)) {
        errors.push(
          `- Edit ${i + 1} Failed: Modifying or retargeting cross-reference markers is disallowed to prevent dependency corruption.`
        );
      }
    }

    if (either_has("{#")) {
      const old_anchors = find_all(before, ANCHOR_RE);
      const new_anchors = find_all(after, ANCHOR_RE);
      if (gained_any(new_anchors, old_anchors)) {
        errors.push(
          `- Edit ${i + 1} Failed: Cannot modify or insert internal anchor markers (\`{#...}\`). These represent structural XML bookmarks.`
        );
      }
    }

    if (edit.type === "modify" && after) {
      // Only the first offending heading line is reported.
      const too_deep = after
        .split(/[\r\n]+/)
        .map((line) => line.trimStart())
        .find((line) => {
          if (!line.startsWith("#######")) return false;
          const rest = line.replace(/^#+/, "");
          return rest.startsWith(" ") || rest === "";
        });
      if (too_deep !== undefined) {
        const level = too_deep.length - too_deep.replace(/^#+/, "").length;
        errors.push(
          `- Edit ${i + 1} Failed: Heading level ${level} is not supported (maximum is 6).`
        );
      }
    }

    if (READONLY_MARKERS.some(either_has)) {
      errors.push(
        `- Edit ${i + 1} Failed: Modification targets the read-only boundary (Structural Appendix). This section cannot be edited.`
      );
    }
  }
  return errors;
}
|
|
2626
|
+
var RedlineEngine = class {
|
|
2627
|
+
doc;
|
|
2628
|
+
author;
|
|
2629
|
+
timestamp;
|
|
2630
|
+
current_id;
|
|
2631
|
+
mapper;
|
|
2632
|
+
comments_manager;
|
|
2633
|
+
clean_mapper = null;
|
|
2634
|
+
skipped_details = [];
|
|
2635
|
+
constructor(doc, author = "Adeu AI (TS)") {
|
|
2636
|
+
this.doc = doc;
|
|
2637
|
+
this.author = author;
|
|
2638
|
+
this.timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/\.\d{3}Z$/, "Z");
|
|
2639
|
+
const w16du_ns = "http://schemas.microsoft.com/office/word/2023/wordml/word16du";
|
|
2640
|
+
for (const part of this.doc.pkg.parts) {
|
|
2641
|
+
if (part === this.doc.part || part.contentType.includes("wordprocessingml") && part.contentType.endsWith("+xml")) {
|
|
2642
|
+
if (!part._element.hasAttribute("xmlns:w16du")) {
|
|
2643
|
+
part._element.setAttribute("xmlns:w16du", w16du_ns);
|
|
2644
|
+
}
|
|
2645
|
+
}
|
|
2646
|
+
}
|
|
2647
|
+
this.current_id = this._scan_existing_ids();
|
|
2648
|
+
this.mapper = new DocumentMapper(this.doc);
|
|
2649
|
+
this.comments_manager = new CommentsManager(this.doc);
|
|
2650
|
+
}
|
|
2651
|
+
_scan_existing_ids() {
|
|
2652
|
+
let maxId = 0;
|
|
2653
|
+
for (const tag of ["w:ins", "w:del"]) {
|
|
2654
|
+
const elements = findAllDescendants(this.doc.element, tag);
|
|
2655
|
+
for (const el of elements) {
|
|
2656
|
+
const val = parseInt(el.getAttribute("w:id") || "0", 10);
|
|
2657
|
+
if (!isNaN(val) && val > maxId) maxId = val;
|
|
2658
|
+
}
|
|
2659
|
+
}
|
|
2660
|
+
return maxId;
|
|
2661
|
+
}
|
|
2662
|
+
accept_all_revisions() {
|
|
2663
|
+
const parts_to_process = [this.doc.element];
|
|
2664
|
+
for (const part of this.doc.pkg.parts) {
|
|
2665
|
+
if (part === this.doc.part) continue;
|
|
2666
|
+
if (part.contentType.includes("wordprocessingml") && part.contentType.endsWith("+xml")) {
|
|
2667
|
+
parts_to_process.push(part._element);
|
|
2668
|
+
}
|
|
2669
|
+
}
|
|
2670
|
+
for (const root_element of parts_to_process) {
|
|
2671
|
+
const insNodes = findAllDescendants(root_element, "w:ins");
|
|
2672
|
+
for (const ins of insNodes) {
|
|
2673
|
+
this._clean_wrapping_comments(ins);
|
|
2674
|
+
const parent = ins.parentNode;
|
|
2675
|
+
if (!parent) continue;
|
|
2676
|
+
if (parent.tagName === "w:trPr") {
|
|
2677
|
+
parent.removeChild(ins);
|
|
2678
|
+
continue;
|
|
2679
|
+
}
|
|
2680
|
+
while (ins.firstChild) {
|
|
2681
|
+
parent.insertBefore(ins.firstChild, ins);
|
|
2682
|
+
}
|
|
2683
|
+
parent.removeChild(ins);
|
|
2684
|
+
}
|
|
2685
|
+
const pNodes = findAllDescendants(root_element, "w:p");
|
|
2686
|
+
for (const p of pNodes) {
|
|
2687
|
+
const pPr = findChild(p, "w:pPr");
|
|
2688
|
+
if (pPr) {
|
|
2689
|
+
const rPr = findChild(pPr, "w:rPr");
|
|
2690
|
+
const delMark = rPr ? findChild(rPr, "w:del") : null;
|
|
2691
|
+
if (rPr && delMark) {
|
|
2692
|
+
let has_content = false;
|
|
2693
|
+
for (const tag of ["w:t", "w:tab", "w:br"]) {
|
|
2694
|
+
for (const child of findAllDescendants(p, tag)) {
|
|
2695
|
+
if (tag === "w:t" && !child.textContent) continue;
|
|
2696
|
+
let is_deleted = false;
|
|
2697
|
+
let curr = child.parentNode;
|
|
2698
|
+
while (curr && curr !== p) {
|
|
2699
|
+
if (curr.tagName === "w:del") {
|
|
2700
|
+
is_deleted = true;
|
|
2701
|
+
break;
|
|
2702
|
+
}
|
|
2703
|
+
curr = curr.parentNode;
|
|
2704
|
+
}
|
|
2705
|
+
if (!is_deleted) {
|
|
2706
|
+
has_content = true;
|
|
2707
|
+
break;
|
|
2708
|
+
}
|
|
2709
|
+
}
|
|
2710
|
+
if (has_content) {
|
|
2711
|
+
break;
|
|
2712
|
+
}
|
|
2713
|
+
}
|
|
2714
|
+
if (has_content) {
|
|
2715
|
+
rPr.removeChild(delMark);
|
|
2716
|
+
} else {
|
|
2717
|
+
this._clean_wrapping_comments(p);
|
|
2718
|
+
this._delete_comments_in_element(p);
|
|
2719
|
+
if (p.parentNode) {
|
|
2720
|
+
p.parentNode.removeChild(p);
|
|
2721
|
+
}
|
|
2722
|
+
}
|
|
2723
|
+
}
|
|
2724
|
+
}
|
|
2725
|
+
}
|
|
2726
|
+
const delNodes = findAllDescendants(root_element, "w:del");
|
|
2727
|
+
for (const d of delNodes) {
|
|
2728
|
+
this._clean_wrapping_comments(d);
|
|
2729
|
+
this._delete_comments_in_element(d);
|
|
2730
|
+
const parent = d.parentNode;
|
|
2731
|
+
if (parent) {
|
|
2732
|
+
if (parent.tagName === "w:trPr") {
|
|
2733
|
+
const row = parent.parentNode;
|
|
2734
|
+
if (row && row.parentNode) {
|
|
2735
|
+
row.parentNode.removeChild(row);
|
|
2736
|
+
}
|
|
2737
|
+
} else {
|
|
2738
|
+
parent.removeChild(d);
|
|
2739
|
+
}
|
|
2740
|
+
}
|
|
2741
|
+
}
|
|
2742
|
+
}
|
|
2743
|
+
const comment_ids = /* @__PURE__ */ new Set();
|
|
2744
|
+
for (const tag of [
|
|
2745
|
+
"w:commentRangeStart",
|
|
2746
|
+
"w:commentRangeEnd",
|
|
2747
|
+
"w:commentReference"
|
|
2748
|
+
]) {
|
|
2749
|
+
for (const node of findAllDescendants(this.doc.element, tag)) {
|
|
2750
|
+
const cid = node.getAttribute("w:id");
|
|
2751
|
+
if (cid) comment_ids.add(cid);
|
|
2752
|
+
}
|
|
2753
|
+
}
|
|
2754
|
+
const comments_part = this.doc.pkg.parts.find(
|
|
2755
|
+
(p) => p.contentType === "application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml"
|
|
2756
|
+
);
|
|
2757
|
+
if (comments_part) {
|
|
2758
|
+
for (const c of findAllDescendants(comments_part._element, "w:comment")) {
|
|
2759
|
+
const cid = c.getAttribute("w:id");
|
|
2760
|
+
if (cid) comment_ids.add(cid);
|
|
2761
|
+
}
|
|
2762
|
+
}
|
|
2763
|
+
for (const cid of comment_ids) {
|
|
2764
|
+
this.comments_manager.deleteComment(cid);
|
|
2765
|
+
}
|
|
2766
|
+
}
|
|
2767
|
+
_getNextId() {
|
|
2768
|
+
this.current_id++;
|
|
2769
|
+
return this.current_id.toString();
|
|
2181
2770
|
}
|
|
2182
2771
|
_create_track_change_tag(tagName, author = "", reuseId = null) {
|
|
2183
2772
|
const xmlDoc = this.doc.part._element.ownerDocument;
|
|
@@ -2189,11 +2778,267 @@ var RedlineEngine = class {
|
|
|
2189
2778
|
tag.setAttribute("w16du:dateUtc", this.timestamp);
|
|
2190
2779
|
return tag;
|
|
2191
2780
|
}
|
|
2192
|
-
_set_text_content(element, text) {
|
|
2193
|
-
element.textContent = text;
|
|
2194
|
-
if (text.trim() !== text) {
|
|
2195
|
-
element.setAttribute("xml:space", "preserve");
|
|
2781
|
+
_set_text_content(element, text) {
|
|
2782
|
+
element.textContent = text;
|
|
2783
|
+
if (text.trim() !== text) {
|
|
2784
|
+
element.setAttribute("xml:space", "preserve");
|
|
2785
|
+
}
|
|
2786
|
+
}
|
|
2787
|
+
/**
|
|
2788
|
+
* Attaches a comment that wraps a contiguous range within a single paragraph.
|
|
2789
|
+
* start_element and end_element must both be direct children of parent_element
|
|
2790
|
+
* and start_element must come before (or equal) end_element in document order.
|
|
2791
|
+
* Ported from Python `RedlineEngine._attach_comment`.
|
|
2792
|
+
*/
|
|
2793
|
+
_attach_comment(parent_element, start_element, end_element, text) {
|
|
2794
|
+
if (!text) return;
|
|
2795
|
+
const comment_id = this.comments_manager.addComment(this.author, text);
|
|
2796
|
+
const xmlDoc = parent_element.ownerDocument;
|
|
2797
|
+
const range_start = xmlDoc.createElement("w:commentRangeStart");
|
|
2798
|
+
range_start.setAttribute("w:id", comment_id);
|
|
2799
|
+
const range_end = xmlDoc.createElement("w:commentRangeEnd");
|
|
2800
|
+
range_end.setAttribute("w:id", comment_id);
|
|
2801
|
+
const ref_run = xmlDoc.createElement("w:r");
|
|
2802
|
+
const rPr = xmlDoc.createElement("w:rPr");
|
|
2803
|
+
const rStyle = xmlDoc.createElement("w:rStyle");
|
|
2804
|
+
rStyle.setAttribute("w:val", "CommentReference");
|
|
2805
|
+
rPr.appendChild(rStyle);
|
|
2806
|
+
ref_run.appendChild(rPr);
|
|
2807
|
+
const ref = xmlDoc.createElement("w:commentReference");
|
|
2808
|
+
ref.setAttribute("w:id", comment_id);
|
|
2809
|
+
ref_run.appendChild(ref);
|
|
2810
|
+
parent_element.insertBefore(range_start, start_element);
|
|
2811
|
+
const after_end = end_element.nextSibling;
|
|
2812
|
+
if (after_end) {
|
|
2813
|
+
parent_element.insertBefore(range_end, after_end);
|
|
2814
|
+
parent_element.insertBefore(ref_run, range_end.nextSibling);
|
|
2815
|
+
} else {
|
|
2816
|
+
parent_element.appendChild(range_end);
|
|
2817
|
+
parent_element.appendChild(ref_run);
|
|
2818
|
+
}
|
|
2819
|
+
}
|
|
2820
|
+
/**
|
|
2821
|
+
* Attaches a comment that spans across two different paragraphs (or other block
|
|
2822
|
+
* containers). start_element lives inside start_p, end_element lives inside end_p,
|
|
2823
|
+
* and the comment is open from start_element through end_element.
|
|
2824
|
+
* Ported from Python `RedlineEngine._attach_comment_spanning`.
|
|
2825
|
+
*/
|
|
2826
|
+
_attach_comment_spanning(start_p, start_el, end_p, end_el, text) {
|
|
2827
|
+
if (!text) return;
|
|
2828
|
+
const comment_id = this.comments_manager.addComment(this.author, text);
|
|
2829
|
+
const xmlDocStart = start_p.ownerDocument;
|
|
2830
|
+
const xmlDocEnd = end_p.ownerDocument;
|
|
2831
|
+
const range_start = xmlDocStart.createElement("w:commentRangeStart");
|
|
2832
|
+
range_start.setAttribute("w:id", comment_id);
|
|
2833
|
+
const range_end = xmlDocEnd.createElement("w:commentRangeEnd");
|
|
2834
|
+
range_end.setAttribute("w:id", comment_id);
|
|
2835
|
+
const ref_run = xmlDocEnd.createElement("w:r");
|
|
2836
|
+
const rPr = xmlDocEnd.createElement("w:rPr");
|
|
2837
|
+
const rStyle = xmlDocEnd.createElement("w:rStyle");
|
|
2838
|
+
rStyle.setAttribute("w:val", "CommentReference");
|
|
2839
|
+
rPr.appendChild(rStyle);
|
|
2840
|
+
ref_run.appendChild(rPr);
|
|
2841
|
+
const ref = xmlDocEnd.createElement("w:commentReference");
|
|
2842
|
+
ref.setAttribute("w:id", comment_id);
|
|
2843
|
+
ref_run.appendChild(ref);
|
|
2844
|
+
start_p.insertBefore(range_start, start_el);
|
|
2845
|
+
const after_end = end_el.nextSibling;
|
|
2846
|
+
if (after_end) {
|
|
2847
|
+
end_p.insertBefore(range_end, after_end);
|
|
2848
|
+
end_p.insertBefore(ref_run, range_end.nextSibling);
|
|
2849
|
+
} else {
|
|
2850
|
+
end_p.appendChild(range_end);
|
|
2851
|
+
end_p.appendChild(ref_run);
|
|
2852
|
+
}
|
|
2853
|
+
}
|
|
2854
|
+
/**
|
|
2855
|
+
* Inserts `text` as one or more tracked paragraphs anchored relative to
|
|
2856
|
+
* either an existing run or a paragraph. Returns:
|
|
2857
|
+
* { first_node, last_p, last_ins, used_block_mode }
|
|
2858
|
+
* where:
|
|
2859
|
+
* - first_node: the first <w:ins> (for inline mode) OR the first new <w:p>
|
|
2860
|
+
* (for block mode). The caller uses this for splicing into the DOM and
|
|
2861
|
+
* for anchoring comments.
|
|
2862
|
+
* - last_p: the last new <w:p> created, if any. null when entirely inline.
|
|
2863
|
+
* - last_ins: the last <w:ins> created (inside the last new <w:p>, or the
|
|
2864
|
+
* sole inline ins). Used as the comment's end anchor.
|
|
2865
|
+
* - used_block_mode: true when the first line carried a heading/list style
|
|
2866
|
+
* marker and we created a new paragraph for it (rather than inlining it).
|
|
2867
|
+
*
|
|
2868
|
+
* Multi-paragraph rules (only when text contains '\n'):
|
|
2869
|
+
* - Each additional line becomes a new <w:p>, inserted after the anchor
|
|
2870
|
+
* paragraph in document order.
|
|
2871
|
+
* - Each new <w:p> gets a copy of the anchor paragraph's <w:pPr> (so list
|
|
2872
|
+
* numbering / indentation are preserved) unless the line itself starts
|
|
2873
|
+
* with a markdown heading or list marker, which overrides the style.
|
|
2874
|
+
* - Each new <w:p> carries a tracked paragraph-break marker
|
|
2875
|
+
* (<w:pPr><w:rPr><w:ins/></w:rPr></w:pPr>) so Word natively tracks the
|
|
2876
|
+
* paragraph break.
|
|
2877
|
+
* - Each new <w:p>'s content is wrapped in a <w:ins>, with inline bold/
|
|
2878
|
+
* italic markdown parsed via _parse_inline_markdown.
|
|
2879
|
+
*
|
|
2880
|
+
* The first line:
|
|
2881
|
+
* - If it carries a heading / list marker AND we have a paragraph anchor,
|
|
2882
|
+
* we drop into "block mode": no inline <w:ins>; the first line itself
|
|
2883
|
+
* becomes the first new <w:p>.
|
|
2884
|
+
* - Otherwise we emit a single inline <w:ins> for the first line (current
|
|
2885
|
+
* behaviour) and treat the remaining lines as block extensions.
|
|
2886
|
+
*
|
|
2887
|
+
* Does NOT attach comments; callers handle that.
|
|
2888
|
+
*/
|
|
2889
|
+
_track_insert_multiline(text, anchor_run, anchor_paragraph, reuse_id) {
|
|
2890
|
+
if (!text) {
|
|
2891
|
+
return {
|
|
2892
|
+
first_node: null,
|
|
2893
|
+
last_p: null,
|
|
2894
|
+
last_ins: null,
|
|
2895
|
+
used_block_mode: false
|
|
2896
|
+
};
|
|
2897
|
+
}
|
|
2898
|
+
const xmlDoc = this.doc.part._element.ownerDocument;
|
|
2899
|
+
const lines = text.split(/[\r\n]+/);
|
|
2900
|
+
let current_p = null;
|
|
2901
|
+
if (anchor_paragraph !== null) {
|
|
2902
|
+
current_p = anchor_paragraph._element;
|
|
2903
|
+
} else if (anchor_run !== null) {
|
|
2904
|
+
let walker = anchor_run._element;
|
|
2905
|
+
while (walker && walker.tagName !== "w:p") {
|
|
2906
|
+
walker = walker.parentNode;
|
|
2907
|
+
}
|
|
2908
|
+
current_p = walker;
|
|
2909
|
+
}
|
|
2910
|
+
while (lines.length > 1 && lines[lines.length - 1] === "") {
|
|
2911
|
+
lines.pop();
|
|
2912
|
+
}
|
|
2913
|
+
if (lines.length === 0) {
|
|
2914
|
+
return {
|
|
2915
|
+
first_node: null,
|
|
2916
|
+
last_p: null,
|
|
2917
|
+
last_ins: null,
|
|
2918
|
+
used_block_mode: false
|
|
2919
|
+
};
|
|
2920
|
+
}
|
|
2921
|
+
const [first_clean, first_style] = this._parse_markdown_style(lines[0]);
|
|
2922
|
+
const have_paragraph_context = current_p !== null;
|
|
2923
|
+
const block_mode = first_style !== null && have_paragraph_context;
|
|
2924
|
+
let first_node = null;
|
|
2925
|
+
let inline_ins = null;
|
|
2926
|
+
if (!block_mode) {
|
|
2927
|
+
inline_ins = this._build_tracked_ins_for_line(
|
|
2928
|
+
first_clean === lines[0] ? lines[0] : lines[0],
|
|
2929
|
+
anchor_run,
|
|
2930
|
+
reuse_id,
|
|
2931
|
+
xmlDoc
|
|
2932
|
+
);
|
|
2933
|
+
first_node = inline_ins;
|
|
2934
|
+
}
|
|
2935
|
+
const remaining_lines = block_mode ? lines : lines.slice(1);
|
|
2936
|
+
if (remaining_lines.length === 0) {
|
|
2937
|
+
return {
|
|
2938
|
+
first_node,
|
|
2939
|
+
last_p: null,
|
|
2940
|
+
last_ins: inline_ins,
|
|
2941
|
+
used_block_mode: false
|
|
2942
|
+
};
|
|
2943
|
+
}
|
|
2944
|
+
if (!current_p) {
|
|
2945
|
+
return {
|
|
2946
|
+
first_node,
|
|
2947
|
+
last_p: null,
|
|
2948
|
+
last_ins: inline_ins,
|
|
2949
|
+
used_block_mode: false
|
|
2950
|
+
};
|
|
2951
|
+
}
|
|
2952
|
+
const parent_body = current_p.parentNode;
|
|
2953
|
+
if (!parent_body) {
|
|
2954
|
+
return {
|
|
2955
|
+
first_node,
|
|
2956
|
+
last_p: null,
|
|
2957
|
+
last_ins: inline_ins,
|
|
2958
|
+
used_block_mode: false
|
|
2959
|
+
};
|
|
2960
|
+
}
|
|
2961
|
+
const insertAfterEl = (newNode, ref) => {
|
|
2962
|
+
parent_body.insertBefore(newNode, ref.nextSibling);
|
|
2963
|
+
};
|
|
2964
|
+
let last_p = null;
|
|
2965
|
+
let last_ins = null;
|
|
2966
|
+
let after = current_p;
|
|
2967
|
+
for (let i = 0; i < remaining_lines.length; i++) {
|
|
2968
|
+
const raw_line = remaining_lines[i];
|
|
2969
|
+
const [clean_text, style_name] = this._parse_markdown_style(raw_line);
|
|
2970
|
+
const new_p = xmlDoc.createElement("w:p");
|
|
2971
|
+
if (style_name) {
|
|
2972
|
+
this._set_paragraph_style(new_p, style_name);
|
|
2973
|
+
} else {
|
|
2974
|
+
const existing_pPr = findChild(current_p, "w:pPr");
|
|
2975
|
+
if (existing_pPr) {
|
|
2976
|
+
new_p.appendChild(existing_pPr.cloneNode(true));
|
|
2977
|
+
}
|
|
2978
|
+
}
|
|
2979
|
+
let pPr = findChild(new_p, "w:pPr");
|
|
2980
|
+
if (!pPr) {
|
|
2981
|
+
pPr = xmlDoc.createElement("w:pPr");
|
|
2982
|
+
new_p.insertBefore(pPr, new_p.firstChild);
|
|
2983
|
+
}
|
|
2984
|
+
let rPr = findChild(pPr, "w:rPr");
|
|
2985
|
+
if (!rPr) {
|
|
2986
|
+
rPr = xmlDoc.createElement("w:rPr");
|
|
2987
|
+
pPr.appendChild(rPr);
|
|
2988
|
+
}
|
|
2989
|
+
const ins_mark = this._create_track_change_tag("w:ins", "", reuse_id);
|
|
2990
|
+
rPr.appendChild(ins_mark);
|
|
2991
|
+
const content_ins = this._build_tracked_ins_for_line(
|
|
2992
|
+
clean_text,
|
|
2993
|
+
anchor_run,
|
|
2994
|
+
reuse_id,
|
|
2995
|
+
xmlDoc
|
|
2996
|
+
);
|
|
2997
|
+
if (content_ins) {
|
|
2998
|
+
new_p.appendChild(content_ins);
|
|
2999
|
+
}
|
|
3000
|
+
insertAfterEl(new_p, after);
|
|
3001
|
+
after = new_p;
|
|
3002
|
+
last_p = new_p;
|
|
3003
|
+
last_ins = content_ins;
|
|
3004
|
+
if (!first_node) {
|
|
3005
|
+
first_node = new_p;
|
|
3006
|
+
}
|
|
3007
|
+
}
|
|
3008
|
+
return { first_node, last_p, last_ins, used_block_mode: block_mode };
|
|
3009
|
+
}
|
|
3010
|
+
/**
|
|
3011
|
+
* Builds a single tracked-insert wrapper (<w:ins>) containing one or more
|
|
3012
|
+
* <w:r> elements representing the inline markdown segments of `line_text`.
|
|
3013
|
+
* Returns null if line_text is empty.
|
|
3014
|
+
*/
|
|
3015
|
+
_build_tracked_ins_for_line(line_text, anchor_run, reuse_id, xmlDoc) {
|
|
3016
|
+
if (!line_text && line_text !== "") return null;
|
|
3017
|
+
const ins = this._create_track_change_tag("w:ins", "", reuse_id);
|
|
3018
|
+
const segments = this._parse_inline_markdown(line_text);
|
|
3019
|
+
if (segments.length === 0) {
|
|
3020
|
+
return null;
|
|
3021
|
+
}
|
|
3022
|
+
for (const [segText, segProps] of segments) {
|
|
3023
|
+
const r = xmlDoc.createElement("w:r");
|
|
3024
|
+
if (anchor_run && anchor_run._element) {
|
|
3025
|
+
const anchor_rPr = findChild(anchor_run._element, "w:rPr");
|
|
3026
|
+
if (anchor_rPr) {
|
|
3027
|
+
const clone = anchor_rPr.cloneNode(true);
|
|
3028
|
+
for (const tag of ["w:vanish", "w:strike", "w:dstrike"]) {
|
|
3029
|
+
const found = findChild(clone, tag);
|
|
3030
|
+
if (found) clone.removeChild(found);
|
|
3031
|
+
}
|
|
3032
|
+
r.appendChild(clone);
|
|
3033
|
+
}
|
|
3034
|
+
}
|
|
3035
|
+
this._apply_run_props(r, segProps, false);
|
|
3036
|
+
const t = xmlDoc.createElement("w:t");
|
|
3037
|
+
this._set_text_content(t, segText);
|
|
3038
|
+
r.appendChild(t);
|
|
3039
|
+
ins.appendChild(r);
|
|
2196
3040
|
}
|
|
3041
|
+
return ins;
|
|
2197
3042
|
}
|
|
2198
3043
|
_parse_markdown_style(text) {
|
|
2199
3044
|
const stripped_text = text.trimStart();
|
|
@@ -2273,6 +3118,172 @@ var RedlineEngine = class {
|
|
|
2273
3118
|
}
|
|
2274
3119
|
}
|
|
2275
3120
|
}
|
|
3121
|
+
/**
|
|
3122
|
+
* Replaces (or creates) a paragraph's <w:pPr> with a single <w:pStyle> entry
|
|
3123
|
+
* pointing at `style_name`. Strips any existing pPr to avoid layering a new
|
|
3124
|
+
* heading style on top of a previous list/heading configuration.
|
|
3125
|
+
*
|
|
3126
|
+
* In Python, the style id is resolved via doc.styles[style_name].style_id and
|
|
3127
|
+
* falls back to stripping spaces. Node has no equivalent style cache exposed
|
|
3128
|
+
* on `doc`, so we always use the simple "strip spaces" fallback: "Heading 1"
|
|
3129
|
+
* becomes the style id "Heading1", "List Number" becomes "ListNumber", etc.
|
|
3130
|
+
* This matches python-docx's default style-id convention for the built-in
|
|
3131
|
+
* paragraph styles and is what Word writes by default.
|
|
3132
|
+
*/
|
|
3133
|
+
_set_paragraph_style(p_element, style_name) {
|
|
3134
|
+
const xmlDoc = p_element.ownerDocument;
|
|
3135
|
+
const existing_pPr = findChild(p_element, "w:pPr");
|
|
3136
|
+
if (existing_pPr) {
|
|
3137
|
+
p_element.removeChild(existing_pPr);
|
|
3138
|
+
}
|
|
3139
|
+
const pPr = xmlDoc.createElement("w:pPr");
|
|
3140
|
+
const pStyle = xmlDoc.createElement("w:pStyle");
|
|
3141
|
+
const style_id = style_name.replace(/\s+/g, "");
|
|
3142
|
+
pStyle.setAttribute("w:val", style_id);
|
|
3143
|
+
pPr.appendChild(pStyle);
|
|
3144
|
+
p_element.insertBefore(pPr, p_element.firstChild);
|
|
3145
|
+
}
|
|
3146
|
+
_anchor_reply_comment(parent_id, new_id) {
|
|
3147
|
+
const docEl = this.doc.part._element.ownerDocument;
|
|
3148
|
+
const starts = findAllDescendants(
|
|
3149
|
+
this.doc.element,
|
|
3150
|
+
"w:commentRangeStart"
|
|
3151
|
+
).filter((n) => n.getAttribute("w:id") === parent_id);
|
|
3152
|
+
if (starts.length === 0) return;
|
|
3153
|
+
const parent_start = starts[0];
|
|
3154
|
+
const new_start = docEl.createElement("w:commentRangeStart");
|
|
3155
|
+
new_start.setAttribute("w:id", new_id);
|
|
3156
|
+
insertAfter(new_start, parent_start);
|
|
3157
|
+
const ends = findAllDescendants(
|
|
3158
|
+
this.doc.element,
|
|
3159
|
+
"w:commentRangeEnd"
|
|
3160
|
+
).filter((n) => n.getAttribute("w:id") === parent_id);
|
|
3161
|
+
if (ends.length === 0) return;
|
|
3162
|
+
const parent_end = ends[0];
|
|
3163
|
+
const parent_refs = findAllDescendants(
|
|
3164
|
+
this.doc.element,
|
|
3165
|
+
"w:commentReference"
|
|
3166
|
+
).filter((n) => n.getAttribute("w:id") === parent_id);
|
|
3167
|
+
let insertion_point = parent_end;
|
|
3168
|
+
if (parent_refs.length > 0) {
|
|
3169
|
+
const ref_el = parent_refs[0];
|
|
3170
|
+
if (ref_el.parentNode && ref_el.parentNode.tagName === "w:r") {
|
|
3171
|
+
insertion_point = ref_el.parentNode;
|
|
3172
|
+
}
|
|
3173
|
+
}
|
|
3174
|
+
const new_end = docEl.createElement("w:commentRangeEnd");
|
|
3175
|
+
new_end.setAttribute("w:id", new_id);
|
|
3176
|
+
insertAfter(new_end, insertion_point);
|
|
3177
|
+
const ref_run = docEl.createElement("w:r");
|
|
3178
|
+
const rPr = docEl.createElement("w:rPr");
|
|
3179
|
+
const rStyle = docEl.createElement("w:rStyle");
|
|
3180
|
+
rStyle.setAttribute("w:val", "CommentReference");
|
|
3181
|
+
rPr.appendChild(rStyle);
|
|
3182
|
+
ref_run.appendChild(rPr);
|
|
3183
|
+
const ref = docEl.createElement("w:commentReference");
|
|
3184
|
+
ref.setAttribute("w:id", new_id);
|
|
3185
|
+
ref_run.appendChild(ref);
|
|
3186
|
+
insertAfter(ref_run, new_end);
|
|
3187
|
+
}
|
|
3188
|
+
_clean_wrapping_comments(element) {
|
|
3189
|
+
let first_node = element;
|
|
3190
|
+
while (true) {
|
|
3191
|
+
const prev2 = getPreviousElement(first_node);
|
|
3192
|
+
if (prev2 && (prev2.tagName === "w:ins" || prev2.tagName === "w:del")) {
|
|
3193
|
+
first_node = prev2;
|
|
3194
|
+
} else {
|
|
3195
|
+
break;
|
|
3196
|
+
}
|
|
3197
|
+
}
|
|
3198
|
+
let last_node = element;
|
|
3199
|
+
while (true) {
|
|
3200
|
+
const nxt2 = getNextElement(last_node);
|
|
3201
|
+
if (nxt2 && (nxt2.tagName === "w:ins" || nxt2.tagName === "w:del")) {
|
|
3202
|
+
last_node = nxt2;
|
|
3203
|
+
} else {
|
|
3204
|
+
break;
|
|
3205
|
+
}
|
|
3206
|
+
}
|
|
3207
|
+
const starts_to_remove = [];
|
|
3208
|
+
let prev = getPreviousElement(first_node);
|
|
3209
|
+
while (prev) {
|
|
3210
|
+
if (prev.tagName === "w:commentRangeStart") {
|
|
3211
|
+
starts_to_remove.push(prev);
|
|
3212
|
+
prev = getPreviousElement(prev);
|
|
3213
|
+
} else if (prev.tagName === "w:rPr" || prev.tagName === "w:pPr") {
|
|
3214
|
+
prev = getPreviousElement(prev);
|
|
3215
|
+
} else {
|
|
3216
|
+
break;
|
|
3217
|
+
}
|
|
3218
|
+
}
|
|
3219
|
+
const ends_to_remove = [];
|
|
3220
|
+
let nxt = getNextElement(last_node);
|
|
3221
|
+
while (nxt) {
|
|
3222
|
+
if (nxt.tagName === "w:commentRangeEnd") {
|
|
3223
|
+
ends_to_remove.push(nxt);
|
|
3224
|
+
nxt = getNextElement(nxt);
|
|
3225
|
+
} else if (nxt.tagName === "w:r" && findAllDescendants(nxt, "w:commentReference").length > 0) {
|
|
3226
|
+
ends_to_remove.push(nxt);
|
|
3227
|
+
nxt = getNextElement(nxt);
|
|
3228
|
+
} else if (nxt.tagName === "w:commentReference") {
|
|
3229
|
+
ends_to_remove.push(nxt);
|
|
3230
|
+
nxt = getNextElement(nxt);
|
|
3231
|
+
} else {
|
|
3232
|
+
break;
|
|
3233
|
+
}
|
|
3234
|
+
}
|
|
3235
|
+
const end_ids = /* @__PURE__ */ new Set();
|
|
3236
|
+
for (const e of ends_to_remove) {
|
|
3237
|
+
if (e.tagName === "w:commentRangeEnd") {
|
|
3238
|
+
const eid = e.getAttribute("w:id");
|
|
3239
|
+
if (eid) end_ids.add(eid);
|
|
3240
|
+
} else {
|
|
3241
|
+
let ref = findAllDescendants(e, "w:commentReference")[0];
|
|
3242
|
+
if (!ref && e.tagName === "w:commentReference") ref = e;
|
|
3243
|
+
if (ref) {
|
|
3244
|
+
const eid = ref.getAttribute("w:id");
|
|
3245
|
+
if (eid) end_ids.add(eid);
|
|
3246
|
+
}
|
|
3247
|
+
}
|
|
3248
|
+
}
|
|
3249
|
+
for (const s of starts_to_remove) {
|
|
3250
|
+
const c_id = s.getAttribute("w:id");
|
|
3251
|
+
if (c_id && end_ids.has(c_id)) {
|
|
3252
|
+
this.comments_manager.deleteComment(c_id);
|
|
3253
|
+
if (s.parentNode) s.parentNode.removeChild(s);
|
|
3254
|
+
for (const e of ends_to_remove) {
|
|
3255
|
+
let e_id = null;
|
|
3256
|
+
if (e.tagName === "w:commentRangeEnd") {
|
|
3257
|
+
e_id = e.getAttribute("w:id");
|
|
3258
|
+
} else {
|
|
3259
|
+
let ref = findAllDescendants(e, "w:commentReference")[0];
|
|
3260
|
+
if (!ref && e.tagName === "w:commentReference") ref = e;
|
|
3261
|
+
if (ref) e_id = ref.getAttribute("w:id");
|
|
3262
|
+
}
|
|
3263
|
+
if (e_id === c_id && e.parentNode) {
|
|
3264
|
+
e.parentNode.removeChild(e);
|
|
3265
|
+
}
|
|
3266
|
+
}
|
|
3267
|
+
}
|
|
3268
|
+
}
|
|
3269
|
+
}
|
|
3270
|
+
_delete_comments_in_element(element) {
|
|
3271
|
+
const refs = findAllDescendants(element, "w:commentReference");
|
|
3272
|
+
for (const ref of refs) {
|
|
3273
|
+
const c_id = ref.getAttribute("w:id");
|
|
3274
|
+
if (c_id) {
|
|
3275
|
+
this.comments_manager.deleteComment(c_id);
|
|
3276
|
+
for (const tag of ["w:commentRangeStart", "w:commentRangeEnd"]) {
|
|
3277
|
+
const nodes = findAllDescendants(this.doc.element, tag);
|
|
3278
|
+
for (const node of nodes) {
|
|
3279
|
+
if (node.getAttribute("w:id") === c_id && node.parentNode) {
|
|
3280
|
+
node.parentNode.removeChild(node);
|
|
3281
|
+
}
|
|
3282
|
+
}
|
|
3283
|
+
}
|
|
3284
|
+
}
|
|
3285
|
+
}
|
|
3286
|
+
}
|
|
2276
3287
|
validate_edits(edits) {
|
|
2277
3288
|
const errors = [];
|
|
2278
3289
|
if (!this.mapper.full_text) this.mapper["_build_map"]();
|
|
@@ -2283,23 +3294,41 @@ var RedlineEngine = class {
|
|
|
2283
3294
|
let matches = this.mapper.find_all_match_indices(edit.target_text);
|
|
2284
3295
|
let activeText = this.mapper.full_text;
|
|
2285
3296
|
if (matches.length === 0) {
|
|
2286
|
-
if (!this.clean_mapper)
|
|
3297
|
+
if (!this.clean_mapper)
|
|
3298
|
+
this.clean_mapper = new DocumentMapper(this.doc, true);
|
|
2287
3299
|
matches = this.clean_mapper.find_all_match_indices(edit.target_text);
|
|
2288
3300
|
if (matches.length > 0) activeText = this.clean_mapper.full_text;
|
|
2289
3301
|
}
|
|
2290
3302
|
if (matches.length === 0) {
|
|
2291
|
-
errors.push(
|
|
2292
|
-
|
|
3303
|
+
errors.push(
|
|
3304
|
+
`- Edit ${i + 1} Failed: Target text not found in document:
|
|
3305
|
+
"${edit.target_text}"`
|
|
3306
|
+
);
|
|
2293
3307
|
} else if (matches.length > 1) {
|
|
2294
|
-
|
|
2295
|
-
|
|
3308
|
+
const positions = matches.map(([start, length]) => [
|
|
3309
|
+
start,
|
|
3310
|
+
start + length
|
|
3311
|
+
]);
|
|
3312
|
+
errors.push(
|
|
3313
|
+
format_ambiguity_error(
|
|
3314
|
+
i + 1,
|
|
3315
|
+
edit.target_text,
|
|
3316
|
+
activeText,
|
|
3317
|
+
positions
|
|
3318
|
+
)
|
|
3319
|
+
);
|
|
2296
3320
|
}
|
|
2297
3321
|
for (const [start, length] of matches) {
|
|
2298
|
-
const spans = this.mapper.spans.filter(
|
|
3322
|
+
const spans = this.mapper.spans.filter(
|
|
3323
|
+
(s) => s.end > start && s.start < start + length
|
|
3324
|
+
);
|
|
2299
3325
|
const nestedAuthors = /* @__PURE__ */ new Set();
|
|
2300
3326
|
for (const s of spans) {
|
|
2301
3327
|
if (s.ins_id) {
|
|
2302
|
-
const insNodes = findAllDescendants(
|
|
3328
|
+
const insNodes = findAllDescendants(
|
|
3329
|
+
this.doc.element,
|
|
3330
|
+
"w:ins"
|
|
3331
|
+
).filter((n) => n.getAttribute("w:id") === s.ins_id);
|
|
2303
3332
|
if (insNodes.length > 0) {
|
|
2304
3333
|
const auth = insNodes[0].getAttribute("w:author");
|
|
2305
3334
|
if (auth && auth !== this.author) nestedAuthors.add(auth);
|
|
@@ -2307,7 +3336,46 @@ Provide more context.`);
|
|
|
2307
3336
|
}
|
|
2308
3337
|
}
|
|
2309
3338
|
if (nestedAuthors.size > 0) {
|
|
2310
|
-
errors.push(
|
|
3339
|
+
errors.push(
|
|
3340
|
+
`- Edit ${i + 1} Failed: Modification targets an active insertion from another author (${Array.from(nestedAuthors).join(", ")}). Accept that change first or scope your edit outside of it.`
|
|
3341
|
+
);
|
|
3342
|
+
}
|
|
3343
|
+
}
|
|
3344
|
+
}
|
|
3345
|
+
return errors;
|
|
3346
|
+
}
|
|
3347
|
+
validate_review_actions(actions) {
|
|
3348
|
+
const errors = [];
|
|
3349
|
+
for (let i = 0; i < actions.length; i++) {
|
|
3350
|
+
const action = actions[i];
|
|
3351
|
+
const type = action.type;
|
|
3352
|
+
if (type === "reply") {
|
|
3353
|
+
const cid = action.target_id.replace("Com:", "");
|
|
3354
|
+
let found = false;
|
|
3355
|
+
const part = this.doc.pkg.parts.find(
|
|
3356
|
+
(p) => p.contentType === "application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml"
|
|
3357
|
+
);
|
|
3358
|
+
if (part) {
|
|
3359
|
+
const comments = findAllDescendants(part._element, "w:comment");
|
|
3360
|
+
found = comments.some((c) => c.getAttribute("w:id") === cid);
|
|
3361
|
+
}
|
|
3362
|
+
if (!found) {
|
|
3363
|
+
errors.push(
|
|
3364
|
+
`- Action ${i + 1} Failed: Target comment ID ${action.target_id} not found.`
|
|
3365
|
+
);
|
|
3366
|
+
}
|
|
3367
|
+
} else if (type === "accept" || type === "reject") {
|
|
3368
|
+
const target_id = action.target_id.replace("Chg:", "");
|
|
3369
|
+
const all_ins = findAllDescendants(this.doc.element, "w:ins").filter(
|
|
3370
|
+
(n) => n.getAttribute("w:id") === target_id
|
|
3371
|
+
);
|
|
3372
|
+
const all_del = findAllDescendants(this.doc.element, "w:del").filter(
|
|
3373
|
+
(n) => n.getAttribute("w:id") === target_id
|
|
3374
|
+
);
|
|
3375
|
+
if (all_ins.length === 0 && all_del.length === 0) {
|
|
3376
|
+
errors.push(
|
|
3377
|
+
`- Action ${i + 1} Failed: Target ID ${action.target_id} not found.`
|
|
3378
|
+
);
|
|
2311
3379
|
}
|
|
2312
3380
|
}
|
|
2313
3381
|
}
|
|
@@ -2315,8 +3383,22 @@ Provide more context.`);
|
|
|
2315
3383
|
}
|
|
2316
3384
|
process_batch(changes) {
|
|
2317
3385
|
this.skipped_details = [];
|
|
2318
|
-
const actions = changes.filter(
|
|
2319
|
-
|
|
3386
|
+
const actions = changes.filter(
|
|
3387
|
+
(c) => ["accept", "reject", "reply"].includes(c.type)
|
|
3388
|
+
);
|
|
3389
|
+
const edits = changes.filter(
|
|
3390
|
+
(c) => !["accept", "reject", "reply"].includes(c.type)
|
|
3391
|
+
);
|
|
3392
|
+
const all_errors = [];
|
|
3393
|
+
if (actions.length > 0) {
|
|
3394
|
+
all_errors.push(...this.validate_review_actions(actions));
|
|
3395
|
+
}
|
|
3396
|
+
if (edits.length > 0) {
|
|
3397
|
+
all_errors.push(...this.validate_edits(edits));
|
|
3398
|
+
}
|
|
3399
|
+
if (all_errors.length > 0) {
|
|
3400
|
+
throw new BatchValidationError(all_errors);
|
|
3401
|
+
}
|
|
2320
3402
|
let applied_actions = 0, skipped_actions = 0;
|
|
2321
3403
|
if (actions.length > 0) {
|
|
2322
3404
|
const res = this.apply_review_actions(actions);
|
|
@@ -2327,10 +3409,6 @@ Provide more context.`);
|
|
|
2327
3409
|
if (this.clean_mapper) this.clean_mapper["_build_map"]();
|
|
2328
3410
|
}
|
|
2329
3411
|
}
|
|
2330
|
-
if (edits.length > 0) {
|
|
2331
|
-
const errors = this.validate_edits(edits);
|
|
2332
|
-
if (errors.length > 0) throw new BatchValidationError(errors);
|
|
2333
|
-
}
|
|
2334
3412
|
let applied_edits = 0, skipped_edits = 0;
|
|
2335
3413
|
if (edits.length > 0) {
|
|
2336
3414
|
const res = this.apply_edits(edits);
|
|
@@ -2359,7 +3437,9 @@ Provide more context.`);
|
|
|
2359
3437
|
resolved_edits.push([edit, null]);
|
|
2360
3438
|
} else {
|
|
2361
3439
|
skipped++;
|
|
2362
|
-
this.skipped_details.push(
|
|
3440
|
+
this.skipped_details.push(
|
|
3441
|
+
`- Failed to locate row target: '${(edit.target_text || "").substring(0, 40)}...'`
|
|
3442
|
+
);
|
|
2363
3443
|
}
|
|
2364
3444
|
} else {
|
|
2365
3445
|
const resolved = this._pre_resolve_heuristic_edit(edit);
|
|
@@ -2371,19 +3451,27 @@ Provide more context.`);
|
|
|
2371
3451
|
}
|
|
2372
3452
|
} else {
|
|
2373
3453
|
skipped++;
|
|
2374
|
-
this.skipped_details.push(
|
|
3454
|
+
this.skipped_details.push(
|
|
3455
|
+
`- Failed to apply edit targeting: '${(edit.target_text || "insertion").substring(0, 40)}...'`
|
|
3456
|
+
);
|
|
2375
3457
|
}
|
|
2376
3458
|
}
|
|
2377
3459
|
}
|
|
2378
|
-
resolved_edits.sort(
|
|
3460
|
+
resolved_edits.sort(
|
|
3461
|
+
(a, b) => (b[0]._match_start_index || 0) - (a[0]._match_start_index || 0)
|
|
3462
|
+
);
|
|
2379
3463
|
const occupied_ranges = [];
|
|
2380
3464
|
for (const [edit, orig_new] of resolved_edits) {
|
|
2381
3465
|
const start = edit._match_start_index || 0;
|
|
2382
3466
|
const end = start + (edit.target_text ? edit.target_text.length : 0);
|
|
2383
|
-
const overlaps = occupied_ranges.some(
|
|
3467
|
+
const overlaps = occupied_ranges.some(
|
|
3468
|
+
([occ_start, occ_end]) => start < occ_end && end > occ_start
|
|
3469
|
+
);
|
|
2384
3470
|
if (overlaps) {
|
|
2385
3471
|
skipped++;
|
|
2386
|
-
this.skipped_details.push(
|
|
3472
|
+
this.skipped_details.push(
|
|
3473
|
+
`- Skipped overlapping edit targeting: '${(edit.target_text || "insertion").substring(0, 40)}...'`
|
|
3474
|
+
);
|
|
2387
3475
|
continue;
|
|
2388
3476
|
}
|
|
2389
3477
|
let success = false;
|
|
@@ -2397,7 +3485,9 @@ Provide more context.`);
|
|
|
2397
3485
|
occupied_ranges.push([start, end]);
|
|
2398
3486
|
} else {
|
|
2399
3487
|
skipped++;
|
|
2400
|
-
this.skipped_details.push(
|
|
3488
|
+
this.skipped_details.push(
|
|
3489
|
+
`- Failed to apply edit targeting: '${(edit.target_text || "insertion").substring(0, 40)}...'`
|
|
3490
|
+
);
|
|
2401
3491
|
}
|
|
2402
3492
|
}
|
|
2403
3493
|
return [applied, skipped];
|
|
@@ -2409,17 +3499,28 @@ Provide more context.`);
|
|
|
2409
3499
|
const type = action.type;
|
|
2410
3500
|
if (type === "reply") {
|
|
2411
3501
|
const cid = action.target_id.replace("Com:", "");
|
|
2412
|
-
this.comments_manager.addComment(
|
|
3502
|
+
const new_id = this.comments_manager.addComment(
|
|
3503
|
+
this.author,
|
|
3504
|
+
action.text,
|
|
3505
|
+
cid
|
|
3506
|
+
);
|
|
3507
|
+
this._anchor_reply_comment(cid, new_id);
|
|
2413
3508
|
applied++;
|
|
2414
3509
|
continue;
|
|
2415
3510
|
}
|
|
2416
3511
|
const target_id = action.target_id.replace("Chg:", "");
|
|
2417
|
-
const all_ins = findAllDescendants(this.doc.element, "w:ins").filter(
|
|
2418
|
-
|
|
3512
|
+
const all_ins = findAllDescendants(this.doc.element, "w:ins").filter(
|
|
3513
|
+
(n) => n.getAttribute("w:id") === target_id
|
|
3514
|
+
);
|
|
3515
|
+
const all_del = findAllDescendants(this.doc.element, "w:del").filter(
|
|
3516
|
+
(n) => n.getAttribute("w:id") === target_id
|
|
3517
|
+
);
|
|
2419
3518
|
const all_nodes = [...all_ins, ...all_del];
|
|
2420
3519
|
if (all_nodes.length === 0) {
|
|
2421
3520
|
skipped++;
|
|
2422
|
-
this.skipped_details.push(
|
|
3521
|
+
this.skipped_details.push(
|
|
3522
|
+
`- Failed to apply action: Target ID ${action.target_id} not found.`
|
|
3523
|
+
);
|
|
2423
3524
|
continue;
|
|
2424
3525
|
}
|
|
2425
3526
|
for (const node of all_nodes) {
|
|
@@ -2428,12 +3529,16 @@ Provide more context.`);
|
|
|
2428
3529
|
const is_trPr = parent_tag === "w:trPr";
|
|
2429
3530
|
if (type === "accept") {
|
|
2430
3531
|
if (is_ins) {
|
|
3532
|
+
this._clean_wrapping_comments(node);
|
|
2431
3533
|
if (is_trPr) node.parentNode?.removeChild(node);
|
|
2432
3534
|
else {
|
|
2433
|
-
while (node.firstChild)
|
|
3535
|
+
while (node.firstChild)
|
|
3536
|
+
node.parentNode?.insertBefore(node.firstChild, node);
|
|
2434
3537
|
node.parentNode?.removeChild(node);
|
|
2435
3538
|
}
|
|
2436
3539
|
} else {
|
|
3540
|
+
this._clean_wrapping_comments(node);
|
|
3541
|
+
this._delete_comments_in_element(node);
|
|
2437
3542
|
if (is_trPr) {
|
|
2438
3543
|
const tr = node.parentNode?.parentNode;
|
|
2439
3544
|
tr?.parentNode?.removeChild(tr);
|
|
@@ -2443,21 +3548,28 @@ Provide more context.`);
|
|
|
2443
3548
|
}
|
|
2444
3549
|
} else if (type === "reject") {
|
|
2445
3550
|
if (is_ins) {
|
|
3551
|
+
this._clean_wrapping_comments(node);
|
|
3552
|
+
this._delete_comments_in_element(node);
|
|
2446
3553
|
if (is_trPr) {
|
|
2447
3554
|
const tr = node.parentNode?.parentNode;
|
|
2448
3555
|
tr?.parentNode?.removeChild(tr);
|
|
2449
3556
|
} else node.parentNode?.removeChild(node);
|
|
2450
3557
|
} else {
|
|
3558
|
+
this._clean_wrapping_comments(node);
|
|
2451
3559
|
if (is_trPr) node.parentNode?.removeChild(node);
|
|
2452
3560
|
else {
|
|
2453
|
-
const delTexts = Array.from(
|
|
3561
|
+
const delTexts = Array.from(
|
|
3562
|
+
node.getElementsByTagName("w:delText")
|
|
3563
|
+
);
|
|
2454
3564
|
for (const dt of delTexts) {
|
|
2455
3565
|
const t = dt.ownerDocument.createElement("w:t");
|
|
2456
3566
|
t.textContent = dt.textContent;
|
|
2457
|
-
if (dt.hasAttribute("xml:space"))
|
|
3567
|
+
if (dt.hasAttribute("xml:space"))
|
|
3568
|
+
t.setAttribute("xml:space", "preserve");
|
|
2458
3569
|
dt.parentNode?.replaceChild(t, dt);
|
|
2459
3570
|
}
|
|
2460
|
-
while (node.firstChild)
|
|
3571
|
+
while (node.firstChild)
|
|
3572
|
+
node.parentNode?.insertBefore(node.firstChild, node);
|
|
2461
3573
|
node.parentNode?.removeChild(node);
|
|
2462
3574
|
}
|
|
2463
3575
|
}
|
|
@@ -2469,7 +3581,10 @@ Provide more context.`);
|
|
|
2469
3581
|
}
|
|
2470
3582
|
_apply_table_edit(edit, rebuild_map) {
|
|
2471
3583
|
const start_idx = edit._match_start_index || 0;
|
|
2472
|
-
const [anchor_run, anchor_para] = this.mapper.get_insertion_anchor(
|
|
3584
|
+
const [anchor_run, anchor_para] = this.mapper.get_insertion_anchor(
|
|
3585
|
+
start_idx,
|
|
3586
|
+
rebuild_map
|
|
3587
|
+
);
|
|
2473
3588
|
let target_element = null;
|
|
2474
3589
|
if (anchor_run) target_element = anchor_run._element;
|
|
2475
3590
|
else if (anchor_para) target_element = anchor_para._element;
|
|
@@ -2496,7 +3611,8 @@ Provide more context.`);
|
|
|
2496
3611
|
const r = tr.ownerDocument.createElement("w:r");
|
|
2497
3612
|
const t = tr.ownerDocument.createElement("w:t");
|
|
2498
3613
|
t.textContent = cellText;
|
|
2499
|
-
if (cellText.trim() !== cellText)
|
|
3614
|
+
if (cellText.trim() !== cellText)
|
|
3615
|
+
t.setAttribute("xml:space", "preserve");
|
|
2500
3616
|
r.appendChild(t);
|
|
2501
3617
|
p.appendChild(r);
|
|
2502
3618
|
tc.appendChild(p);
|
|
@@ -2513,14 +3629,20 @@ Provide more context.`);
|
|
|
2513
3629
|
let [start_idx, match_len] = this.mapper.find_match_index(edit.target_text);
|
|
2514
3630
|
let use_clean_map = false;
|
|
2515
3631
|
if (start_idx === -1) {
|
|
2516
|
-
if (!this.clean_mapper)
|
|
2517
|
-
|
|
3632
|
+
if (!this.clean_mapper)
|
|
3633
|
+
this.clean_mapper = new DocumentMapper(this.doc, true);
|
|
3634
|
+
[start_idx, match_len] = this.clean_mapper.find_match_index(
|
|
3635
|
+
edit.target_text
|
|
3636
|
+
);
|
|
2518
3637
|
if (start_idx !== -1) use_clean_map = true;
|
|
2519
3638
|
else return null;
|
|
2520
3639
|
}
|
|
2521
3640
|
const active_mapper = use_clean_map ? this.clean_mapper : this.mapper;
|
|
2522
3641
|
const effective_new_text = edit.new_text || "";
|
|
2523
|
-
const actual_doc_text = this.mapper.full_text.substring(
|
|
3642
|
+
const actual_doc_text = this.mapper.full_text.substring(
|
|
3643
|
+
start_idx,
|
|
3644
|
+
start_idx + match_len
|
|
3645
|
+
);
|
|
2524
3646
|
if (actual_doc_text === effective_new_text || edit.target_text === effective_new_text) {
|
|
2525
3647
|
return {
|
|
2526
3648
|
type: "modify",
|
|
@@ -2541,330 +3663,337 @@ Provide more context.`);
|
|
|
2541
3663
|
final_new = effective_new_text.substring(actual_doc_text.length);
|
|
2542
3664
|
effective_start_idx = start_idx + match_len;
|
|
2543
3665
|
} else {
|
|
2544
|
-
const [prefix_len, suffix_len] = trim_common_context(
|
|
3666
|
+
const [prefix_len, suffix_len] = trim_common_context(
|
|
3667
|
+
actual_doc_text,
|
|
3668
|
+
effective_new_text
|
|
3669
|
+
);
|
|
2545
3670
|
const t_end = actual_doc_text.length - suffix_len;
|
|
2546
3671
|
const n_end = effective_new_text.length - suffix_len;
|
|
2547
3672
|
final_target = actual_doc_text.substring(prefix_len, t_end);
|
|
2548
3673
|
final_new = effective_new_text.substring(prefix_len, n_end);
|
|
2549
3674
|
effective_start_idx = start_idx + prefix_len;
|
|
2550
|
-
if (!final_target && final_new) effective_op = "INSERTION";
|
|
2551
|
-
else if (final_target && !final_new) effective_op = "DELETION";
|
|
2552
|
-
else if (final_target && final_new) effective_op = "MODIFICATION";
|
|
2553
|
-
else effective_op = "COMMENT_ONLY";
|
|
2554
|
-
}
|
|
2555
|
-
return {
|
|
2556
|
-
type: "modify",
|
|
2557
|
-
target_text: final_target,
|
|
2558
|
-
new_text: final_new,
|
|
2559
|
-
comment: edit.comment,
|
|
2560
|
-
_match_start_index: effective_start_idx,
|
|
2561
|
-
_internal_op: effective_op,
|
|
2562
|
-
_active_mapper_ref: active_mapper
|
|
2563
|
-
};
|
|
2564
|
-
}
|
|
2565
|
-
_apply_single_edit_indexed(edit, orig_new, rebuild_map) {
|
|
2566
|
-
let op = edit._internal_op;
|
|
2567
|
-
const active_mapper = edit._active_mapper_ref || this.mapper;
|
|
2568
|
-
const start_idx = edit._match_start_index || 0;
|
|
2569
|
-
const length = edit.target_text ? edit.target_text.length : 0;
|
|
2570
|
-
const del_id = ["DELETION", "MODIFICATION"].includes(op) ? this._getNextId() : null;
|
|
2571
|
-
const ins_id = ["INSERTION", "MODIFICATION"].includes(op) ? this._getNextId() : null;
|
|
2572
|
-
if (op === "COMMENT_ONLY") {
|
|
2573
|
-
return true;
|
|
2574
|
-
}
|
|
2575
|
-
if (op === "INSERTION") {
|
|
2576
|
-
const [anchor_run, anchor_para] = active_mapper.get_insertion_anchor(start_idx, rebuild_map);
|
|
2577
|
-
if (!anchor_run && !anchor_para) return false;
|
|
2578
|
-
const xmlDoc = this.doc.part._element.ownerDocument;
|
|
2579
|
-
const ins = this._create_track_change_tag("w:ins", "", ins_id);
|
|
2580
|
-
const segments = this._parse_inline_markdown(edit.new_text || "");
|
|
2581
|
-
for (const [segText, segProps] of segments) {
|
|
2582
|
-
const r = xmlDoc.createElement("w:r");
|
|
2583
|
-
this._apply_run_props(r, segProps, false);
|
|
2584
|
-
const t = xmlDoc.createElement("w:t");
|
|
2585
|
-
this._set_text_content(t, segText);
|
|
2586
|
-
r.appendChild(t);
|
|
2587
|
-
ins.appendChild(r);
|
|
2588
|
-
}
|
|
2589
|
-
if (anchor_run) {
|
|
2590
|
-
insertAfter(ins, anchor_run._element);
|
|
2591
|
-
} else if (anchor_para) {
|
|
2592
|
-
anchor_para._element.appendChild(ins);
|
|
2593
|
-
}
|
|
2594
|
-
return true;
|
|
2595
|
-
}
|
|
2596
|
-
const target_runs = active_mapper.find_target_runs_by_index(start_idx, length, rebuild_map);
|
|
2597
|
-
if (target_runs.length === 0) return false;
|
|
2598
|
-
let last_del = null;
|
|
2599
|
-
for (const run of target_runs) {
|
|
2600
|
-
const del_tag = this._create_track_change_tag("w:del", "", del_id);
|
|
2601
|
-
const new_run = run._element.cloneNode(true);
|
|
2602
|
-
const tNodes = Array.from(new_run.getElementsByTagName("w:t"));
|
|
2603
|
-
tNodes.forEach((t) => {
|
|
2604
|
-
const delText = new_run.ownerDocument.createElement("w:delText");
|
|
2605
|
-
delText.textContent = t.textContent;
|
|
2606
|
-
if (t.hasAttribute("xml:space")) delText.setAttribute("xml:space", "preserve");
|
|
2607
|
-
new_run.replaceChild(delText, t);
|
|
2608
|
-
});
|
|
2609
|
-
del_tag.appendChild(new_run);
|
|
2610
|
-
run._element.parentNode?.replaceChild(del_tag, run._element);
|
|
2611
|
-
last_del = del_tag;
|
|
2612
|
-
}
|
|
2613
|
-
if (op === "MODIFICATION" && edit.new_text && last_del) {
|
|
2614
|
-
const xmlDoc = this.doc.part._element.ownerDocument;
|
|
2615
|
-
const ins = this._create_track_change_tag("w:ins", "", ins_id);
|
|
2616
|
-
const segments = this._parse_inline_markdown(edit.new_text);
|
|
2617
|
-
for (const [segText, segProps] of segments) {
|
|
2618
|
-
const r = xmlDoc.createElement("w:r");
|
|
2619
|
-
this._apply_run_props(r, segProps, false);
|
|
2620
|
-
const t = xmlDoc.createElement("w:t");
|
|
2621
|
-
this._set_text_content(t, segText);
|
|
2622
|
-
r.appendChild(t);
|
|
2623
|
-
ins.appendChild(r);
|
|
2624
|
-
}
|
|
2625
|
-
insertAfter(ins, last_del);
|
|
2626
|
-
}
|
|
2627
|
-
return true;
|
|
2628
|
-
}
|
|
2629
|
-
};
|
|
2630
|
-
|
|
2631
|
-
// src/markup.ts
|
|
2632
|
-
function _should_strip_markers(text, marker) {
|
|
2633
|
-
if (!text.startsWith(marker) || !text.endsWith(marker)) return false;
|
|
2634
|
-
if (text.length < marker.length * 2) return false;
|
|
2635
|
-
const inner = text.substring(marker.length, text.length - marker.length);
|
|
2636
|
-
if (!inner) return false;
|
|
2637
|
-
if (inner.includes(marker)) return false;
|
|
2638
|
-
if (!/[a-zA-Z]/.test(inner)) return false;
|
|
2639
|
-
if (marker === "__" && /^\w+$/.test(inner)) return false;
|
|
2640
|
-
if (marker === "_") {
|
|
2641
|
-
if (inner.includes("_")) return false;
|
|
2642
|
-
if (/^[0-9_]+$/.test(inner)) return false;
|
|
2643
|
-
}
|
|
2644
|
-
return true;
|
|
2645
|
-
}
|
|
2646
|
-
function _strip_balanced_markers(text) {
|
|
2647
|
-
let prefix_markup = "";
|
|
2648
|
-
let suffix_markup = "";
|
|
2649
|
-
let clean_text = text;
|
|
2650
|
-
const markers = ["**", "__", "_", "*"];
|
|
2651
|
-
for (const marker of markers) {
|
|
2652
|
-
if (_should_strip_markers(clean_text, marker)) {
|
|
2653
|
-
prefix_markup += marker;
|
|
2654
|
-
suffix_markup = marker + suffix_markup;
|
|
2655
|
-
clean_text = clean_text.substring(marker.length, clean_text.length - marker.length);
|
|
2656
|
-
break;
|
|
2657
|
-
}
|
|
2658
|
-
}
|
|
2659
|
-
return [prefix_markup, clean_text, suffix_markup];
|
|
2660
|
-
}
|
|
2661
|
-
function _replace_smart_quotes(text) {
|
|
2662
|
-
return text.replace(/“/g, '"').replace(/”/g, '"').replace(/‘/g, "'").replace(/’/g, "'");
|
|
2663
|
-
}
|
|
2664
|
-
function _find_safe_boundaries(text, start, end) {
|
|
2665
|
-
let new_start = start;
|
|
2666
|
-
let new_end = end;
|
|
2667
|
-
const expand_if_unbalanced = (marker) => {
|
|
2668
|
-
const current_match = text.substring(new_start, new_end);
|
|
2669
|
-
const count = (current_match.match(new RegExp(marker.replace(/\*/g, "\\*"), "g")) || []).length;
|
|
2670
|
-
if (count % 2 !== 0) {
|
|
2671
|
-
const suffix = text.substring(new_end);
|
|
2672
|
-
if (suffix.startsWith(marker)) {
|
|
2673
|
-
new_end += marker.length;
|
|
2674
|
-
return;
|
|
2675
|
-
}
|
|
2676
|
-
const prefix = text.substring(0, new_start);
|
|
2677
|
-
if (prefix.endsWith(marker)) {
|
|
2678
|
-
new_start -= marker.length;
|
|
2679
|
-
return;
|
|
2680
|
-
}
|
|
2681
|
-
}
|
|
2682
|
-
};
|
|
2683
|
-
for (let i = 0; i < 2; i++) {
|
|
2684
|
-
expand_if_unbalanced("**");
|
|
2685
|
-
expand_if_unbalanced("__");
|
|
2686
|
-
expand_if_unbalanced("_");
|
|
2687
|
-
expand_if_unbalanced("*");
|
|
2688
|
-
}
|
|
2689
|
-
return [new_start, new_end];
|
|
2690
|
-
}
|
|
2691
|
-
function _refine_match_boundaries(text, start, end) {
|
|
2692
|
-
const markers = ["**", "__", "*", "_"];
|
|
2693
|
-
let current_text = text.substring(start, end);
|
|
2694
|
-
let best_start = start;
|
|
2695
|
-
let best_end = end;
|
|
2696
|
-
const countMarker = (str, mk) => (str.match(new RegExp(mk.replace(/\*/g, "\\*"), "g")) || []).length;
|
|
2697
|
-
for (const marker of markers) {
|
|
2698
|
-
if (current_text.startsWith(marker)) {
|
|
2699
|
-
const current_score = countMarker(current_text, marker) % 2;
|
|
2700
|
-
const trimmed_text = current_text.substring(marker.length);
|
|
2701
|
-
const trimmed_score = countMarker(trimmed_text, marker) % 2;
|
|
2702
|
-
if (current_score === 1 && trimmed_score === 0) {
|
|
2703
|
-
best_start += marker.length;
|
|
2704
|
-
current_text = trimmed_text;
|
|
2705
|
-
}
|
|
2706
|
-
}
|
|
2707
|
-
}
|
|
2708
|
-
for (const marker of markers) {
|
|
2709
|
-
if (current_text.endsWith(marker)) {
|
|
2710
|
-
const current_score = countMarker(current_text, marker) % 2;
|
|
2711
|
-
const trimmed_text = current_text.substring(0, current_text.length - marker.length);
|
|
2712
|
-
const trimmed_score = countMarker(trimmed_text, marker) % 2;
|
|
2713
|
-
if (current_score === 1 && trimmed_score === 0) {
|
|
2714
|
-
best_end -= marker.length;
|
|
2715
|
-
current_text = trimmed_text;
|
|
2716
|
-
}
|
|
2717
|
-
}
|
|
2718
|
-
}
|
|
2719
|
-
return [best_start, best_end];
|
|
2720
|
-
}
|
|
2721
|
-
function _make_fuzzy_regex(target_text) {
|
|
2722
|
-
target_text = _replace_smart_quotes(target_text);
|
|
2723
|
-
const parts = [];
|
|
2724
|
-
const token_pattern = /(_+)|(\s+)|(['"])|([.,;:\/])/g;
|
|
2725
|
-
const md_noise = "[*_]*";
|
|
2726
|
-
const structural_noise = "(?:\\s*(?:[*+\\->]|\\d+\\.)\\s+|\\s*\\n\\s*)";
|
|
2727
|
-
const start_list_marker = "(?:[ \\t]*(?:[*+\\->]|\\d+\\.)\\s+)?";
|
|
2728
|
-
parts.push(start_list_marker);
|
|
2729
|
-
parts.push(md_noise);
|
|
2730
|
-
let last_idx = 0;
|
|
2731
|
-
let match;
|
|
2732
|
-
const escapeRegExp = (str) => str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
2733
|
-
while ((match = token_pattern.exec(target_text)) !== null) {
|
|
2734
|
-
const literal = target_text.substring(last_idx, match.index);
|
|
2735
|
-
if (literal) {
|
|
2736
|
-
parts.push(escapeRegExp(literal));
|
|
2737
|
-
parts.push(md_noise);
|
|
3675
|
+
if (!final_target && final_new) effective_op = "INSERTION";
|
|
3676
|
+
else if (final_target && !final_new) effective_op = "DELETION";
|
|
3677
|
+
else if (final_target && final_new) effective_op = "MODIFICATION";
|
|
3678
|
+
else effective_op = "COMMENT_ONLY";
|
|
2738
3679
|
}
|
|
2739
|
-
|
|
2740
|
-
|
|
2741
|
-
|
|
2742
|
-
|
|
2743
|
-
|
|
2744
|
-
|
|
2745
|
-
|
|
2746
|
-
|
|
2747
|
-
|
|
3680
|
+
return {
|
|
3681
|
+
type: "modify",
|
|
3682
|
+
target_text: final_target,
|
|
3683
|
+
new_text: final_new,
|
|
3684
|
+
comment: edit.comment,
|
|
3685
|
+
_match_start_index: effective_start_idx,
|
|
3686
|
+
_internal_op: effective_op,
|
|
3687
|
+
_active_mapper_ref: active_mapper
|
|
3688
|
+
};
|
|
3689
|
+
}
|
|
3690
|
+
_apply_single_edit_indexed(edit, orig_new, rebuild_map) {
|
|
3691
|
+
let op = edit._internal_op;
|
|
3692
|
+
const active_mapper = edit._active_mapper_ref || this.mapper;
|
|
3693
|
+
const start_idx = edit._match_start_index || 0;
|
|
3694
|
+
const length = edit.target_text ? edit.target_text.length : 0;
|
|
3695
|
+
const del_id = ["DELETION", "MODIFICATION"].includes(op) ? this._getNextId() : null;
|
|
3696
|
+
const ins_id = ["INSERTION", "MODIFICATION"].includes(op) ? this._getNextId() : null;
|
|
3697
|
+
if (op === "COMMENT_ONLY") {
|
|
3698
|
+
const target_runs2 = active_mapper.find_target_runs_by_index(
|
|
3699
|
+
start_idx,
|
|
3700
|
+
length,
|
|
3701
|
+
rebuild_map
|
|
3702
|
+
);
|
|
3703
|
+
if (target_runs2.length === 0) return false;
|
|
3704
|
+
if (!edit.comment) return true;
|
|
3705
|
+
const first_el = target_runs2[0]._element;
|
|
3706
|
+
const last_el = target_runs2[target_runs2.length - 1]._element;
|
|
3707
|
+
let start_p = first_el;
|
|
3708
|
+
while (start_p && start_p.tagName !== "w:p")
|
|
3709
|
+
start_p = start_p.parentNode;
|
|
3710
|
+
let end_p = last_el;
|
|
3711
|
+
while (end_p && end_p.tagName !== "w:p")
|
|
3712
|
+
end_p = end_p.parentNode;
|
|
3713
|
+
if (!start_p || !end_p) return false;
|
|
3714
|
+
const ascend_to_paragraph_child = (el, p) => {
|
|
3715
|
+
let cur = el;
|
|
3716
|
+
while (cur.parentNode && cur.parentNode !== p) {
|
|
3717
|
+
cur = cur.parentNode;
|
|
3718
|
+
}
|
|
3719
|
+
return cur;
|
|
3720
|
+
};
|
|
3721
|
+
const first_anchor = ascend_to_paragraph_child(first_el, start_p);
|
|
3722
|
+
const last_anchor = ascend_to_paragraph_child(last_el, end_p);
|
|
3723
|
+
if (start_p === end_p) {
|
|
3724
|
+
this._attach_comment(start_p, first_anchor, last_anchor, edit.comment);
|
|
2748
3725
|
} else {
|
|
2749
|
-
|
|
3726
|
+
this._attach_comment_spanning(
|
|
3727
|
+
start_p,
|
|
3728
|
+
first_anchor,
|
|
3729
|
+
end_p,
|
|
3730
|
+
last_anchor,
|
|
3731
|
+
edit.comment
|
|
3732
|
+
);
|
|
2750
3733
|
}
|
|
2751
|
-
|
|
2752
|
-
if (g_quote === "'") parts.push("[\u2018\u2019']");
|
|
2753
|
-
else parts.push('["\u201C\u201D]');
|
|
2754
|
-
} else if (g_punct) {
|
|
2755
|
-
parts.push(escapeRegExp(g_punct));
|
|
3734
|
+
return true;
|
|
2756
3735
|
}
|
|
2757
|
-
|
|
2758
|
-
|
|
2759
|
-
|
|
2760
|
-
|
|
2761
|
-
|
|
2762
|
-
|
|
2763
|
-
|
|
2764
|
-
|
|
2765
|
-
|
|
2766
|
-
|
|
2767
|
-
|
|
2768
|
-
|
|
2769
|
-
|
|
2770
|
-
|
|
2771
|
-
|
|
2772
|
-
|
|
2773
|
-
|
|
2774
|
-
|
|
2775
|
-
|
|
2776
|
-
|
|
2777
|
-
|
|
2778
|
-
|
|
2779
|
-
|
|
3736
|
+
if (op === "INSERTION") {
|
|
3737
|
+
const [anchor_run, anchor_para] = active_mapper.get_insertion_anchor(
|
|
3738
|
+
start_idx,
|
|
3739
|
+
rebuild_map
|
|
3740
|
+
);
|
|
3741
|
+
if (!anchor_run && !anchor_para) return false;
|
|
3742
|
+
const result = this._track_insert_multiline(
|
|
3743
|
+
edit.new_text || "",
|
|
3744
|
+
anchor_run,
|
|
3745
|
+
anchor_para,
|
|
3746
|
+
ins_id
|
|
3747
|
+
);
|
|
3748
|
+
if (!result.first_node) return false;
|
|
3749
|
+
const is_inline_first = result.first_node.tagName === "w:ins";
|
|
3750
|
+
if (is_inline_first) {
|
|
3751
|
+
if (anchor_run) {
|
|
3752
|
+
insertAfter(result.first_node, anchor_run._element);
|
|
3753
|
+
} else if (anchor_para) {
|
|
3754
|
+
anchor_para._element.appendChild(result.first_node);
|
|
3755
|
+
}
|
|
3756
|
+
}
|
|
3757
|
+
if (edit.comment) {
|
|
3758
|
+
const ascend_to_paragraph_child = (el, p) => {
|
|
3759
|
+
let cur = el;
|
|
3760
|
+
while (cur.parentNode && cur.parentNode !== p) {
|
|
3761
|
+
cur = cur.parentNode;
|
|
3762
|
+
}
|
|
3763
|
+
return cur;
|
|
3764
|
+
};
|
|
3765
|
+
if (result.last_p && result.last_ins) {
|
|
3766
|
+
let start_p = result.first_node;
|
|
3767
|
+
while (start_p && start_p.tagName !== "w:p")
|
|
3768
|
+
start_p = start_p.parentNode;
|
|
3769
|
+
if (start_p) {
|
|
3770
|
+
let first_anchor_target = result.first_node;
|
|
3771
|
+
if (result.first_node.tagName === "w:p") {
|
|
3772
|
+
first_anchor_target = findAllDescendants(result.first_node, "w:ins")[0] || result.first_node;
|
|
3773
|
+
}
|
|
3774
|
+
const start_anchor = ascend_to_paragraph_child(
|
|
3775
|
+
first_anchor_target,
|
|
3776
|
+
start_p
|
|
3777
|
+
);
|
|
3778
|
+
const end_anchor = ascend_to_paragraph_child(
|
|
3779
|
+
result.last_ins,
|
|
3780
|
+
result.last_p
|
|
3781
|
+
);
|
|
3782
|
+
this._attach_comment_spanning(
|
|
3783
|
+
start_p,
|
|
3784
|
+
start_anchor,
|
|
3785
|
+
result.last_p,
|
|
3786
|
+
end_anchor,
|
|
3787
|
+
edit.comment
|
|
3788
|
+
);
|
|
3789
|
+
}
|
|
3790
|
+
} else {
|
|
3791
|
+
let host_p = result.first_node;
|
|
3792
|
+
while (host_p && host_p.tagName !== "w:p")
|
|
3793
|
+
host_p = host_p.parentNode;
|
|
3794
|
+
if (host_p) {
|
|
3795
|
+
let first_anchor_target = result.first_node;
|
|
3796
|
+
if (result.first_node.tagName === "w:p") {
|
|
3797
|
+
first_anchor_target = findAllDescendants(result.first_node, "w:ins")[0] || result.first_node;
|
|
3798
|
+
}
|
|
3799
|
+
const anchor = ascend_to_paragraph_child(first_anchor_target, host_p);
|
|
3800
|
+
this._attach_comment(host_p, anchor, anchor, edit.comment);
|
|
3801
|
+
}
|
|
3802
|
+
}
|
|
3803
|
+
}
|
|
3804
|
+
return true;
|
|
2780
3805
|
}
|
|
2781
|
-
|
|
2782
|
-
|
|
2783
|
-
|
|
2784
|
-
|
|
2785
|
-
|
|
2786
|
-
|
|
2787
|
-
|
|
2788
|
-
|
|
2789
|
-
|
|
2790
|
-
|
|
2791
|
-
|
|
2792
|
-
|
|
3806
|
+
const target_runs = active_mapper.find_target_runs_by_index(
|
|
3807
|
+
start_idx,
|
|
3808
|
+
length,
|
|
3809
|
+
rebuild_map
|
|
3810
|
+
);
|
|
3811
|
+
const virtual_spans = active_mapper.get_virtual_spans_in_range(start_idx, length);
|
|
3812
|
+
if (target_runs.length === 0 && virtual_spans.length === 0) return false;
|
|
3813
|
+
const affected_ps = /* @__PURE__ */ new Set();
|
|
3814
|
+
for (const run of target_runs) {
|
|
3815
|
+
let p = run._element.parentNode;
|
|
3816
|
+
while (p && p.tagName !== "w:p") p = p.parentNode;
|
|
3817
|
+
if (p) affected_ps.add(p);
|
|
2793
3818
|
}
|
|
2794
|
-
|
|
2795
|
-
|
|
2796
|
-
|
|
2797
|
-
|
|
2798
|
-
|
|
2799
|
-
|
|
2800
|
-
|
|
2801
|
-
|
|
2802
|
-
|
|
2803
|
-
|
|
2804
|
-
|
|
2805
|
-
|
|
2806
|
-
|
|
2807
|
-
|
|
2808
|
-
|
|
2809
|
-
|
|
2810
|
-
|
|
2811
|
-
}
|
|
2812
|
-
return parts.join("");
|
|
2813
|
-
}
|
|
2814
|
-
function apply_edits_to_markdown(markdown_text, edits, include_index = false, highlight_only = false) {
|
|
2815
|
-
if (!edits || edits.length === 0) return markdown_text;
|
|
2816
|
-
const matched_edits = [];
|
|
2817
|
-
for (let idx = 0; idx < edits.length; idx++) {
|
|
2818
|
-
const edit = edits[idx];
|
|
2819
|
-
const target = edit.target_text || "";
|
|
2820
|
-
if (!target) {
|
|
2821
|
-
continue;
|
|
3819
|
+
let first_del = null;
|
|
3820
|
+
let last_del = null;
|
|
3821
|
+
for (const run of target_runs) {
|
|
3822
|
+
const del_tag = this._create_track_change_tag("w:del", "", del_id);
|
|
3823
|
+
const new_run = run._element.cloneNode(true);
|
|
3824
|
+
const tNodes = Array.from(new_run.getElementsByTagName("w:t"));
|
|
3825
|
+
tNodes.forEach((t) => {
|
|
3826
|
+
const delText = new_run.ownerDocument.createElement("w:delText");
|
|
3827
|
+
delText.textContent = t.textContent;
|
|
3828
|
+
if (t.hasAttribute("xml:space"))
|
|
3829
|
+
delText.setAttribute("xml:space", "preserve");
|
|
3830
|
+
new_run.replaceChild(delText, t);
|
|
3831
|
+
});
|
|
3832
|
+
del_tag.appendChild(new_run);
|
|
3833
|
+
run._element.parentNode?.replaceChild(del_tag, run._element);
|
|
3834
|
+
if (first_del === null) first_del = del_tag;
|
|
3835
|
+
last_del = del_tag;
|
|
2822
3836
|
}
|
|
2823
|
-
|
|
2824
|
-
|
|
2825
|
-
|
|
2826
|
-
|
|
2827
|
-
|
|
2828
|
-
|
|
2829
|
-
|
|
2830
|
-
|
|
2831
|
-
|
|
2832
|
-
|
|
2833
|
-
|
|
2834
|
-
|
|
2835
|
-
|
|
2836
|
-
|
|
3837
|
+
let ins_elem = null;
|
|
3838
|
+
let mod_last_p = null;
|
|
3839
|
+
let mod_last_ins = null;
|
|
3840
|
+
if (op === "MODIFICATION" && edit.new_text && last_del) {
|
|
3841
|
+
let mod_anchor_para_el = last_del;
|
|
3842
|
+
while (mod_anchor_para_el && mod_anchor_para_el.tagName !== "w:p") {
|
|
3843
|
+
mod_anchor_para_el = mod_anchor_para_el.parentNode;
|
|
3844
|
+
}
|
|
3845
|
+
const mod_anchor_para = mod_anchor_para_el ? new Paragraph(mod_anchor_para_el, null) : null;
|
|
3846
|
+
const style_source_run = target_runs.length > 0 ? target_runs[target_runs.length - 1] : null;
|
|
3847
|
+
const result = this._track_insert_multiline(
|
|
3848
|
+
edit.new_text,
|
|
3849
|
+
style_source_run,
|
|
3850
|
+
mod_anchor_para,
|
|
3851
|
+
ins_id
|
|
3852
|
+
);
|
|
3853
|
+
if (result.first_node) {
|
|
3854
|
+
const is_inline_first = result.first_node.tagName === "w:ins";
|
|
3855
|
+
if (is_inline_first) {
|
|
3856
|
+
insertAfter(result.first_node, last_del);
|
|
3857
|
+
ins_elem = result.first_node;
|
|
3858
|
+
} else {
|
|
3859
|
+
ins_elem = result.last_ins;
|
|
3860
|
+
}
|
|
3861
|
+
mod_last_p = result.last_p;
|
|
3862
|
+
mod_last_ins = result.last_ins;
|
|
3863
|
+
}
|
|
3864
|
+
}
|
|
3865
|
+
if (op === "DELETION" || op === "MODIFICATION") {
|
|
3866
|
+
if (op === "MODIFICATION" && target_runs.length === 0 && virtual_spans.length > 0 && edit.new_text) {
|
|
3867
|
+
const first_span = virtual_spans[0];
|
|
3868
|
+
if (first_span.paragraph) {
|
|
3869
|
+
const p1_el = first_span.paragraph._element;
|
|
3870
|
+
const last_runs = findAllDescendants(p1_el, "w:r");
|
|
3871
|
+
const anchor = last_runs.length > 0 ? new Run(last_runs[last_runs.length - 1], first_span.paragraph) : null;
|
|
3872
|
+
const result = this._track_insert_multiline(
|
|
3873
|
+
edit.new_text,
|
|
3874
|
+
anchor,
|
|
3875
|
+
first_span.paragraph,
|
|
3876
|
+
ins_id
|
|
3877
|
+
);
|
|
3878
|
+
if (result.first_node) {
|
|
3879
|
+
p1_el.appendChild(result.first_node);
|
|
3880
|
+
}
|
|
3881
|
+
}
|
|
3882
|
+
}
|
|
3883
|
+
for (const span of [...virtual_spans].reverse()) {
|
|
3884
|
+
if (span.paragraph) {
|
|
3885
|
+
const p1_element = span.paragraph._element;
|
|
3886
|
+
let p2_element = getNextElement(p1_element);
|
|
3887
|
+
while (p2_element && p2_element.tagName !== "w:p") {
|
|
3888
|
+
p2_element = getNextElement(p2_element);
|
|
3889
|
+
}
|
|
3890
|
+
if (p2_element && p2_element.tagName === "w:p") {
|
|
3891
|
+
let pPr = findChild(p1_element, "w:pPr");
|
|
3892
|
+
if (!pPr) {
|
|
3893
|
+
pPr = p1_element.ownerDocument.createElement("w:pPr");
|
|
3894
|
+
p1_element.insertBefore(pPr, p1_element.firstChild);
|
|
3895
|
+
}
|
|
3896
|
+
let rPr = findChild(pPr, "w:rPr");
|
|
3897
|
+
if (!rPr) {
|
|
3898
|
+
rPr = p1_element.ownerDocument.createElement("w:rPr");
|
|
3899
|
+
pPr.appendChild(rPr);
|
|
3900
|
+
}
|
|
3901
|
+
const del_mark = this._create_track_change_tag("w:del");
|
|
3902
|
+
rPr.appendChild(del_mark);
|
|
3903
|
+
const children = Array.from(p2_element.childNodes);
|
|
3904
|
+
for (const child of children) {
|
|
3905
|
+
if (child.nodeType === 1 && child.tagName === "w:pPr") {
|
|
3906
|
+
continue;
|
|
3907
|
+
}
|
|
3908
|
+
p1_element.appendChild(child);
|
|
3909
|
+
}
|
|
3910
|
+
if (p2_element.parentNode) {
|
|
3911
|
+
p2_element.parentNode.removeChild(p2_element);
|
|
3912
|
+
}
|
|
3913
|
+
}
|
|
3914
|
+
}
|
|
2837
3915
|
}
|
|
2838
3916
|
}
|
|
2839
|
-
if (
|
|
2840
|
-
|
|
2841
|
-
|
|
3917
|
+
if (edit.comment && first_del !== null) {
|
|
3918
|
+
let end_anchor_el;
|
|
3919
|
+
let end_p;
|
|
3920
|
+
if (mod_last_p && mod_last_ins) {
|
|
3921
|
+
end_anchor_el = mod_last_ins;
|
|
3922
|
+
end_p = mod_last_p;
|
|
3923
|
+
} else {
|
|
3924
|
+
const final_anchor = ins_elem !== null ? ins_elem : last_del;
|
|
3925
|
+
end_anchor_el = final_anchor;
|
|
3926
|
+
end_p = final_anchor;
|
|
3927
|
+
while (end_p && end_p.tagName !== "w:p")
|
|
3928
|
+
end_p = end_p.parentNode;
|
|
3929
|
+
}
|
|
3930
|
+
let start_p = first_del;
|
|
3931
|
+
while (start_p && start_p.tagName !== "w:p")
|
|
3932
|
+
start_p = start_p.parentNode;
|
|
3933
|
+
if (!start_p || !end_p) return true;
|
|
3934
|
+
const ascend_to_paragraph_child = (el, p) => {
|
|
3935
|
+
let cur = el;
|
|
3936
|
+
while (cur.parentNode && cur.parentNode !== p) {
|
|
3937
|
+
cur = cur.parentNode;
|
|
3938
|
+
}
|
|
3939
|
+
return cur;
|
|
3940
|
+
};
|
|
3941
|
+
const start_anchor = ascend_to_paragraph_child(first_del, start_p);
|
|
3942
|
+
const end_anchor = ascend_to_paragraph_child(end_anchor_el, end_p);
|
|
3943
|
+
if (start_p === end_p) {
|
|
3944
|
+
this._attach_comment(start_p, start_anchor, end_anchor, edit.comment);
|
|
3945
|
+
} else {
|
|
3946
|
+
this._attach_comment_spanning(
|
|
3947
|
+
start_p,
|
|
3948
|
+
start_anchor,
|
|
3949
|
+
end_p,
|
|
3950
|
+
end_anchor,
|
|
3951
|
+
edit.comment
|
|
3952
|
+
);
|
|
3953
|
+
}
|
|
3954
|
+
}
|
|
3955
|
+
for (const p_elem of affected_ps) {
|
|
3956
|
+
let has_visible = false;
|
|
3957
|
+
for (const tag of ["w:t", "w:tab", "w:br"]) {
|
|
3958
|
+
const nodes = findAllDescendants(p_elem, tag);
|
|
3959
|
+
for (const node of nodes) {
|
|
3960
|
+
let is_deleted = false;
|
|
3961
|
+
let curr = node.parentNode;
|
|
3962
|
+
while (curr && curr !== p_elem.parentNode) {
|
|
3963
|
+
if (curr.tagName === "w:del") {
|
|
3964
|
+
is_deleted = true;
|
|
3965
|
+
break;
|
|
3966
|
+
}
|
|
3967
|
+
curr = curr.parentNode;
|
|
3968
|
+
}
|
|
3969
|
+
if (!is_deleted) {
|
|
3970
|
+
if (tag === "w:t" && !node.textContent) continue;
|
|
3971
|
+
has_visible = true;
|
|
3972
|
+
break;
|
|
3973
|
+
}
|
|
3974
|
+
}
|
|
3975
|
+
if (has_visible) break;
|
|
3976
|
+
}
|
|
3977
|
+
if (!has_visible) {
|
|
3978
|
+
let pPr = findChild(p_elem, "w:pPr");
|
|
3979
|
+
if (!pPr) {
|
|
3980
|
+
pPr = p_elem.ownerDocument.createElement("w:pPr");
|
|
3981
|
+
p_elem.insertBefore(pPr, p_elem.firstChild);
|
|
3982
|
+
}
|
|
3983
|
+
let rPr = findChild(pPr, "w:rPr");
|
|
3984
|
+
if (!rPr) {
|
|
3985
|
+
rPr = p_elem.ownerDocument.createElement("w:rPr");
|
|
3986
|
+
pPr.appendChild(rPr);
|
|
3987
|
+
}
|
|
3988
|
+
if (!findChild(rPr, "w:del")) {
|
|
3989
|
+
const del_mark = this._create_track_change_tag("w:del");
|
|
3990
|
+
rPr.appendChild(del_mark);
|
|
3991
|
+
}
|
|
3992
|
+
}
|
|
2842
3993
|
}
|
|
3994
|
+
return true;
|
|
2843
3995
|
}
|
|
2844
|
-
|
|
2845
|
-
let result = markdown_text;
|
|
2846
|
-
for (const [start, end, actual_text, edit, orig_idx] of matched_edits_filtered) {
|
|
2847
|
-
const new_txt = edit.new_text || "";
|
|
2848
|
-
const [prefix_len, suffix_len] = trim_common_context(actual_text, new_txt);
|
|
2849
|
-
const unmodified_prefix = prefix_len > 0 ? actual_text.substring(0, prefix_len) : "";
|
|
2850
|
-
const unmodified_suffix = suffix_len > 0 ? actual_text.substring(actual_text.length - suffix_len) : "";
|
|
2851
|
-
const t_end = actual_text.length - suffix_len;
|
|
2852
|
-
const n_end = new_txt.length - suffix_len;
|
|
2853
|
-
const isolated_target = actual_text.substring(prefix_len, t_end);
|
|
2854
|
-
const isolated_new = new_txt.substring(prefix_len, n_end);
|
|
2855
|
-
const markup = _build_critic_markup(
|
|
2856
|
-
isolated_target,
|
|
2857
|
-
isolated_new,
|
|
2858
|
-
edit.comment,
|
|
2859
|
-
orig_idx,
|
|
2860
|
-
include_index,
|
|
2861
|
-
highlight_only
|
|
2862
|
-
);
|
|
2863
|
-
const full_replacement = unmodified_prefix + markup + unmodified_suffix;
|
|
2864
|
-
result = result.substring(0, start) + full_replacement + result.substring(end);
|
|
2865
|
-
}
|
|
2866
|
-
return result;
|
|
2867
|
-
}
|
|
3996
|
+
};
|
|
2868
3997
|
|
|
2869
3998
|
// src/pagination.ts
|
|
2870
3999
|
var PAGE_TARGET_CHARS = 19e3;
|
|
@@ -3657,7 +4786,12 @@ function extract_outline(doc, projected_body, body_pages, body_page_offsets, par
|
|
|
3657
4786
|
const level = _heading_level(paragraph);
|
|
3658
4787
|
const text = _heading_text(paragraph, comments_map);
|
|
3659
4788
|
const style = _determine_heading_style(paragraph);
|
|
3660
|
-
const owned_end = _find_owned_end(
|
|
4789
|
+
const owned_end = _find_owned_end(
|
|
4790
|
+
block_records,
|
|
4791
|
+
heading_indices,
|
|
4792
|
+
h_pos,
|
|
4793
|
+
level
|
|
4794
|
+
);
|
|
3661
4795
|
const owned_blocks = block_records.slice(rec_idx + 1, owned_end);
|
|
3662
4796
|
const has_table = _direct_has_table(block_records, rec_idx + 1, owned_end);
|
|
3663
4797
|
const footnote_ids = _collect_footnote_ids(owned_blocks);
|
|
@@ -3704,7 +4838,13 @@ function _walk_doc_body(doc, comments_map) {
|
|
|
3704
4838
|
const p_text = build_paragraph_text(item, comments_map, false);
|
|
3705
4839
|
const block_len = (prefix + p_text).length;
|
|
3706
4840
|
if (!is_first_block) cursor += 2;
|
|
3707
|
-
records.push({
|
|
4841
|
+
records.push({
|
|
4842
|
+
item,
|
|
4843
|
+
is_paragraph: true,
|
|
4844
|
+
is_table: false,
|
|
4845
|
+
start_offset: cursor,
|
|
4846
|
+
projected_length: block_len
|
|
4847
|
+
});
|
|
3708
4848
|
cursor += block_len;
|
|
3709
4849
|
is_first_block = false;
|
|
3710
4850
|
} else if (item instanceof Table) {
|
|
@@ -3712,7 +4852,13 @@ function _walk_doc_body(doc, comments_map) {
|
|
|
3712
4852
|
const block_len = table_text ? table_text.length : 0;
|
|
3713
4853
|
if (!is_first_block) cursor += 2;
|
|
3714
4854
|
const table_start = cursor;
|
|
3715
|
-
records.push({
|
|
4855
|
+
records.push({
|
|
4856
|
+
item,
|
|
4857
|
+
is_paragraph: false,
|
|
4858
|
+
is_table: true,
|
|
4859
|
+
start_offset: table_start,
|
|
4860
|
+
projected_length: block_len
|
|
4861
|
+
});
|
|
3716
4862
|
_record_table_inner_blocks_lite(item, table_start, records, comments_map);
|
|
3717
4863
|
cursor += block_len;
|
|
3718
4864
|
is_first_block = false;
|
|
@@ -3732,7 +4878,12 @@ function _compute_inner_block_offset(table, target_paragraph, table_start_offset
|
|
|
3732
4878
|
if (seen_cells.has(cell)) continue;
|
|
3733
4879
|
seen_cells.add(cell);
|
|
3734
4880
|
if (cells_in_row > 0) cursor += 3;
|
|
3735
|
-
const [new_cursor, found] = _walk_cell_for_offset(
|
|
4881
|
+
const [new_cursor, found] = _walk_cell_for_offset(
|
|
4882
|
+
cell,
|
|
4883
|
+
target_el,
|
|
4884
|
+
cursor,
|
|
4885
|
+
comments_map
|
|
4886
|
+
);
|
|
3736
4887
|
if (found) return new_cursor;
|
|
3737
4888
|
cursor = new_cursor;
|
|
3738
4889
|
cells_in_row++;
|
|
@@ -3752,9 +4903,15 @@ function _walk_cell_for_offset(cell, target_el, cell_start_cursor, comments_map)
|
|
|
3752
4903
|
const p_text = build_paragraph_text(inner_item, comments_map, false);
|
|
3753
4904
|
cursor += (prefix + p_text).length;
|
|
3754
4905
|
} else if (inner_item instanceof Table) {
|
|
3755
|
-
const nested_offset = _compute_inner_block_offset(
|
|
4906
|
+
const nested_offset = _compute_inner_block_offset(
|
|
4907
|
+
inner_item,
|
|
4908
|
+
new Paragraph(target_el, null),
|
|
4909
|
+
cursor,
|
|
4910
|
+
comments_map
|
|
4911
|
+
);
|
|
3756
4912
|
if (nested_offset !== cursor) {
|
|
3757
|
-
if (_element_is_descendant(target_el, inner_item._element))
|
|
4913
|
+
if (_element_is_descendant(target_el, inner_item._element))
|
|
4914
|
+
return [nested_offset, true];
|
|
3758
4915
|
}
|
|
3759
4916
|
const table_text = extract_table(inner_item, comments_map, false, 0);
|
|
3760
4917
|
cursor += table_text ? table_text.length : 0;
|
|
@@ -3779,11 +4936,33 @@ function _record_table_inner_blocks_lite(table, inherited_offset, records, comme
|
|
|
3779
4936
|
seen_cells.add(cell);
|
|
3780
4937
|
for (const inner_item of iter_block_items(cell)) {
|
|
3781
4938
|
if (inner_item instanceof Paragraph) {
|
|
3782
|
-
const true_offset = _is_heading(inner_item) ? _compute_inner_block_offset(
|
|
3783
|
-
|
|
4939
|
+
const true_offset = _is_heading(inner_item) ? _compute_inner_block_offset(
|
|
4940
|
+
table,
|
|
4941
|
+
inner_item,
|
|
4942
|
+
inherited_offset,
|
|
4943
|
+
comments_map
|
|
4944
|
+
) : inherited_offset;
|
|
4945
|
+
records.push({
|
|
4946
|
+
item: inner_item,
|
|
4947
|
+
is_paragraph: true,
|
|
4948
|
+
is_table: false,
|
|
4949
|
+
start_offset: true_offset,
|
|
4950
|
+
projected_length: 0
|
|
4951
|
+
});
|
|
3784
4952
|
} else if (inner_item instanceof Table) {
|
|
3785
|
-
records.push({
|
|
3786
|
-
|
|
4953
|
+
records.push({
|
|
4954
|
+
item: inner_item,
|
|
4955
|
+
is_paragraph: false,
|
|
4956
|
+
is_table: true,
|
|
4957
|
+
start_offset: inherited_offset,
|
|
4958
|
+
projected_length: 0
|
|
4959
|
+
});
|
|
4960
|
+
_record_table_inner_blocks_lite(
|
|
4961
|
+
inner_item,
|
|
4962
|
+
inherited_offset,
|
|
4963
|
+
records,
|
|
4964
|
+
comments_map
|
|
4965
|
+
);
|
|
3787
4966
|
}
|
|
3788
4967
|
}
|
|
3789
4968
|
}
|
|
@@ -3804,7 +4983,8 @@ ${header}`);
|
|
|
3804
4983
|
if (fn_text) blocks.push(fn_text);
|
|
3805
4984
|
} else if (item instanceof Paragraph) {
|
|
3806
4985
|
let prefix = get_paragraph_prefix(item);
|
|
3807
|
-
if (is_first_para && c_type === "FootnoteItem")
|
|
4986
|
+
if (is_first_para && c_type === "FootnoteItem")
|
|
4987
|
+
prefix = `[^${part.note_type}-${part.id}]: ${prefix}`;
|
|
3808
4988
|
const p_text = build_paragraph_text(item, comments_map, false);
|
|
3809
4989
|
blocks.push(prefix + p_text);
|
|
3810
4990
|
is_first_para = false;
|
|
@@ -3853,37 +5033,51 @@ function _strip_inline_formatting(text) {
|
|
|
3853
5033
|
return text;
|
|
3854
5034
|
}
|
|
3855
5035
|
function _determine_heading_style(paragraph) {
|
|
3856
|
-
const [style_cache, default_pstyle] = _get_style_cache(
|
|
5036
|
+
const [style_cache, default_pstyle] = _get_style_cache(
|
|
5037
|
+
paragraph._parent.part || paragraph._parent
|
|
5038
|
+
);
|
|
3857
5039
|
const pPr = findChild(paragraph._element, "w:pPr");
|
|
3858
5040
|
let style_id = default_pstyle;
|
|
3859
5041
|
if (pPr) {
|
|
3860
|
-
const oLvl = findChild(pPr, "w:outlineLvl");
|
|
3861
|
-
if (oLvl && /^\d+$/.test(oLvl.getAttribute("w:val") || "")) {
|
|
3862
|
-
const style = _safe_style_name(paragraph, style_cache, default_pstyle);
|
|
3863
|
-
if (style && (style.startsWith("Heading") || style === "Title")) return style;
|
|
3864
|
-
return "(outline_level)";
|
|
3865
|
-
}
|
|
3866
5042
|
const pStyle = findChild(pPr, "w:pStyle");
|
|
3867
5043
|
if (pStyle) style_id = pStyle.getAttribute("w:val") || default_pstyle;
|
|
3868
5044
|
}
|
|
3869
|
-
|
|
3870
|
-
if (style_name && (style_name.startsWith("Heading") || style_name === "Title")) return style_name;
|
|
3871
|
-
if (style_name && /Heading[ ]?([1-6])(?![0-9])/.test(style_name)) return style_name;
|
|
3872
|
-
return "(heuristic)";
|
|
3873
|
-
}
|
|
3874
|
-
function _safe_style_name(paragraph, style_cache, default_pstyle) {
|
|
3875
|
-
const pPr = findChild(paragraph._element, "w:pPr");
|
|
3876
|
-
let style_id = default_pstyle;
|
|
5045
|
+
let outline_level = null;
|
|
3877
5046
|
if (pPr) {
|
|
3878
|
-
const
|
|
3879
|
-
if (
|
|
5047
|
+
const oLvl = findChild(pPr, "w:outlineLvl");
|
|
5048
|
+
if (oLvl && /^\d+$/.test(oLvl.getAttribute("w:val") || "")) {
|
|
5049
|
+
outline_level = parseInt(oLvl.getAttribute("w:val"), 10);
|
|
5050
|
+
}
|
|
5051
|
+
}
|
|
5052
|
+
if (outline_level === null && style_id && style_cache && style_cache[style_id]) {
|
|
5053
|
+
outline_level = style_cache[style_id].outline_level;
|
|
5054
|
+
}
|
|
5055
|
+
const style_name = style_id && style_cache && style_cache[style_id] ? style_cache[style_id].name : style_id;
|
|
5056
|
+
let normalized_style_name = style_name;
|
|
5057
|
+
if (normalized_style_name && typeof normalized_style_name === "string") {
|
|
5058
|
+
if (normalized_style_name.toLowerCase().startsWith("heading")) {
|
|
5059
|
+
normalized_style_name = normalized_style_name.replace(/^heading/i, "Heading");
|
|
5060
|
+
} else if (normalized_style_name.toLowerCase() === "title") {
|
|
5061
|
+
normalized_style_name = "Title";
|
|
5062
|
+
}
|
|
5063
|
+
}
|
|
5064
|
+
if (outline_level !== null && outline_level >= 0 && outline_level <= 8) {
|
|
5065
|
+
if (normalized_style_name && (normalized_style_name.startsWith("Heading") || normalized_style_name === "Title")) {
|
|
5066
|
+
return normalized_style_name;
|
|
5067
|
+
}
|
|
5068
|
+
return "(outline_level)";
|
|
3880
5069
|
}
|
|
3881
|
-
|
|
5070
|
+
if (normalized_style_name && (normalized_style_name.startsWith("Heading") || normalized_style_name === "Title"))
|
|
5071
|
+
return normalized_style_name;
|
|
5072
|
+
if (normalized_style_name && /Heading[ ]?([1-6])(?![0-9])/.test(normalized_style_name))
|
|
5073
|
+
return normalized_style_name;
|
|
5074
|
+
return "(heuristic)";
|
|
3882
5075
|
}
|
|
3883
5076
|
function _find_owned_end(block_records, heading_indices, current_h_pos, current_level) {
|
|
3884
5077
|
for (let next_h_pos = current_h_pos + 1; next_h_pos < heading_indices.length; next_h_pos++) {
|
|
3885
5078
|
const next_idx = heading_indices[next_h_pos];
|
|
3886
|
-
if (_heading_level(block_records[next_idx].item) <= current_level)
|
|
5079
|
+
if (_heading_level(block_records[next_idx].item) <= current_level)
|
|
5080
|
+
return next_idx;
|
|
3887
5081
|
}
|
|
3888
5082
|
return block_records.length;
|
|
3889
5083
|
}
|
|
@@ -4014,7 +5208,7 @@ var SanitizeReport = class {
|
|
|
4014
5208
|
if (this.warnings.length > 0) {
|
|
4015
5209
|
lines.push(`Result: CLEAN WITH WARNINGS (${this.warnings.length} warning${this.warnings.length > 1 ? "s" : ""})`);
|
|
4016
5210
|
} else {
|
|
4017
|
-
lines.push(
|
|
5211
|
+
lines.push(`Result: CLEAN (${this.tracked_changes_found} changes resolved, ${this.comments_removed} comments removed)`);
|
|
4018
5212
|
}
|
|
4019
5213
|
lines.push(sep);
|
|
4020
5214
|
return lines.join("\n");
|
|
@@ -4033,6 +5227,83 @@ function findDescendantsByLocalName(element, localName) {
|
|
|
4033
5227
|
}
|
|
4034
5228
|
return result;
|
|
4035
5229
|
}
|
|
5230
|
+
function coalesce_runs(doc) {
|
|
5231
|
+
let count = 0;
|
|
5232
|
+
function areRunsIdentical(rPr1, rPr2) {
|
|
5233
|
+
const xml1 = rPr1 ? rPr1.toString() : "";
|
|
5234
|
+
const xml2 = rPr2 ? rPr2.toString() : "";
|
|
5235
|
+
return xml1 === xml2;
|
|
5236
|
+
}
|
|
5237
|
+
function hasSpecialContent(run) {
|
|
5238
|
+
const safeTags = ["w:t", "w:tab", "w:br", "w:cr", "w:delText", "w:rPr"];
|
|
5239
|
+
for (let i = 0; i < run.childNodes.length; i++) {
|
|
5240
|
+
const child = run.childNodes[i];
|
|
5241
|
+
if (child.nodeType === 1) {
|
|
5242
|
+
const tag = child.tagName;
|
|
5243
|
+
if (!safeTags.includes(tag)) return true;
|
|
5244
|
+
}
|
|
5245
|
+
}
|
|
5246
|
+
return false;
|
|
5247
|
+
}
|
|
5248
|
+
function coalesceContainer(container) {
|
|
5249
|
+
const children = Array.from(container.childNodes).filter((n) => n.nodeType === 1);
|
|
5250
|
+
let i = 0;
|
|
5251
|
+
while (i < children.length - 1) {
|
|
5252
|
+
const curr = children[i];
|
|
5253
|
+
const nxt = children[i + 1];
|
|
5254
|
+
if (curr.tagName === "w:r" && nxt.tagName === "w:r") {
|
|
5255
|
+
if (!hasSpecialContent(curr) && !hasSpecialContent(nxt)) {
|
|
5256
|
+
const rPr1 = findChild(curr, "w:rPr");
|
|
5257
|
+
const rPr2 = findChild(nxt, "w:rPr");
|
|
5258
|
+
if (areRunsIdentical(rPr1, rPr2)) {
|
|
5259
|
+
let last_t = null;
|
|
5260
|
+
for (let c = 0; c < curr.childNodes.length; c++) {
|
|
5261
|
+
const child = curr.childNodes[c];
|
|
5262
|
+
if (child.nodeType === 1 && (child.tagName === "w:t" || child.tagName === "w:delText")) {
|
|
5263
|
+
last_t = child;
|
|
5264
|
+
}
|
|
5265
|
+
}
|
|
5266
|
+
const nxtChildren = Array.from(nxt.childNodes).filter((n) => n.nodeType === 1);
|
|
5267
|
+
for (const child of nxtChildren) {
|
|
5268
|
+
if (child.tagName === "w:rPr") continue;
|
|
5269
|
+
if ((child.tagName === "w:t" || child.tagName === "w:delText") && last_t && last_t.tagName === child.tagName) {
|
|
5270
|
+
const t1 = last_t.textContent || "";
|
|
5271
|
+
const t2 = child.textContent || "";
|
|
5272
|
+
const combined = t1 + t2;
|
|
5273
|
+
last_t.textContent = combined;
|
|
5274
|
+
if (combined.trim() !== combined) {
|
|
5275
|
+
last_t.setAttribute("xml:space", "preserve");
|
|
5276
|
+
}
|
|
5277
|
+
} else {
|
|
5278
|
+
curr.appendChild(child);
|
|
5279
|
+
if (child.tagName === "w:t" || child.tagName === "w:delText") {
|
|
5280
|
+
last_t = child;
|
|
5281
|
+
}
|
|
5282
|
+
}
|
|
5283
|
+
}
|
|
5284
|
+
container.removeChild(nxt);
|
|
5285
|
+
children.splice(i + 1, 1);
|
|
5286
|
+
count++;
|
|
5287
|
+
continue;
|
|
5288
|
+
}
|
|
5289
|
+
}
|
|
5290
|
+
}
|
|
5291
|
+
if (["w:ins", "w:del", "w:hyperlink", "w:sdt", "w:smartTag", "w:fldSimple", "w:sdtContent"].includes(curr.tagName)) {
|
|
5292
|
+
coalesceContainer(curr);
|
|
5293
|
+
}
|
|
5294
|
+
i++;
|
|
5295
|
+
}
|
|
5296
|
+
if (children.length > 0) {
|
|
5297
|
+
const last = children[children.length - 1];
|
|
5298
|
+
if (["w:ins", "w:del", "w:hyperlink", "w:sdt", "w:smartTag", "w:fldSimple", "w:sdtContent"].includes(last.tagName)) {
|
|
5299
|
+
coalesceContainer(last);
|
|
5300
|
+
}
|
|
5301
|
+
}
|
|
5302
|
+
}
|
|
5303
|
+
const paragraphs = findAllDescendants(doc.element, "w:p");
|
|
5304
|
+
for (const p of paragraphs) coalesceContainer(p);
|
|
5305
|
+
return count ? [`Adjacent identical runs coalesced: ${count}`] : [];
|
|
5306
|
+
}
|
|
4036
5307
|
function strip_rsid(doc) {
|
|
4037
5308
|
let count = 0;
|
|
4038
5309
|
const rsidAttrs = ["w:rsidR", "w:rsidRPr", "w:rsidRDefault", "w:rsidP", "w:rsidDel", "w:rsidSect", "w:rsidTr"];
|
|
@@ -4388,6 +5659,7 @@ async function finalize_document(doc, options) {
|
|
|
4388
5659
|
report.add_transform_lines(strip_proof_errors(doc));
|
|
4389
5660
|
report.add_transform_lines(strip_empty_properties(doc));
|
|
4390
5661
|
report.add_transform_lines(strip_hidden_text(doc));
|
|
5662
|
+
report.add_transform_lines(coalesce_runs(doc));
|
|
4391
5663
|
report.add_transform_lines(scrub_doc_properties(doc));
|
|
4392
5664
|
report.add_transform_lines(scrub_timestamps(doc));
|
|
4393
5665
|
report.add_transform_lines(strip_custom_xml(doc));
|
|
@@ -4415,6 +5687,26 @@ async function finalize_document(doc, options) {
|
|
|
4415
5687
|
if (options.export_pdf) {
|
|
4416
5688
|
report.warnings.push("PDF export requires the Python/Word COM environment and is skipped in this zero-dependency Node agent.");
|
|
4417
5689
|
}
|
|
5690
|
+
for (const part of doc.pkg.parts) {
|
|
5691
|
+
if (part === doc.part || part.contentType.includes("wordprocessingml") && part.contentType.endsWith("+xml")) {
|
|
5692
|
+
if (part._element.hasAttribute("xmlns:w16du")) {
|
|
5693
|
+
let hasW16du = false;
|
|
5694
|
+
if (Array.from(part._element.attributes || []).some((a) => a.name.startsWith("w16du:") && a.name !== "xmlns:w16du")) {
|
|
5695
|
+
hasW16du = true;
|
|
5696
|
+
}
|
|
5697
|
+
if (!hasW16du) {
|
|
5698
|
+
const allNodes = findAllDescendants(part._element, "*");
|
|
5699
|
+
for (const n of allNodes) {
|
|
5700
|
+
if (n.tagName.startsWith("w16du:") || Array.from(n.attributes || []).some((a) => a.name.startsWith("w16du:"))) {
|
|
5701
|
+
hasW16du = true;
|
|
5702
|
+
break;
|
|
5703
|
+
}
|
|
5704
|
+
}
|
|
5705
|
+
}
|
|
5706
|
+
if (!hasW16du) part._element.removeAttribute("xmlns:w16du");
|
|
5707
|
+
}
|
|
5708
|
+
}
|
|
5709
|
+
}
|
|
4418
5710
|
if (report.warnings.length > 0) report.status = "clean_with_warnings";
|
|
4419
5711
|
const outBuffer = await doc.save();
|
|
4420
5712
|
return { reportText: report.render(), outBuffer };
|
|
@@ -4432,6 +5724,7 @@ function identifyEngine() {
|
|
|
4432
5724
|
RedlineEngine,
|
|
4433
5725
|
apply_edits_to_markdown,
|
|
4434
5726
|
create_unified_diff,
|
|
5727
|
+
create_word_patch_diff,
|
|
4435
5728
|
extractTextFromBuffer,
|
|
4436
5729
|
extract_outline,
|
|
4437
5730
|
finalize_document,
|