@adeu/core 1.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +3627 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +247 -0
- package/dist/index.d.ts +247 -0
- package/dist/index.js +3579 -0
- package/dist/index.js.map +1 -0
- package/package.json +38 -0
- package/src/comments.test.ts +38 -0
- package/src/comments.ts +451 -0
- package/src/diff.test.ts +62 -0
- package/src/diff.ts +251 -0
- package/src/docx/bridge.ts +189 -0
- package/src/docx/dom.ts +54 -0
- package/src/docx/primitives.ts +65 -0
- package/src/domain.ts +11 -0
- package/src/engine.atomic.test.ts +58 -0
- package/src/engine.batch.test.ts +93 -0
- package/src/engine.safety.test.ts +42 -0
- package/src/engine.tables.test.ts +166 -0
- package/src/engine.ts +735 -0
- package/src/index.test.ts +8 -0
- package/src/index.ts +14 -0
- package/src/ingest.test.ts +44 -0
- package/src/ingest.ts +400 -0
- package/src/mapper.test.ts +66 -0
- package/src/mapper.ts +835 -0
- package/src/markup.test.ts +150 -0
- package/src/markup.ts +323 -0
- package/src/models.ts +51 -0
- package/src/outline.ts +377 -0
- package/src/pagination.ts +239 -0
- package/src/test-utils.ts +142 -0
- package/src/utils/docx.ts +478 -0
- package/tsconfig.json +21 -0
- package/tsup.config.ts +10 -0
- package/vitest.config.ts +12 -0
package/dist/index.cjs
ADDED
|
@@ -0,0 +1,3627 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Short aliases for the Object reflection built-ins used by the module-interop helpers that follow.
var {
  create: __create,
  defineProperty: __defProp,
  getOwnPropertyDescriptor: __getOwnPropDesc,
  getOwnPropertyNames: __getOwnPropNames,
  getPrototypeOf: __getProtoOf
} = Object;
var { hasOwnProperty: __hasOwnProp } = Object.prototype;
|
|
8
|
+
/**
 * Defines one enumerable accessor on `target` for every key enumerated from `all`.
 * Each value in `all` is used directly as the property's getter function.
 */
var __export = (target, all) => {
  for (var exportName in all) {
    const getter = all[exportName];
    __defProp(target, exportName, { get: getter, enumerable: true });
  }
};
|
|
12
|
+
/**
 * Mirrors the own properties of `from` onto `to` as live getters.
 *
 * Keys already owned by `to`, and the single key named by `except`, are skipped.
 * A copied property is enumerable when the source property is enumerable (or has
 * no descriptor). Non-object, non-function sources leave `to` untouched.
 *
 * @returns `to`, for chaining.
 */
var __copyProps = (to, from, except, desc) => {
  const isCopyable = typeof from === "function" || (typeof from === "object" && from);
  if (!isCopyable) return to;
  for (const key of __getOwnPropNames(from)) {
    if (key === except || __hasOwnProp.call(to, key)) continue;
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, {
      get: () => from[key],
      enumerable: !desc || desc.enumerable
    });
  }
  return to;
};
|
|
20
|
+
/**
 * Wraps a required module in an ESM-shaped namespace object.
 *
 * The wrapper inherits from the module's prototype (or is a plain object when
 * `mod` is null/undefined) and exposes the module's own properties as getters.
 */
var __toESM = (mod, isNodeMode, target) => {
  target = mod != null ? __create(__getProtoOf(mod)) : {};
  // If the importer is in node compatibility mode or this is not an ESM
  // file that has been converted to a CommonJS file using a Babel-
  // compatible transform (i.e. "__esModule" has not been set), then set
  // "default" to the CommonJS "module.exports" for node compatibility.
  const needsDefault = isNodeMode || !mod || !mod.__esModule;
  if (needsDefault) {
    __defProp(target, "default", { value: mod, enumerable: true });
  }
  return __copyProps(target, mod);
};
|
|
28
|
+
/**
 * Builds a CommonJS exports object from `mod`: a fresh object flagged with a
 * (default, non-enumerable) `__esModule: true` marker that mirrors `mod`'s properties.
 */
var __toCommonJS = (mod) => {
  const cjsExports = __defProp({}, "__esModule", { value: true });
  return __copyProps(cjsExports, mod);
};
|
|
29
|
+
|
|
30
|
+
// src/index.ts
|
|
31
|
+
var index_exports = {};
{
  // Every export is registered as a getter, so bindings declared later in this
  // file are only read when a consumer actually accesses the property.
  const lazyExports = {
    BatchValidationError: () => BatchValidationError,
    DocumentMapper: () => DocumentMapper,
    DocumentObject: () => DocumentObject,
    RedlineEngine: () => RedlineEngine,
    apply_edits_to_markdown: () => apply_edits_to_markdown,
    extractTextFromBuffer: () => extractTextFromBuffer,
    extract_outline: () => extract_outline,
    generate_edits_from_text: () => generate_edits_from_text,
    identifyEngine: () => identifyEngine,
    paginate: () => paginate,
    split_structural_appendix: () => split_structural_appendix,
    trim_common_context: () => trim_common_context
  };
  __export(index_exports, lazyExports);
}
module.exports = __toCommonJS(index_exports);
|
|
47
|
+
|
|
48
|
+
// src/docx/bridge.ts
|
|
49
|
+
var import_jszip = __toESM(require("jszip"), 1);
|
|
50
|
+
|
|
51
|
+
// src/docx/dom.ts
|
|
52
|
+
var import_xmldom = require("@xmldom/xmldom");
|
|
53
|
+
/**
 * Returns the first direct child element of `element` whose tag name equals
 * `tagName`, or `null` when there is none. Non-element nodes (nodeType !== 1)
 * are ignored.
 */
function findChild(element, tagName) {
  const firstMatch = Array.from(element.childNodes).find(
    (node) => node.nodeType === 1 && node.tagName === tagName
  );
  return firstMatch ?? null;
}
|
|
62
|
+
/**
 * Snapshots `element.getElementsByTagName(tagName)` into a plain array.
 */
function findAllDescendants(element, tagName) {
  const matches = element.getElementsByTagName(tagName);
  return Array.from(matches);
}
|
|
65
|
+
/**
 * Parses an XML string with a fresh xmldom `DOMParser` using the "text/xml"
 * MIME type and returns the resulting document.
 */
function parseXml(xmlString) {
  const parser = new import_xmldom.DOMParser();
  return parser.parseFromString(xmlString, "text/xml");
}
|
|
68
|
+
/**
 * Serializes a DOM node back to an XML string with a fresh xmldom `XMLSerializer`.
 */
function serializeXml(node) {
  const serializer = new import_xmldom.XMLSerializer();
  return serializer.serializeToString(node);
}
|
|
71
|
+
|
|
72
|
+
// src/docx/bridge.ts
|
|
73
|
+
/**
 * Plain record describing one relationship entry: its id, type URI, target,
 * and whether the target is external to the package.
 */
var Relationship = class {
  id;
  type;
  target;
  isExternal;
  constructor(id, type, target, isExternal) {
    Object.assign(this, { id, type, target, isExternal });
  }
};
|
|
85
|
+
/**
 * One part of the package: its part name, the text it was created from
 * (`blob`), its content type, its parsed root element (`_element`), and a
 * map of relationships keyed by relationship id.
 */
var Part = class {
  partname;
  blob;
  contentType;
  rels = /* @__PURE__ */ new Map();
  _element;
  constructor(partname, blob, element, contentType) {
    Object.assign(this, { partname, blob, contentType });
    this._element = element;
  }
  /**
   * Records a relationship in `rels`. When this part's root element is a
   * `Relationships` element with an owner document, a matching
   * `<Relationship>` child is appended as well (`TargetMode="External"` is set
   * only for external targets).
   */
  addRelationship(id, type, target, isExternal = false) {
    this.rels.set(id, new Relationship(id, type, target, isExternal));
    const root = this._element;
    if (root.tagName !== "Relationships") return;
    const ownerDoc = root.ownerDocument;
    if (!ownerDoc) return;
    const relEl = ownerDoc.createElement("Relationship");
    const attributes = [["Id", id], ["Type", type], ["Target", target]];
    if (isExternal) attributes.push(["TargetMode", "External"]);
    for (const [attrName, attrValue] of attributes) {
      relEl.setAttribute(attrName, attrValue);
    }
    root.appendChild(relEl);
  }
};
|
|
112
|
+
/**
 * In-memory view of a DOCX package: the backing zip object plus the list of
 * parsed parts.
 */
var DocxPackage = class {
  constructor(zip) {
    this.zip = zip;
  }
  zip;
  parts = [];
  mainDocumentPart;
  /**
   * Finds a part by path. The lookup accepts the path with or without a
   * leading "/" and matches part names stored in either form.
   * @returns the matching part, or `undefined` when none exists.
   */
  getPartByPath(path) {
    const searchPath = path.startsWith("/") ? path.substring(1) : path;
    return this.parts.find((p) => p.partname === searchPath || p.partname === "/" + searchPath);
  }
  /**
   * Returns the first unused part name derived from `pattern`, where the first
   * "%d" is replaced by "" for the first attempt and by 2, 3, ... afterwards.
   * @throws {Error} when `pattern` has no "%d" placeholder and that exact name
   *   is already taken (no other candidate could ever be produced).
   */
  nextPartname(pattern) {
    if (!pattern.includes("%d")) {
      // Without a placeholder every candidate is identical; fail instead of spinning forever.
      if (this.getPartByPath(pattern)) {
        throw new Error(`nextPartname: pattern "${pattern}" has no %d placeholder and is already in use`);
      }
      return pattern;
    }
    let i = 1;
    while (true) {
      const candidate = pattern.replace("%d", i === 1 ? "" : i.toString());
      if (!this.getPartByPath(candidate)) return candidate;
      i++;
    }
  }
  /**
   * Parses `xmlString`, registers it as a new part, and — when a
   * `[Content_Types].xml` part is loaded — appends an `<Override>` entry for it.
   * @returns the newly created part.
   */
  addPart(partname, contentType, xmlString) {
    const doc = parseXml(xmlString);
    const part = new Part(partname, xmlString, doc.documentElement, contentType);
    this.parts.push(part);
    const ctPart = this.getPartByPath("[Content_Types].xml");
    const docCT = ctPart ? ctPart._element.ownerDocument : null;
    if (docCT) {
      const override = docCT.createElement("Override");
      override.setAttribute("PartName", partname);
      override.setAttribute("ContentType", contentType);
      ctPart._element.appendChild(override);
    }
    return part;
  }
  /**
   * Returns the `_rels/<file>.rels` part that sits next to `sourcePartname`,
   * creating an empty `<Relationships>` part when it does not exist yet.
   */
  getOrCreateRelsPart(sourcePartname) {
    const parts = sourcePartname.split("/");
    const file = parts.pop();
    const relsPath = parts.join("/") + "/_rels/" + file + ".rels";
    let relsPart = this.getPartByPath(relsPath);
    if (!relsPart) {
      const xml = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n' +
        '<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships"></Relationships>';
      relsPart = this.addPart(relsPath, "application/vnd.openxmlformats-package.relationships+xml", xml);
    }
    return relsPart;
  }
};
|
|
160
|
+
/**
 * Wrapper around a loaded DOCX package and its main document part.
 */
var DocumentObject = class _DocumentObject {
  constructor(pkg, part) {
    this.pkg = pkg;
    this.part = part;
  }
  pkg;
  part;
  settings = { oddAndEvenPagesHeaderFooter: false };
  // Simplification for the TS port: sections hold header/footer refs
  sections = [];
  /** The `w:body` child of the main part's root element, or the root element itself when there is none. */
  get element() {
    return findChild(this.part._element, "w:body") || this.part._element;
  }
  /**
   * Main entrypoint for loading a DOCX buffer into the DOM wrapper.
   *
   * Every non-directory zip entry ending in ".xml" or ".rels" is parsed into a
   * part named "/" + its zip path. Content types come from the `Override`
   * entries of `[Content_Types].xml` (falling back to "application/xml"), and
   * the main part's relationships are read from `word/_rels/document.xml.rels`.
   *
   * @throws {Error} when the archive has no `word/document.xml`.
   */
  static async load(buffer) {
    const zip = await import_jszip.default.loadAsync(buffer);
    const pkg = new DocxPackage(zip);
    const ctFile = zip.file("[Content_Types].xml");
    let contentTypes = {};
    if (ctFile) {
      const ctXml = parseXml(await ctFile.async("text"));
      const overrides = findAllDescendants(ctXml.documentElement, "Override");
      for (const override of overrides) {
        contentTypes[override.getAttribute("PartName") || ""] = override.getAttribute("ContentType") || "";
      }
    }
    for (const [path, file] of Object.entries(zip.files)) {
      if (!file.dir && (path.endsWith(".xml") || path.endsWith(".rels"))) {
        const text = await file.async("text");
        const doc = parseXml(text);
        const cType = contentTypes["/" + path] || "application/xml";
        const part = new Part("/" + path, text, doc.documentElement, cType);
        pkg.parts.push(part);
      }
    }
    const mainPart = pkg.getPartByPath("word/document.xml");
    if (!mainPart) throw new Error("Invalid DOCX: Missing word/document.xml");
    pkg.mainDocumentPart = mainPart;
    const relsPart = pkg.getPartByPath("word/_rels/document.xml.rels");
    if (relsPart) {
      const relElements = findAllDescendants(relsPart._element, "Relationship");
      for (const rel of relElements) {
        const rId = rel.getAttribute("Id");
        const target = rel.getAttribute("Target");
        const type = rel.getAttribute("Type");
        const targetMode = rel.getAttribute("TargetMode");
        // Entries missing any of Id/Target/Type are ignored.
        if (rId && target && type) {
          mainPart.rels.set(rId, new Relationship(rId, type, target, targetMode === "External"));
        }
      }
    }
    return new _DocumentObject(pkg, mainPart);
  }
  /**
   * Adds an internal relationship from the main part to `part` under the first
   * unused `rId<N>` id (N starting at 1). The target is the last path segment
   * of `part.partname`; the relationship is recorded both on the main part and
   * in its `.rels` part (created on demand).
   */
  relateTo(part, relType) {
    let rId = 1;
    while (this.part.rels.has(`rId${rId}`)) rId++;
    const id = `rId${rId}`;
    const target = part.partname.split("/").pop();
    this.part.rels.set(id, new Relationship(id, relType, target, false));
    const relsPart = this.pkg.getOrCreateRelsPart(this.part.partname);
    relsPart.addRelationship(id, relType, target, false);
  }
  /**
   * Re-serializes every part into the zip and generates the archive as a
   * "nodebuffer". An XML declaration is prepended to any part whose serialized
   * form does not already start with one.
   */
  async save() {
    for (const part of this.pkg.parts) {
      let xmlStr = serializeXml(part._element.ownerDocument || part._element);
      if (!xmlStr.startsWith("<?xml")) {
        xmlStr = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n' + xmlStr;
      }
      // Part names may be stored with or without a leading "/" (getPartByPath
      // accepts both), so only strip the slash when it is actually there.
      const zipPath = part.partname.startsWith("/") ? part.partname.substring(1) : part.partname;
      this.pkg.zip.file(zipPath, xmlStr);
    }
    return this.pkg.zip.generateAsync({ type: "nodebuffer" });
  }
};
|
|
235
|
+
|
|
236
|
+
// src/docx/primitives.ts
|
|
237
|
+
/**
 * Thin wrapper pairing a `w:p` element with the container it was found in.
 */
var Paragraph = class {
  _element;
  _parent;
  constructor(_element, _parent) {
    Object.assign(this, { _element, _parent });
  }
  /** Concatenated text content of every descendant `w:t` element, in document order. */
  get text() {
    const textNodes = this._element.getElementsByTagName("w:t");
    return Array.from(textNodes, (node) => node.textContent || "").join("");
  }
};
|
|
253
|
+
/**
 * Thin wrapper pairing a run element with the object it was yielded from.
 */
var Run = class {
  _element;
  _parent;
  constructor(_element, _parent) {
    Object.assign(this, { _element, _parent });
  }
};
|
|
261
|
+
/**
 * Thin wrapper pairing a table-cell element with its owning row wrapper.
 */
var Cell = class {
  _element;
  _parent;
  constructor(_element, _parent) {
    Object.assign(this, { _element, _parent });
  }
};
|
|
269
|
+
/**
 * Wrapper around a `w:tr` element that eagerly builds a `Cell` for each of the
 * row's own cells.
 *
 * Only `w:tc` elements whose nearest `w:tr` ancestor is this row are included,
 * so cells that belong to a table nested inside one of this row's cells are
 * not reported as cells of this row. Cells wrapped in intermediate elements
 * (anything other than another `w:tr`) are still picked up.
 */
var Row = class {
  constructor(_element, _parent) {
    this._element = _element;
    this._parent = _parent;
    const tcs = this._element.getElementsByTagName("w:tc");
    for (let i = 0; i < tcs.length; i++) {
      // Walk up to the closest enclosing row; a nested table's row stops the walk first.
      let owner = tcs[i].parentNode;
      while (owner && owner.tagName !== "w:tr") owner = owner.parentNode;
      if (owner === this._element) {
        this.cells.push(new Cell(tcs[i], this));
      }
    }
  }
  _element;
  _parent;
  cells = [];
};
|
|
282
|
+
/**
 * Wrapper around a `w:tbl` element that eagerly builds a `Row` for each of the
 * table's own rows.
 *
 * Only `w:tr` elements whose nearest `w:tbl` ancestor is this table are
 * included, so rows of tables nested inside a cell are not reported as rows of
 * the outer table.
 */
var Table = class {
  constructor(_element, _parent) {
    this._element = _element;
    this._parent = _parent;
    const trs = this._element.getElementsByTagName("w:tr");
    for (let i = 0; i < trs.length; i++) {
      // Walk up to the closest enclosing table; a nested table stops the walk first.
      let owner = trs[i].parentNode;
      while (owner && owner.tagName !== "w:tbl") owner = owner.parentNode;
      if (owner === this._element) {
        this.rows.push(new Row(trs[i], this));
      }
    }
  }
  _element;
  _parent;
  rows = [];
};
|
|
295
|
+
/**
 * Wrapper around a notes part. Keeps the part, the note type tag it was
 * created with, and a direct reference to the part's root element.
 */
var NotesPart = class {
  part;
  note_type;
  _element;
  constructor(part, note_type) {
    Object.assign(this, { part, note_type });
    this._element = part._element;
  }
};
|
|
305
|
+
/**
 * Wrapper around a single note element. Captures the element, its parent
 * wrapper, the note type, the element's `w:id` attribute (empty string when
 * absent) and the parent's `part`.
 */
var FootnoteItem = class {
  _element;
  _parent;
  note_type;
  id;
  part;
  constructor(_element, _parent, note_type) {
    Object.assign(this, { _element, _parent, note_type });
    this.id = _element.getAttribute("w:id") || "";
    this.part = _parent.part;
  }
};
|
|
319
|
+
|
|
320
|
+
// src/utils/docx.ts
|
|
321
|
+
// Qualified (prefixed) tag and attribute names used for DOM lookups in this module.

// Block and run structure
var QN_W_P = "w:p";
var QN_W_R = "w:r";
var QN_W_PPR = "w:pPr";
var QN_W_RPR = "w:rPr";
var QN_W_RPRCHANGE = "w:rPrChange";
var QN_W_SDT = "w:sdt";
var QN_W_SDTCONTENT = "w:sdtContent";
var QN_W_SMARTTAG = "w:smartTag";

// Run content
var QN_W_T = "w:t";
var QN_W_DELTEXT = "w:delText";
var QN_W_TAB = "w:tab";
var QN_W_BR = "w:br";
var QN_W_CR = "w:cr";

// Run and paragraph properties
var QN_W_B = "w:b";
var QN_W_I = "w:i";
var QN_W_VAL = "w:val";
var QN_W_PSTYLE = "w:pStyle";
var QN_W_OUTLINELVL = "w:outlineLvl";
var QN_W_NUMPR = "w:numPr";
var QN_W_NUMID = "w:numId";
var QN_W_ILVL = "w:ilvl";

// Tracked changes and their attributes
var QN_W_INS = "w:ins";
var QN_W_DEL = "w:del";
var QN_W_ID = "w:id";
var QN_W_AUTHOR = "w:author";
var QN_W_DATE = "w:date";

// Comments and notes
var QN_W_COMMENTREFERENCE = "w:commentReference";
var QN_W_COMMENTRANGESTART = "w:commentRangeStart";
var QN_W_COMMENTRANGEEND = "w:commentRangeEnd";
var QN_W_FOOTNOTEREFERENCE = "w:footnoteReference";
var QN_W_ENDNOTEREFERENCE = "w:endnoteReference";

// Fields, hyperlinks and bookmarks
var QN_W_FLDCHAR = "w:fldChar";
var QN_W_FLDCHARTYPE = "w:fldCharType";
var QN_W_INSTRTEXT = "w:instrText";
var QN_W_FLDSIMPLE = "w:fldSimple";
var QN_W_INSTR = "w:instr";
var QN_W_HYPERLINK = "w:hyperlink";
var QN_R_ID = "r:id";
var QN_W_BOOKMARKSTART = "w:bookmarkStart";
var QN_W_NAME = "w:name";

// Unanchored: "Heading", an optional single space, then one digit 1-6 that is
// not followed by another digit. The digit is captured in group 1.
var _CUSTOM_HEADING_NAME_RE = /Heading[ ]?([1-6])(?![0-9])/;
|
|
362
|
+
/**
 * Builds (and memoizes on the package) a lookup of resolved style information.
 *
 * The package is taken from `part.package`, `part.pkg`, or `part.part.pkg`.
 * The result is a two-element array `[cache, default_pstyle]`:
 *   - `cache` maps style id -> `{ name, outline_level, bold }`, with
 *     `outline_level` and `bold` inherited through `w:basedOn` when the style
 *     does not set them itself;
 *   - `default_pstyle` is the id of the paragraph style flagged as default,
 *     or `null`.
 * When the package has no `word/styles.xml` the result is `[{}, null]`.
 * The result is stored on `pkg._adeu_style_cache` and reused on later calls.
 */
function _get_style_cache(part) {
  const pkg = part.package || part.pkg || (part.part ? part.part.pkg : null);
  if (pkg && pkg._adeu_style_cache) return pkg._adeu_style_cache;

  const cache = {};
  const remember = (entry) => {
    if (pkg) pkg._adeu_style_cache = entry;
    return entry;
  };

  const stylesPart = pkg?.getPartByPath("word/styles.xml");
  if (!stylesPart) return remember([cache, null]);

  // Pass 1: collect what each style declares on its own.
  let default_pstyle = null;
  const raw_styles = {};
  for (const styleEl of findAllDescendants(stylesPart._element, "w:style")) {
    const styleId = styleEl.getAttribute("w:styleId");
    if (!styleId) continue;

    const styleType = styleEl.getAttribute("w:type");
    const flaggedDefault = styleEl.getAttribute("w:default") === "1" || styleEl.getAttribute("w:default") === "true";
    if (styleType === "paragraph" && flaggedDefault) default_pstyle = styleId;

    const nameEl = findChild(styleEl, "w:name");
    const basedOnEl = findChild(styleEl, "w:basedOn");

    let outlineLevel = null;
    const pPr = findChild(styleEl, "w:pPr");
    const outlineEl = pPr ? findChild(pPr, "w:outlineLvl") : null;
    if (outlineEl) {
      const rawLevel = outlineEl.getAttribute("w:val");
      if (rawLevel && /^\d+$/.test(rawLevel)) outlineLevel = parseInt(rawLevel, 10);
    }

    let bold = null;
    const rPr = findChild(styleEl, "w:rPr");
    const boldEl = rPr ? findChild(rPr, "w:b") : null;
    if (boldEl) {
      const toggle = boldEl.getAttribute("w:val");
      bold = !(toggle === "0" || toggle === "false" || toggle === "off");
    }

    raw_styles[styleId] = {
      name: nameEl ? nameEl.getAttribute("w:val") : styleId,
      based_on: basedOnEl ? basedOnEl.getAttribute("w:val") : null,
      outline_level: outlineLevel,
      bold
    };
  }

  // Pass 2: resolve inheritance. `seen` guards against basedOn cycles; unknown
  // or cyclic ids resolve to a placeholder that is not stored in the cache.
  const resolve_style = (styleId, seen) => {
    if (cache[styleId]) return cache[styleId];
    if (seen.has(styleId) || !raw_styles[styleId]) {
      return { name: styleId, outline_level: null, bold: false };
    }
    seen.add(styleId);
    const own = raw_styles[styleId];
    let level = own.outline_level;
    let isBold = own.bold !== null ? own.bold : false;
    if (own.based_on) {
      const inherited = resolve_style(own.based_on, seen);
      if (level === null) level = inherited.outline_level;
      if (own.bold === null) isBold = inherited.bold;
    }
    const resolved = { name: own.name, outline_level: level, bold: isBold };
    cache[styleId] = resolved;
    return resolved;
  };
  for (const styleId in raw_styles) {
    resolve_style(styleId, /* @__PURE__ */ new Set());
  }

  return remember([cache, default_pstyle]);
}
|
|
429
|
+
/**
 * Extracts a heading level (1-6) from a style name using
 * `_CUSTOM_HEADING_NAME_RE`. Returns `null` for an empty name or when the
 * pattern does not match.
 */
function _detect_heading_level_from_name(name) {
  if (!name) return null;
  const found = name.match(_CUSTOM_HEADING_NAME_RE);
  if (!found) return null;
  const [, digit] = found;
  return parseInt(digit, 10);
}
|
|
434
|
+
/**
 * Reports whether a paragraph is a heading according to its own properties or
 * its style. Checks, in order:
 *   1. a numeric `w:outlineLvl` in 0..8 set directly on the paragraph;
 *   2. a resolved style outline level in 0..8;
 *   3. a style name that starts with "Heading", equals "Title", or (for any
 *      name other than "Normal") matches the custom heading-name pattern.
 *
 * When `style_cache` is falsy, both it and `default_pstyle` are loaded via
 * `_get_style_cache`.
 */
function is_native_heading(paragraph, style_cache, default_pstyle) {
  if (!style_cache) {
    [style_cache, default_pstyle] = _get_style_cache(paragraph._parent.part || paragraph._parent);
  }

  let style_id = default_pstyle;
  const pPr = findChild(paragraph._element, QN_W_PPR);
  if (pPr) {
    const directOutline = findChild(pPr, QN_W_OUTLINELVL);
    const rawLevel = directOutline ? directOutline.getAttribute(QN_W_VAL) : null;
    if (rawLevel && /^\d+$/.test(rawLevel)) {
      const directLevel = parseInt(rawLevel, 10);
      if (directLevel >= 0 && directLevel <= 8) return true;
    }
    const styleRef = findChild(pPr, QN_W_PSTYLE);
    if (styleRef) style_id = styleRef.getAttribute(QN_W_VAL) || default_pstyle;
  }

  const style_info = style_id && style_cache ? style_cache[style_id] : null;
  if (style_info && style_info.outline_level !== null && style_info.outline_level >= 0 && style_info.outline_level <= 8) {
    return true;
  }

  const style_name = style_info ? style_info.name : null;
  if (!style_name) return false;
  return (
    style_name.startsWith("Heading") ||
    style_name === "Title" ||
    (style_name !== "Normal" && _detect_heading_level_from_name(style_name) !== null)
  );
}
|
|
466
|
+
/**
 * Computes the Markdown-style prefix for a paragraph.
 *
 * Checks, in order (first hit wins):
 *   1. direct `w:outlineLvl` in 0..8        -> "#" repeated (level + 1), then " "
 *   2. resolved style outline level in 0..8 -> same
 *   3. style name "Heading<digits>"         -> "#" repeated <digits>, then " "
 *   4. style name "Title"                   -> "# "
 *   5. `w:numPr` with a `w:numId` != "0"    -> " " repeated ilvl, then "* "
 *   6. custom heading style name            -> "#" repeated level, then " "
 *   7. unstyled/"Normal" paragraph whose trimmed text is non-empty, shorter
 *      than 100 characters, all upper-case and bold -> "## "
 * Otherwise returns "".
 *
 * Bold toggles treat `w:val` of "0", "false" and "off" as disabled, matching
 * the style-level check in `_get_style_cache`. A negative `w:ilvl` is clamped
 * to 0 instead of making `String.prototype.repeat` throw.
 *
 * When `style_cache` is falsy, both it and `default_pstyle` are loaded via
 * `_get_style_cache`.
 */
function get_paragraph_prefix(paragraph, style_cache, default_pstyle) {
  if (!style_cache) {
    [style_cache, default_pstyle] = _get_style_cache(paragraph._parent.part || paragraph._parent);
  }
  // An on/off toggle element is enabled unless its w:val explicitly disables it.
  const isToggleOn = (toggleEl) => {
    const v = toggleEl.getAttribute(QN_W_VAL);
    return v !== "0" && v !== "false" && v !== "off";
  };

  const pPr = findChild(paragraph._element, QN_W_PPR);
  if (pPr) {
    const oLvl = findChild(pPr, QN_W_OUTLINELVL);
    if (oLvl) {
      const val = oLvl.getAttribute(QN_W_VAL);
      if (val && /^\d+$/.test(val)) {
        const lvl = parseInt(val, 10);
        if (lvl >= 0 && lvl <= 8) return "#".repeat(lvl + 1) + " ";
      }
    }
  }

  let style_id = default_pstyle;
  if (pPr) {
    const pStyle = findChild(pPr, QN_W_PSTYLE);
    if (pStyle) style_id = pStyle.getAttribute(QN_W_VAL) || default_pstyle;
  }
  const style_info = style_id && style_cache ? style_cache[style_id] : null;
  if (style_info && style_info.outline_level !== null && style_info.outline_level >= 0 && style_info.outline_level <= 8) {
    return "#".repeat(style_info.outline_level + 1) + " ";
  }

  const style_name = style_info ? style_info.name : null;
  if (style_name?.startsWith("Heading")) {
    const match = style_name.replace("Heading", "").trim();
    if (/^\d+$/.test(match)) return "#".repeat(parseInt(match, 10)) + " ";
  }
  if (style_name === "Title") return "# ";

  if (pPr) {
    const numPr = findChild(pPr, QN_W_NUMPR);
    if (numPr) {
      const numId = findChild(numPr, QN_W_NUMID);
      if (numId && numId.getAttribute(QN_W_VAL) !== "0") {
        let level = 0;
        const ilvl = findChild(numPr, QN_W_ILVL);
        if (ilvl) {
          const valAttr = ilvl.getAttribute(QN_W_VAL);
          // Clamp: a malformed negative level must not crash the indent builder.
          if (valAttr) level = Math.max(0, parseInt(valAttr, 10) || 0);
        }
        return " ".repeat(level) + "* ";
      }
    }
  }

  if (style_name && style_name !== "Normal") {
    const custom_level = _detect_heading_level_from_name(style_name);
    if (custom_level !== null) return "#".repeat(custom_level) + " ";
  }

  if (!style_name || style_name === "Normal") {
    const text = paragraph.text.trim();
    if (text && text.length < 100 && text === text.toUpperCase()) {
      let is_bold = false;
      if (style_info?.bold) {
        is_bold = true;
      } else {
        // Only the first run that carries visible text decides boldness.
        for (const r of findAllDescendants(paragraph._element, QN_W_R)) {
          const tText = findAllDescendants(r, QN_W_T).map((t) => t.textContent || "").join("");
          if (!tText.trim()) continue;
          const rPr_run = findChild(r, QN_W_RPR);
          const b = rPr_run ? findChild(rPr_run, QN_W_B) : null;
          if (b && isToggleOn(b)) is_bold = true;
          break;
        }
      }
      if (is_bold) return "## ";
    }
  }
  return "";
}
|
|
543
|
+
/**
 * True when the paragraph's computed prefix, ignoring trailing whitespace,
 * consists of one or more "#" characters and nothing else.
 */
function is_heading_paragraph(paragraph, style_cache, default_pstyle) {
  const prefix = get_paragraph_prefix(paragraph, style_cache, default_pstyle);
  if (!prefix) return false;
  return /^#+$/.test(prefix.trimEnd());
}
|
|
549
|
+
/**
 * Returns the `[prefix, suffix]` Markdown markers for a run's bold/italic
 * formatting: "**" for bold (suppressed inside headings) and "_" for italic.
 * When both apply the result is `["**_", "_**"]`.
 *
 * A `w:b` / `w:i` toggle counts as enabled unless its `w:val` is "0", "false"
 * or "off" — the same values `_get_style_cache` treats as disabled.
 *
 * @param run        wrapper exposing `_element` (and `_parent` when `is_heading` is null)
 * @param is_heading `true`/`false` to state the heading context explicitly;
 *                   `null` (default) derives it from the run's parent via
 *                   `is_native_heading` when the parent is a `Paragraph`.
 */
function get_run_style_markers(run, is_heading = null) {
  // An on/off toggle element is enabled unless its w:val explicitly disables it.
  const isToggleOn = (toggleEl) => {
    if (!toggleEl) return false;
    const v = toggleEl.getAttribute(QN_W_VAL);
    return v !== "0" && v !== "false" && v !== "off";
  };

  const rPr = findChild(run._element, QN_W_RPR);
  const is_bold = rPr ? isToggleOn(findChild(rPr, QN_W_B)) : false;
  const is_italic = rPr ? isToggleOn(findChild(rPr, QN_W_I)) : false;

  if (is_heading === null) {
    const parent = run._parent;
    is_heading = parent instanceof Paragraph ? is_native_heading(parent) : false;
  }

  let prefix = "";
  let suffix = "";
  if (is_bold && !is_heading) {
    prefix += "**";
    suffix = "**" + suffix;
  }
  if (is_italic) {
    // Italic nests inside bold: opens last, closes first.
    prefix += "_";
    suffix = "_" + suffix;
  }
  return [prefix, suffix];
}
|
|
575
|
+
/**
 * Wraps `text` in `prefix`/`suffix`, line by line: every non-empty line gets
 * its own pair of markers while empty lines stay empty. With no markers the
 * text is returned untouched; with markers but empty text the result is "".
 */
function apply_formatting_to_segments(text, prefix, suffix) {
  const hasMarkers = Boolean(prefix) || Boolean(suffix);
  if (!hasMarkers) return text;
  if (!text) return "";
  const wrapLine = (line) => (line ? `${prefix}${line}${suffix}` : "");
  return text.split("\n").map(wrapLine).join("\n");
}
|
|
582
|
+
/**
 * Flattens a run's direct child elements into plain text: `w:t` and
 * `w:delText` contribute their text content (tab characters replaced by a
 * space), `w:tab` contributes a space, and `w:br` / `w:cr` contribute a
 * newline. All other children are ignored.
 */
function get_run_text(run) {
  const pieces = [];
  for (const node of Array.from(run._element.childNodes)) {
    if (node.nodeType !== 1) continue;
    switch (node.tagName) {
      case QN_W_T:
      case QN_W_DELTEXT:
        pieces.push((node.textContent || "").replace(/\t/g, " "));
        break;
      case QN_W_TAB:
        pieces.push(" ");
        break;
      case QN_W_BR:
      case QN_W_CR:
        pieces.push("\n");
        break;
      default:
        break;
    }
  }
  return pieces.join("");
}
|
|
598
|
+
/**
 * Yields the block-level items of a container.
 *
 * For a `NotesPart`, yields a `FootnoteItem` for every `w:footnote` (note type
 * "fn") or `w:endnote` (any other note type) descendant, skipping entries
 * whose `w:type` is "separator" or "continuationSeparator".
 *
 * For any other container, yields a `Paragraph` for each direct `w:p` child
 * and a `Table` for each direct `w:tbl` child of the container's element
 * (`parent._element`, else `parent.element`, else `parent` itself).
 *
 * The notes container is recognised with `instanceof`, so subclasses work; the
 * class-name comparison is kept as a fallback for look-alike objects.
 */
function* iter_block_items(parent) {
  const parent_elm = parent._element || parent.element || parent;
  const isNotesContainer = parent instanceof NotesPart || parent.constructor?.name === "NotesPart";
  if (isNotesContainer) {
    const tag = parent.note_type === "fn" ? "w:footnote" : "w:endnote";
    for (const note of findAllDescendants(parent_elm, tag)) {
      const noteKind = note.getAttribute("w:type");
      if (noteKind === "separator" || noteKind === "continuationSeparator") continue;
      yield new FootnoteItem(note, parent, parent.note_type);
    }
    return;
  }
  for (let i = 0; i < parent_elm.childNodes.length; i++) {
    const child = parent_elm.childNodes[i];
    if (child.nodeType !== 1) continue;
    if (child.tagName === QN_W_P) {
      yield new Paragraph(child, parent);
    } else if (child.tagName === "w:tbl") {
      yield new Table(child, parent);
    }
  }
}
|
|
619
|
+
/**
 * Yields the document itself, then a `NotesPart` for `word/footnotes.xml`
 * (type "fn") and for `word/endnotes.xml` (type "en") when those parts exist.
 * Both lookups happen after the document has been yielded and before any
 * notes wrapper is produced.
 */
function* iter_document_parts(doc) {
  yield doc;
  const [footnotes, endnotes] = ["word/footnotes.xml", "word/endnotes.xml"].map(
    (path) => doc.pkg.getPartByPath(path)
  );
  if (footnotes) yield new NotesPart(footnotes, "fn");
  if (endnotes) yield new NotesPart(endnotes, "en");
}
|
|
626
|
+
/**
 * True when the first whitespace-separated token of a field instruction is
 * PAGE or NUMPAGES (case-insensitive). Empty or missing instructions are false.
 */
function _is_page_instr(instr) {
  if (!instr) return false;
  const [fieldName] = instr.toUpperCase().trim().split(/\s+/);
  return fieldName === "PAGE" || fieldName === "NUMPAGES";
}
|
|
631
|
+
/**
 * Walks a paragraph's element tree and yields a flat stream of `Run` wrappers
 * interleaved with plain marker objects of the shape `{ type, id, ... }`.
 *
 * Marker types produced:
 *   - "ins_start"/"ins_end", "del_start"/"del_end": around `w:ins` / `w:del`
 *     (start markers also carry `author` and `date`);
 *   - "fmt_start"/"fmt_end": around a run whose `w:rPr` has a `w:rPrChange`
 *     (the end marker is only emitted when the change has a non-null id);
 *   - "start"/"end": comment range boundaries; "ref": comment reference;
 *   - "footnote"/"endnote": note references;
 *   - "hyperlink_start"/"hyperlink_end": around hyperlinks that resolve to an
 *     external relationship (the URL is carried in the `date` field);
 *   - "xref_start"/"xref_end": around REF fields (simple or complex);
 *   - "bookmark": bookmark starts whose name does not begin with "_", or
 *     begins with "_Ref".
 *
 * For complex fields whose instruction is a PAGE/NUMPAGES field, runs from the
 * "separate" marker up to (not including) the run holding the "end" marker are
 * not yielded. `w:sdt`, `w:smartTag` and `w:sdtContent` are traversed
 * transparently; any other element is skipped.
 */
function* iter_paragraph_content(paragraph) {
  // Complex-field state, shared across runs because a field spans several of them.
  let in_complex_field = false;
  let current_instr = "";
  let hide_result = false;

  // Target of a "REF <name> ..." instruction, or "" for anything else.
  const xref_target = (instr) => {
    const tokens = instr.trim().split(/\s+/);
    return tokens.length > 1 && tokens[0] === "REF" ? tokens[1] : "";
  };
  // Author/date attributes of a tracked-change element (undefined when absent/empty).
  const change_meta = (el) => ({
    author: el.getAttribute(QN_W_AUTHOR) || void 0,
    date: el.getAttribute(QN_W_DATE) || void 0
  });

  function* process_run_element(r_element) {
    let fmt_id = null;
    const rPr = findChild(r_element, QN_W_RPR);
    const fmt_change = rPr ? findChild(rPr, QN_W_RPRCHANGE) : null;
    if (fmt_change) {
      fmt_id = fmt_change.getAttribute(QN_W_ID);
      yield { type: "fmt_start", id: fmt_id, ...change_meta(fmt_change) };
    }

    for (let idx = 0; idx < r_element.childNodes.length; idx++) {
      const node = r_element.childNodes[idx];
      if (node.nodeType !== 1) continue;
      switch (node.tagName) {
        case QN_W_COMMENTREFERENCE: {
          const ref_id = node.getAttribute(QN_W_ID);
          if (ref_id) yield { type: "ref", id: ref_id };
          break;
        }
        case QN_W_FOOTNOTEREFERENCE: {
          const f_id = node.getAttribute(QN_W_ID);
          if (f_id) yield { type: "footnote", id: f_id };
          break;
        }
        case QN_W_ENDNOTEREFERENCE: {
          const e_id = node.getAttribute(QN_W_ID);
          if (e_id) yield { type: "endnote", id: e_id };
          break;
        }
        case QN_W_FLDCHAR: {
          const fld_type = node.getAttribute(QN_W_FLDCHARTYPE);
          if (fld_type === "begin") {
            in_complex_field = true;
            current_instr = "";
          } else if (fld_type === "separate") {
            if (_is_page_instr(current_instr)) {
              hide_result = true;
            } else {
              const target = xref_target(current_instr);
              if (target) yield { type: "xref_start", id: target };
            }
          } else if (fld_type === "end") {
            if (!hide_result) {
              const target = xref_target(current_instr);
              if (target) yield { type: "xref_end", id: target };
            }
            in_complex_field = false;
            current_instr = "";
            hide_result = false;
          }
          break;
        }
        case QN_W_INSTRTEXT:
          if (in_complex_field && !hide_result) current_instr += node.textContent || "";
          break;
        default:
          break;
      }
    }

    // The flag is read after the children were processed, so the run holding a
    // "separate" marker of a hidden field is dropped and the one holding "end" is kept.
    if (!hide_result) yield new Run(r_element, paragraph);
    if (fmt_id !== null) yield { type: "fmt_end", id: fmt_id };
  }

  function* traverse_node(node) {
    for (let idx = 0; idx < node.childNodes.length; idx++) {
      const child = node.childNodes[idx];
      if (child.nodeType !== 1) continue;
      switch (child.tagName) {
        case QN_W_R:
          yield* process_run_element(child);
          break;
        case QN_W_INS:
        case QN_W_DEL: {
          const kind = child.tagName === QN_W_INS ? "ins" : "del";
          const change_id = child.getAttribute(QN_W_ID);
          yield { type: `${kind}_start`, id: change_id, ...change_meta(child) };
          yield* traverse_node(child);
          yield { type: `${kind}_end`, id: change_id };
          break;
        }
        case QN_W_COMMENTRANGESTART:
          yield { type: "start", id: child.getAttribute(QN_W_ID) };
          break;
        case QN_W_COMMENTRANGEEND:
          yield { type: "end", id: child.getAttribute(QN_W_ID) };
          break;
        case QN_W_HYPERLINK: {
          const rId = child.getAttribute(QN_R_ID);
          let url = "";
          if (rId && paragraph._parent.part) {
            const rel = paragraph._parent.part.rels.get(rId);
            if (rel && rel.isExternal) url = rel.target;
          }
          if (url) yield { type: "hyperlink_start", id: rId, date: url };
          yield* traverse_node(child);
          if (url) yield { type: "hyperlink_end", id: rId, date: url };
          break;
        }
        case QN_W_FLDSIMPLE: {
          const target = xref_target(child.getAttribute(QN_W_INSTR) || "");
          if (target) yield { type: "xref_start", id: target };
          yield* traverse_node(child);
          if (target) yield { type: "xref_end", id: target };
          break;
        }
        case QN_W_BOOKMARKSTART: {
          const b_name = child.getAttribute(QN_W_NAME);
          if (b_name && (!b_name.startsWith("_") || b_name.startsWith("_Ref"))) {
            yield { type: "bookmark", id: b_name };
          }
          break;
        }
        case QN_W_SDT:
        case QN_W_SMARTTAG:
        case QN_W_SDTCONTENT:
          yield* traverse_node(child);
          break;
        default:
          break;
      }
    }
  }

  yield* traverse_node(paragraph._element);
}
|
|
730
|
+
|
|
731
|
+
// src/domain.ts
|
|
732
|
+
/**
 * Builds the structural appendix appended to extracted document text.
 *
 * This implementation is a stub: both arguments are ignored and the result is
 * always the empty string, which callers treat as "no appendix".
 *
 * @param doc Loaded document (unused).
 * @param base_text Text extracted so far (unused).
 * @returns {string} Always "".
 */
function build_structural_appendix(doc, base_text) {
  const appendix = "";
  return appendix;
}
|
|
735
|
+
|
|
736
|
+
// src/comments.ts
|
|
737
|
+
// XML namespace URIs, keyed by the prefix used in the comment parts.
var NS = {
  // WordprocessingML main namespace
  w: "http://schemas.openxmlformats.org/wordprocessingml/2006/main",
  // Microsoft Word extension namespaces (2010 / 2012 / 2016 cid / 2018 cex)
  w14: "http://schemas.microsoft.com/office/word/2010/wordml",
  w15: "http://schemas.microsoft.com/office/word/2012/wordml",
  w16cid: "http://schemas.microsoft.com/office/word/2016/wordml/cid",
  w16cex: "http://schemas.microsoft.com/office/word/2018/wordml/cex",
  // Markup compatibility (mc:Ignorable)
  mc: "http://schemas.openxmlformats.org/markup-compatibility/2006"
};

// Package content types of the four comment-related parts.
var CT = {
  COMMENTS: "application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml",
  EXTENDED: "application/vnd.openxmlformats-officedocument.wordprocessingml.commentsExtended+xml",
  IDS: "application/vnd.openxmlformats-officedocument.wordprocessingml.commentsIds+xml",
  EXTENSIBLE: "application/vnd.openxmlformats-officedocument.wordprocessingml.commentsExtensible+xml"
};

// Relationship types used to link each comment part (same keys as CT).
var RT = {
  COMMENTS: "http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments",
  EXTENDED: "http://schemas.microsoft.com/office/2011/relationships/commentsExtended",
  IDS: "http://schemas.microsoft.com/office/2016/09/relationships/commentsIds",
  EXTENSIBLE: "http://schemas.microsoft.com/office/2018/08/relationships/commentsExtensible"
};
|
|
757
|
+
/**
 * Creates, links and edits the comment-related parts of a DOCX package:
 * the comments part plus its commentsExtended / commentsIds /
 * commentsExtensible companions.
 *
 * Parts are resolved lazily: the first read of `commentsPart`, `extendedPart`,
 * `idsPart` or `extensiblePart` finds the existing part by content type (and
 * makes sure the document relates to it) or creates a new empty one.
 */
var CommentsManager = class {
  /**
   * @param doc Document wrapper. This class reads `doc.pkg` (parts,
   *   nextPartname, addPart), `doc.part.rels` and calls `doc.relateTo`.
   */
  constructor(doc) {
    this.doc = doc;
  }
  doc;
  _commentsPart = null;
  _extendedPart = null;
  _idsPart = null;
  _extensiblePart = null;
  _nextId = null;

  /** The comments part, located or created on first access. */
  get commentsPart() {
    if (!this._commentsPart) {
      this._commentsPart = this._getOrCreateCommentsPart();
      this._ensureNamespaces();
    }
    return this._commentsPart;
  }

  /** The commentsExtended part (threading / resolved state), located or created on first access. */
  get extendedPart() {
    if (!this._extendedPart) this._extendedPart = this._getOrCreateExtendedPart();
    return this._extendedPart;
  }

  /** The commentsIds part (paraId -> durableId), located or created on first access. */
  get idsPart() {
    if (!this._idsPart) this._idsPart = this._getOrCreateIdsPart();
    return this._idsPart;
  }

  /** The commentsExtensible part (durableId -> UTC date), located or created on first access. */
  get extensiblePart() {
    if (!this._extensiblePart) this._extensiblePart = this._getOrCreateExtensiblePart();
    return this._extensiblePart;
  }

  /** Next numeric comment id; computed from the existing comments on first read. */
  get nextId() {
    if (this._nextId === null) this._nextId = this._getNextCommentId();
    return this._nextId;
  }
  set nextId(value) {
    this._nextId = value;
  }

  /** Returns the first package part with the given content type, or null. */
  _getExistingPartByType(contentType) {
    return this.doc.pkg.parts.find((p) => p.contentType === contentType) || null;
  }

  /**
   * Ensures the main document part has a relationship to `part`.
   * An existing internal relationship whose target equals the part's file name
   * counts as already linked; otherwise a new relationship of `relType` is added.
   * @returns The same `part`.
   */
  _linkPart(part, relType) {
    const fileName = part.partname.split("/").pop();
    for (const rel of this.doc.part.rels.values()) {
      if (!rel.isExternal && rel.target === fileName) {
        return part;
      }
    }
    this.doc.relateTo(part, relType);
    return part;
  }

  /**
   * Shared find-or-create routine behind the four `_getOrCreate*Part` methods.
   * @param contentType Content type used to look up / register the part.
   * @param relType Relationship type used to link it to the document.
   * @param partnameTemplate Template handed to `pkg.nextPartname`.
   * @param xml Root-element markup used when the part has to be created.
   */
  _getOrCreatePart(contentType, relType, partnameTemplate, xml) {
    const existing = this._getExistingPartByType(contentType);
    if (existing) return this._linkPart(existing, relType);
    const partname = this.doc.pkg.nextPartname(partnameTemplate);
    const created = this.doc.pkg.addPart(partname, contentType, xml);
    this.doc.relateTo(created, relType);
    return created;
  }

  _getOrCreateCommentsPart() {
    const xml = `<w:comments xmlns:w="${NS.w}" xmlns:w14="${NS.w14}" xmlns:w15="${NS.w15}" xmlns:w16cid="${NS.w16cid}" xmlns:w16cex="${NS.w16cex}" xmlns:mc="${NS.mc}" mc:Ignorable="w14 w15 w16cid w16cex"></w:comments>`;
    return this._getOrCreatePart(CT.COMMENTS, RT.COMMENTS, "/word/comments%d.xml", xml);
  }

  _getOrCreateExtendedPart() {
    const xml = `<w15:commentsEx xmlns:w15="${NS.w15}"></w15:commentsEx>`;
    return this._getOrCreatePart(CT.EXTENDED, RT.EXTENDED, "/word/commentsExtended%d.xml", xml);
  }

  _getOrCreateIdsPart() {
    const xml = `<w16cid:commentsIds xmlns:w16cid="${NS.w16cid}"></w16cid:commentsIds>`;
    return this._getOrCreatePart(CT.IDS, RT.IDS, "/word/commentsIds%d.xml", xml);
  }

  _getOrCreateExtensiblePart() {
    const xml = `<w16cex:commentsExtensible xmlns:w16cex="${NS.w16cex}"></w16cex:commentsExtensible>`;
    return this._getOrCreatePart(CT.EXTENSIBLE, RT.EXTENSIBLE, "/word/commentsExtensible%d.xml", xml);
  }

  /** Hook invoked once after the comments part is resolved; currently a no-op. */
  _ensureNamespaces() {
  }

  /** Returns (largest numeric `w:id` among existing comments) + 1, or 1 when there are none. */
  _getNextCommentId() {
    const ids = [0];
    const part = this._getExistingPartByType(CT.COMMENTS);
    if (part) {
      for (const c of findAllDescendants(part._element, "w:comment")) {
        const idStr = c.getAttribute("w:id");
        // Non-numeric ids count as 0 so they never raise the maximum.
        if (idStr) ids.push(parseInt(idStr, 10) || 0);
      }
    }
    return Math.max(...ids) + 1;
  }

  /**
   * Returns a random 8-digit upper-case hex identifier in the range
   * 00000001..7FFFFFFE. The value is used for w14:paraId, rsid and durableId
   * attributes; paraId must be greater than 0 and less than 0x80000000, so the
   * top bit is never set and zero is never produced.
   */
  _generateHexId() {
    const EXCLUSIVE_MAX = 0x7fffffff;
    const value = Math.floor(Math.random() * (EXCLUSIVE_MAX - 1)) + 1;
    return value.toString(16).toUpperCase().padStart(8, "0");
  }

  /** Upper-cased first characters of each space-separated word of `author` ("" for a falsy author). */
  _getInitials(author) {
    if (!author) return "";
    return author.split(" ").filter(Boolean).map((word) => word[0]).join("").toUpperCase();
  }

  /** Snapshot of the element (nodeType 1) children of `parent`, in document order. */
  _childElements(parent) {
    const elements = [];
    for (let i = 0; i < parent.childNodes.length; i++) {
      const node = parent.childNodes[i];
      if (node.nodeType === 1) elements.push(node);
    }
    return elements;
  }

  /** First non-empty `w14:paraId` among the w:p descendants of `commentEl`, or null. */
  _firstParaId(commentEl) {
    for (const p of findAllDescendants(commentEl, "w:p")) {
      const pid = p.getAttribute("w14:paraId");
      if (pid) return pid;
    }
    return null;
  }

  /** Removes every element child of `parent` accepted by `predicate`; returns the removed elements. */
  _removeChildrenWhere(parent, predicate) {
    const doomed = this._childElements(parent).filter(predicate);
    for (const el of doomed) parent.removeChild(el);
    return doomed;
  }

  /**
   * Returns the paraId of the comment with `w:id === commentId`, or null.
   * Only consults the comments part if it has already been resolved on this
   * instance (`_commentsPart`).
   */
  _findParaIdForComment(commentId) {
    if (!this._commentsPart) return null;
    for (const c of findAllDescendants(this._commentsPart._element, "w:comment")) {
      if (c.getAttribute("w:id") !== commentId) continue;
      const pid = this._firstParaId(c);
      if (pid) return pid;
    }
    return null;
  }

  /**
   * Resolves the paraId a reply to `commentId` should point at: the
   * `w15:paraIdParent` recorded for that comment in the existing
   * commentsExtended part if there is one, otherwise the comment's own paraId.
   */
  _findThreadRootParaId(commentId) {
    const directParaId = this._findParaIdForComment(commentId);
    const extPart = this._getExistingPartByType(CT.EXTENDED);
    if (!directParaId || !extPart) return directParaId;
    for (const entry of this._childElements(extPart._element)) {
      if (entry.getAttribute("w15:paraId") !== directParaId) continue;
      const parent = entry.getAttribute("w15:paraIdParent");
      if (parent) return parent;
    }
    return directParaId;
  }

  /**
   * Appends a new comment and registers it in the extended / ids / extensible
   * parts (creating those parts when missing).
   *
   * @param {string} author Written to `w:author`; also the source of `w:initials`.
   * @param {string} text Comment body (a single run inside one paragraph).
   * @param {string|null} parentId `w:id` of the comment being replied to, if any.
   * @returns {string} The `w:id` assigned to the new comment.
   */
  addComment(author, text, parentId = null) {
    const commentId = this.nextId.toString();
    this.nextId++;
    // ISO timestamp without the millisecond component.
    const now = (/* @__PURE__ */ new Date()).toISOString().replace(/\.\d{3}Z$/, "Z");
    const commentsRoot = this.commentsPart._element;
    const doc = commentsRoot.ownerDocument;

    const comment = doc.createElement("w:comment");
    comment.setAttribute("w:id", commentId);
    comment.setAttribute("w:author", author);
    comment.setAttribute("w:date", now);
    const initials = this._getInitials(author);
    if (initials) comment.setAttribute("w:initials", initials);
    // Checked BEFORE the extended part is lazily created below: the inline
    // `w15:p` parent marker is only written when the package had no
    // commentsExtended part to begin with.
    const hadExtendedPart = this._getExistingPartByType(CT.EXTENDED);
    if (parentId && !hadExtendedPart) {
      comment.setAttribute("w15:p", parentId);
    }

    const paraId = this._generateHexId();
    const rsid = this._generateHexId();
    const p = doc.createElement("w:p");
    p.setAttribute("w14:paraId", paraId);
    p.setAttribute("w14:textId", "77777777");
    p.setAttribute("w:rsidR", rsid);
    p.setAttribute("w:rsidRDefault", rsid);
    p.setAttribute("w:rsidP", rsid);

    const pPr = doc.createElement("w:pPr");
    const pStyle = doc.createElement("w:pStyle");
    pStyle.setAttribute("w:val", "CommentText");
    pPr.appendChild(pStyle);
    p.appendChild(pPr);

    // First run: the annotation reference mark.
    const refRun = doc.createElement("w:r");
    const refRunProps = doc.createElement("w:rPr");
    const refRunStyle = doc.createElement("w:rStyle");
    refRunStyle.setAttribute("w:val", "CommentReference");
    refRunProps.appendChild(refRunStyle);
    refRun.appendChild(refRunProps);
    refRun.appendChild(doc.createElement("w:annotationRef"));
    p.appendChild(refRun);

    // Second run: the comment text itself.
    const textRun = doc.createElement("w:r");
    const textEl = doc.createElement("w:t");
    textEl.textContent = text;
    textRun.appendChild(textEl);
    p.appendChild(textRun);

    comment.appendChild(p);
    commentsRoot.appendChild(comment);

    const extendedPart = this.extendedPart;
    if (extendedPart) {
      const parentParaId = parentId ? this._findThreadRootParaId(parentId) : null;
      const commentEx = extendedPart._element.ownerDocument.createElement("w15:commentEx");
      commentEx.setAttribute("w15:paraId", paraId);
      if (parentParaId) commentEx.setAttribute("w15:paraIdParent", parentParaId);
      commentEx.setAttribute("w15:done", "0");
      extendedPart._element.appendChild(commentEx);
    }

    // The durable id is remembered here so the extensible entry below can
    // reuse it directly instead of searching the ids part for it again.
    let durableId = null;
    const idsPart = this.idsPart;
    if (idsPart) {
      durableId = this._generateHexId();
      const commentIdEl = idsPart._element.ownerDocument.createElement("w16cid:commentId");
      commentIdEl.setAttribute("w16cid:paraId", paraId);
      commentIdEl.setAttribute("w16cid:durableId", durableId);
      idsPart._element.appendChild(commentIdEl);
    }

    const extensiblePart = this.extensiblePart;
    if (extensiblePart && durableId) {
      const extEl = extensiblePart._element.ownerDocument.createElement("w16cex:commentExtensible");
      extEl.setAttribute("w16cex:durableId", durableId);
      extEl.setAttribute("w16cex:dateUtc", now);
      extensiblePart._element.appendChild(extEl);
    }
    return commentId;
  }

  /**
   * Removes the comment with the given `w:id`, every reply whose
   * `w15:paraIdParent` points at it (recursively), and the matching entries in
   * the extended / ids / extensible parts.
   *
   * Does nothing when the comments part has not been resolved on this instance
   * yet or when no comment has that id.
   * NOTE(review): the companion parts are reached through the lazy getters, so
   * a delete can create empty companion parts that did not exist before.
   */
  deleteComment(commentId) {
    if (!this._commentsPart) return;
    let commentEl = null;
    for (const c of findAllDescendants(this._commentsPart._element, "w:comment")) {
      if (c.getAttribute("w:id") === commentId) {
        commentEl = c;
        break;
      }
    }
    if (!commentEl) return;

    const paraId = this._firstParaId(commentEl);
    if (paraId) {
      // Collect reply ids first, then delete: deleting while scanning would
      // mutate the trees being iterated.
      const repliesToDelete = [];
      if (this.extendedPart) {
        for (const entry of this._childElements(this.extendedPart._element)) {
          if (entry.getAttribute("w15:paraIdParent") !== paraId) continue;
          const replyParaId = entry.getAttribute("w15:paraId");
          if (!replyParaId) continue;
          for (const c of findAllDescendants(this._commentsPart._element, "w:comment")) {
            for (const p of findAllDescendants(c, "w:p")) {
              if (p.getAttribute("w14:paraId") === replyParaId) {
                const cid = c.getAttribute("w:id");
                if (cid) repliesToDelete.push(cid);
                break;
              }
            }
          }
        }
      }
      for (const replyId of repliesToDelete) {
        this.deleteComment(replyId);
      }

      let durableId = null;
      if (this.idsPart) {
        const removed = this._removeChildrenWhere(
          this.idsPart._element,
          (el) => el.getAttribute("w16cid:paraId") === paraId
        );
        // When several entries match, the last one supplies the durable id.
        for (const el of removed) durableId = el.getAttribute("w16cid:durableId");
      }
      if (this.extendedPart) {
        this._removeChildrenWhere(
          this.extendedPart._element,
          (el) => el.getAttribute("w15:paraId") === paraId
        );
      }
      if (durableId && this.extensiblePart) {
        this._removeChildrenWhere(
          this.extensiblePart._element,
          (el) => el.getAttribute("w16cex:durableId") === durableId
        );
      }
    }
    if (commentEl.parentNode) {
      commentEl.parentNode.removeChild(commentEl);
    }
  }
};
|
|
1047
|
+
/**
 * Reads every comment of a package into a plain lookup object.
 *
 * The comments part supplies author, date, text and the inline `w15:done` /
 * `w15:p` attributes. If a commentsExtended part exists, its entries then
 * override `parent_id` (mapped from paragraph ids back to comment ids) and can
 * mark a comment as resolved.
 *
 * @param pkg Package exposing `parts` (each with `contentType` and `_element`).
 * @returns {Object} Map of comment `w:id` ->
 *   `{ author, text, date, resolved, parent_id }`; empty when the package has
 *   no comments part.
 */
function extract_comments_data(pkg) {
  const isDoneFlag = (value) => value === "1" || value === "true" || value === "on";
  const data = {};
  const commentsPart = pkg.parts.find((p) => p.contentType === CT.COMMENTS);
  if (!commentsPart) return data;

  // paraId -> owning comment id. A Map keeps ids taken from the document from
  // colliding with Object.prototype members during the lookups below.
  const paraIdToCommentId = /* @__PURE__ */ new Map();

  for (const commentEl of findAllDescendants(commentsPart._element, "w:comment")) {
    const commentId = commentEl.getAttribute("w:id");
    if (!commentId) continue;

    const textChunks = [];
    for (const paraEl of findAllDescendants(commentEl, "w:p")) {
      const paraId = paraEl.getAttribute("w14:paraId");
      if (paraId) paraIdToCommentId.set(paraId, commentId);
      for (const textEl of findAllDescendants(paraEl, "w:t")) {
        if (textEl.textContent) textChunks.push(textEl.textContent);
      }
      // One newline per paragraph; the trailing one is trimmed below.
      textChunks.push("\n");
    }

    data[commentId] = {
      author: commentEl.getAttribute("w:author") || "Unknown",
      text: textChunks.join("").trim(),
      date: commentEl.getAttribute("w:date") || "",
      resolved: isDoneFlag(commentEl.getAttribute("w15:done")),
      parent_id: commentEl.getAttribute("w15:p") || null
    };
  }

  const extendedPart = pkg.parts.find((p) => p.contentType === CT.EXTENDED);
  if (!extendedPart) return data;

  const entries = extendedPart._element.childNodes;
  for (let i = 0; i < entries.length; i++) {
    const entry = entries[i];
    if (entry.nodeType !== 1) continue;
    const paraId = entry.getAttribute("w15:paraId");
    if (!paraId) continue;
    const commentId = paraIdToCommentId.get(paraId);
    if (!commentId || !data[commentId]) continue;

    const parentParaId = entry.getAttribute("w15:paraIdParent");
    if (parentParaId) {
      // Only overrides parent_id when the parent paragraph maps to a known comment.
      const parentCommentId = paraIdToCommentId.get(parentParaId);
      if (parentCommentId) data[commentId].parent_id = parentCommentId;
    }
    // The extended part can only set "resolved"; it never clears it.
    if (isDoneFlag(entry.getAttribute("w15:done"))) {
      data[commentId].resolved = true;
    }
  }
  return data;
}
|
|
1117
|
+
|
|
1118
|
+
// src/ingest.ts
|
|
1119
|
+
/**
 * Loads a document from `buffer` via `DocumentObject.load` and returns its
 * extracted text.
 *
 * @param buffer Raw document bytes, passed straight to `DocumentObject.load`.
 * @param {boolean} [cleanView=false] Forwarded to `_extractTextFromDoc`.
 * @returns {Promise<string>} The extracted text.
 */
async function extractTextFromBuffer(buffer, cleanView = false) {
  const loaded = await DocumentObject.load(buffer);
  const text = _extractTextFromDoc(loaded, cleanView);
  return text;
}
|
|
1123
|
+
/**
 * Extracts the text of every document part and joins the non-empty results
 * with a blank line.
 *
 * @param doc Loaded document (`doc.pkg` is read for comments).
 * @param {boolean} [cleanView=false] Forwarded to the block extractor.
 * @param {boolean} [includeAppendix=true] When true, whatever
 *   `build_structural_appendix` returns is appended if it is non-empty.
 * @returns {string} The combined text.
 */
function _extractTextFromDoc(doc, cleanView = false, includeAppendix = true) {
  const PART_SEPARATOR = "\n\n";
  const commentsById = extract_comments_data(doc.pkg);
  const sections = [];
  // Character offset at which the next part would start in the joined text.
  let offset = 0;

  for (const part of iter_document_parts(doc)) {
    const start = sections.length > 0 ? offset + PART_SEPARATOR.length : offset;
    const rendered = _extract_blocks(part, commentsById, cleanView, start);
    if (!rendered) continue; // empty parts contribute neither text nor a separator
    sections.push(rendered);
    offset = start + rendered.length;
  }

  const body = sections.join(PART_SEPARATOR);
  if (!includeAppendix) return body;
  const appendix = build_structural_appendix(doc, body);
  return appendix ? body + appendix : body;
}
|
|
1143
|
+
/**
 * Renders the block-level items of `container` (paragraphs, tables and nested
 * footnote items) and joins them with a blank line.
 *
 * Paragraphs are always emitted, even when empty; tables and footnote items
 * are emitted only when they render to non-empty text. When `container` is
 * itself a footnote item, its first paragraph is prefixed with
 * `[^<note_type>-<id>]: `.
 *
 * @param container Object accepted by `iter_block_items`; `container.part`
 *   (or the container itself) is used to look up the style cache.
 * @param comments_map Comment lookup forwarded to the paragraph/table renderers.
 * @param {boolean} cleanView Forwarded to the paragraph/table renderers.
 * @param {number} cursor Starting character offset. It is only threaded into
 *   nested extraction calls and does not affect the text returned here.
 * @returns {string} The rendered text.
 */
function _extract_blocks(container, comments_map, cleanView, cursor) {
  const owner = container.part || container;
  const [style_cache, default_pstyle] = _get_style_cache(owner);
  const rendered = [];
  let offset = cursor;
  let emittedAny = false;
  let awaitingFirstPara = true;

  // Shared handling for blocks that are dropped when they render to nothing.
  const emitIfNonEmpty = (text, start) => {
    if (text) {
      rendered.push(text);
      offset = start + text.length;
      emittedAny = true;
    } else if (emittedAny) {
      // Undo the separator that was reserved for this (skipped) block.
      offset -= 2;
    }
  };

  for (const item of iter_block_items(container)) {
    // Reserve room for the "\n\n" separator before every block but the first.
    if (emittedAny) offset += 2;
    const start = offset;

    if (item.constructor.name === "FootnoteItem") {
      emitIfNonEmpty(_extract_blocks(item, comments_map, cleanView, start), start);
      continue;
    }

    if (item instanceof Paragraph) {
      let prefix = get_paragraph_prefix(item, style_cache, default_pstyle);
      if (awaitingFirstPara && container.constructor.name === "FootnoteItem") {
        prefix = `[^${container.note_type}-${container.id}]: ` + prefix;
      }
      const body = build_paragraph_text(item, comments_map, cleanView, style_cache, default_pstyle);
      const block = prefix + body;
      rendered.push(block);
      offset = start + block.length;
      awaitingFirstPara = false;
      emittedAny = true;
      continue;
    }

    if (item instanceof Table) {
      emitIfNonEmpty(extract_table(item, comments_map, cleanView, start), start);
      // A table ahead of the first paragraph still consumes the "first paragraph" slot.
      awaitingFirstPara = false;
    }
  }
  return rendered.join("\n\n");
}
|
|
1187
|
+
/**
 * Renders a table as text: one line per row, cells joined with " | ".
 *
 * A row whose `w:trPr` holds a `w:ins` / `w:del` child is a tracked row change.
 * In clean view deleted rows are dropped and no markup is written; otherwise
 * the row is wrapped as `{++ … |Chg:<id>++}` or `{-- … |Chg:<id>--}`.
 * A cell object that appears more than once in `row.cells` is rendered once.
 *
 * @param table Table exposing `rows`, each with `_element` and `cells`.
 * @param comments_map Comment lookup forwarded to `_extract_blocks`.
 * @param {boolean} cleanView See above.
 * @param {number} cursor Starting character offset, threaded into the cell
 *   extraction calls.
 * @returns {string} The rendered rows joined with "\n".
 */
function extract_table(table, comments_map, cleanView, cursor) {
  const CELL_SEPARATOR = " | ";
  const showMarkup = !cleanView;
  const lines = [];
  let offset = cursor;

  for (const row of table.rows) {
    const rowProps = findChild(row._element, "w:trPr");
    const insertedMark = rowProps ? findChild(rowProps, "w:ins") : null;
    const deletedMark = rowProps ? findChild(rowProps, "w:del") : null;
    if (cleanView && deletedMark) continue;

    // Every row after the first is preceded by a one-character newline.
    const rowStart = lines.length > 0 ? offset + 1 : offset;
    // "{++ " and "{-- " are both four characters long.
    const openerLength = showMarkup && (insertedMark || deletedMark) ? 4 : 0;

    const cellTexts = [];
    const visited = /* @__PURE__ */ new Set();
    let cellOffset = rowStart + openerLength;
    for (const cell of row.cells) {
      if (visited.has(cell)) continue;
      visited.add(cell);
      if (cellTexts.length > 0) cellOffset += CELL_SEPARATOR.length;
      const content = _extract_blocks(cell, comments_map, cleanView, cellOffset);
      cellTexts.push(content);
      cellOffset += content.length;
    }

    let line = cellTexts.join(CELL_SEPARATOR);
    if (showMarkup && insertedMark) {
      line = `{++ ${line} |Chg:${insertedMark.getAttribute("w:id")}++}`;
    } else if (showMarkup && deletedMark) {
      line = `{-- ${line} |Chg:${deletedMark.getAttribute("w:id")}--}`;
    }
    lines.push(line);
    offset = rowStart + line.length;
  }
  return lines.join("\n");
}
|
|
1222
|
+
/**
 * Renders one paragraph's content stream (runs plus marker events from
 * `iter_paragraph_content`) into annotated text.
 *
 * Consecutive runs sharing the same change wrapper are merged into one pending
 * segment; adjacent runs with identical inline style markers are fused so the
 * markers are not closed and reopened between them. Outside clean view, text
 * inside tracked insertions / deletions / format changes / comment ranges is
 * wrapped (`{++ ++}`, `{-- --}`, `{== ==}`) and followed by a `{>>…<<}` meta
 * block; the meta block is deferred while the next text-bearing run is still
 * inside a tracked change so adjacent changes share one block.
 *
 * @param paragraph Paragraph to render.
 * @param comments_map Comment lookup forwarded to `_build_merged_meta_block`.
 * @param {boolean} cleanView When true, runs inside a deletion are skipped and
 *   no wrappers or meta blocks are written.
 * @param style_cache Forwarded to the heading checks.
 * @param default_pstyle Forwarded to the heading checks.
 * @returns {string} The rendered paragraph text.
 */
function build_paragraph_text(paragraph, comments_map, cleanView, style_cache, default_pstyle) {
  // Events that change tracked state without interrupting the pending segment.
  const REDLINE_EVENT_TYPES = ["ins_start", "ins_end", "del_start", "del_end", "fmt_start", "fmt_end"];
  const parts = [];
  const active_ins = {};
  const active_del = {};
  const active_comments = /* @__PURE__ */ new Set();
  const active_fmt = {};
  const deferred_meta_states = [];
  let pending_text = "";
  let current_wrappers = ["", ""];
  let current_style = ["", ""];
  const items = Array.from(iter_paragraph_content(paragraph));
  const is_heading = is_heading_paragraph(paragraph, style_cache, default_pstyle);
  const native_heading = is_native_heading(paragraph, style_cache, default_pstyle);
  // In headings, whitespace-only runs are dropped until the first real content.
  let leading_strip_active = is_heading;

  const size_of = (map) => Object.keys(map).length;

  // Emits the pending segment (inside its wrapper) and resets the merge state.
  const flush_pending = () => {
    if (!pending_text) return;
    parts.push(`${current_wrappers[0]}${pending_text}${current_wrappers[1]}`);
    pending_text = "";
    current_wrappers = ["", ""];
    current_style = ["", ""];
  };

  // Replays the events after position `from` and reports whether the next run
  // that carries text is still inside an insertion, deletion or format change.
  const next_text_run_is_redline = (from, ins_depth, del_depth, fmt_depth) => {
    for (let j = from; j < items.length; j++) {
      const upcoming = items[j];
      if (upcoming instanceof Run) {
        if (!get_run_text(upcoming)) continue;
        return ins_depth > 0 || del_depth > 0 || fmt_depth > 0;
      }
      switch (upcoming.type) {
        case "ins_start":
          ins_depth++;
          break;
        case "ins_end":
          ins_depth = Math.max(0, ins_depth - 1);
          break;
        case "del_start":
          del_depth++;
          break;
        case "del_end":
          del_depth = Math.max(0, del_depth - 1);
          break;
        case "fmt_start":
          fmt_depth++;
          break;
        case "fmt_end":
          fmt_depth = Math.max(0, fmt_depth - 1);
          break;
        default:
          break;
      }
    }
    return false;
  };

  for (let i = 0; i < items.length; i++) {
    const item = items[i];

    if (!(item instanceof Run)) {
      const ev = item;
      leading_strip_active = false;
      // Anything other than a tracked-change boundary ends the pending segment,
      // so the marker handlers below never have pending text left to flush.
      if (!REDLINE_EVENT_TYPES.includes(ev.type)) flush_pending();
      switch (ev.type) {
        case "start":
          active_comments.add(ev.id);
          break;
        case "end":
          active_comments.delete(ev.id);
          break;
        case "ins_start":
          active_ins[ev.id] = ev;
          break;
        case "ins_end":
          delete active_ins[ev.id];
          break;
        case "del_start":
          active_del[ev.id] = ev;
          break;
        case "del_end":
          delete active_del[ev.id];
          break;
        case "fmt_start":
          active_fmt[ev.id] = ev;
          break;
        case "fmt_end":
          delete active_fmt[ev.id];
          break;
        case "footnote":
        case "endnote":
          parts.push(`[^${ev.type === "footnote" ? "fn" : "en"}-${ev.id}]`);
          break;
        case "hyperlink_start":
          parts.push("[");
          break;
        case "hyperlink_end":
          // Hyperlink events carry the URL in their `date` field.
          parts.push(`](${ev.date})`);
          break;
        case "xref_start":
          parts.push("[~");
          break;
        case "xref_end":
          parts.push(`~](#${ev.id})`);
          break;
        case "bookmark":
          parts.push(`{#${ev.id}}`);
          break;
        default:
          break;
      }
      continue;
    }

    const [prefix, suffix] = get_run_style_markers(item, native_heading);
    const text = get_run_text(item);
    if (cleanView && size_of(active_del) > 0) continue;
    if (leading_strip_active) {
      if (!text || !text.trim()) continue;
      leading_strip_active = false;
    }
    const seg = apply_formatting_to_segments(text, prefix, suffix);
    if (!seg) continue;

    const new_wrappers = cleanView ? ["", ""] : _get_wrappers(active_ins, active_del, active_comments, active_fmt);
    const new_style = [prefix, suffix];
    const same_wrapper = new_wrappers[0] === current_wrappers[0] && new_wrappers[1] === current_wrappers[1];
    if (pending_text && same_wrapper) {
      const can_fuse = new_style[0] === current_style[0] && new_style[1] === current_style[1] && current_style[0] !== "" && pending_text.endsWith(current_style[1]) && seg.startsWith(new_style[0]);
      if (can_fuse) {
        // Drop the closing marker of the previous run and the opening marker of
        // this one. The end index is computed explicitly: slice(0, -0) would
        // return "" and discard the whole buffer when the suffix is empty.
        const keep = pending_text.length - current_style[1].length;
        pending_text = pending_text.slice(0, keep) + seg.slice(new_style[0].length);
      } else {
        pending_text += seg;
      }
      current_style = new_style;
    } else {
      flush_pending();
      pending_text = seg;
      current_wrappers = new_wrappers;
      current_style = new_style;
    }

    if (cleanView) continue;

    const ins_count = size_of(active_ins);
    const del_count = size_of(active_del);
    const fmt_count = size_of(active_fmt);
    const is_redline = ins_count > 0 || del_count > 0 || fmt_count > 0;
    if (is_redline || active_comments.size > 0) {
      // Snapshot the state: the live maps keep changing as events arrive.
      deferred_meta_states.push([{ ...active_ins }, { ...active_del }, new Set(active_comments), { ...active_fmt }]);
    }
    const should_defer = is_redline && next_text_run_is_redline(i + 1, ins_count, del_count, fmt_count);
    if (!should_defer && deferred_meta_states.length > 0) {
      const meta_block = _build_merged_meta_block(deferred_meta_states, comments_map);
      if (meta_block) {
        flush_pending();
        parts.push(`{>>${meta_block}<<}`);
      }
      deferred_meta_states.length = 0;
    }
  }

  flush_pending();
  if (deferred_meta_states.length > 0) {
    const meta_block = _build_merged_meta_block(deferred_meta_states, comments_map);
    if (meta_block) parts.push(`{>>${meta_block}<<}`);
  }
  return parts.join("");
}
|
|
1390
|
+
/**
 * Pick the opening/closing markup tokens for a text segment.
 *
 * Priority: an active deletion wins over an active insertion, which wins over
 * a plain highlight (any active comment or formatting change).
 *
 * @param {Object} ins - Active insertions keyed by change id.
 * @param {Object} del - Active deletions keyed by change id.
 * @param {Set} comments - Ids of the comments currently open.
 * @param {Object} fmt - Active formatting changes keyed by change id.
 * @returns {[string, string]} `[open, close]` tokens; both empty when nothing is active.
 */
function _get_wrappers(ins, del, comments, fmt) {
  const has_entries = (record) => Object.keys(record).length !== 0;

  if (has_entries(del)) {
    return ["{--", "--}"];
  }
  if (has_entries(ins)) {
    return ["{++", "++}"];
  }
  const is_highlighted = comments.size > 0 || has_entries(fmt);
  return is_highlighted ? ["{==", "==}"] : ["", ""];
}
|
|
1396
|
+
/**
 * Build the text of a merged metadata block for a sequence of captured states.
 *
 * Every tracked change (insert / delete / format) and every comment is listed
 * at most once, in first-seen order. Change lines come first, then comment
 * lines; lines are joined with "\n". Replies to a comment (entries of
 * `comments_map` whose `parent_id` points at it) are rendered right after
 * their parent, ordered by `date`.
 *
 * @param {Array} states_list - Tuples `[ins_map, del_map, comments_set, fmt_map]`.
 * @param {Object} comments_map - Comment records keyed by comment id; the fields
 *   read here are `author`, `date`, `resolved`, `text` and `parent_id`.
 * @returns {string} The merged block, or "" when there is nothing to report.
 */
function _build_merged_meta_block(states_list, comments_map) {
  const change_lines = [];
  const comment_lines = [];
  const seen_sigs = /* @__PURE__ */ new Set();

  // parent id -> reply ids. It depends only on `comments_map`, so it is built
  // once, and only when a comment is actually rendered, instead of once per
  // state. A Map keeps ids such as "constructor" from hitting Object.prototype.
  let children_by_parent = null;
  const get_children = (parent_id) => {
    if (children_by_parent === null) {
      children_by_parent = /* @__PURE__ */ new Map();
      for (const [c_id, data] of Object.entries(comments_map)) {
        const p_id = data.parent_id;
        if (!p_id) continue;
        const key = String(p_id);
        if (!children_by_parent.has(key)) children_by_parent.set(key, []);
        children_by_parent.get(key).push(c_id);
      }
    }
    return children_by_parent.get(String(parent_id));
  };

  const render_comment = (cid) => {
    const data = comments_map[cid];
    if (!data) return;
    const sig = `Com:${cid}`;
    // Doubles as the cycle guard for the recursion below.
    if (seen_sigs.has(sig)) return;
    let header = `[${sig}] ${data.author}`;
    if (data.date) header += ` @ ${data.date}`;
    if (data.resolved) header += `(RESOLVED)`;
    comment_lines.push(`${header}: ${data.text}`);
    seen_sigs.add(sig);

    const children = get_children(cid);
    if (children) {
      children.sort((a, b) => (comments_map[a]?.date || "").localeCompare(comments_map[b]?.date || ""));
      for (const child_id of children) {
        render_comment(child_id);
      }
    }
  };

  for (const [ins_map, del_map, comments_set, fmt_map] of states_list) {
    const change_groups = [
      [ins_map, "insert"],
      [del_map, "delete"],
      [fmt_map, "format"]
    ];
    for (const [change_map, label] of change_groups) {
      for (const [uid, meta] of Object.entries(change_map)) {
        const sig = `Chg:${uid}`;
        if (seen_sigs.has(sig)) continue;
        change_lines.push(`[${sig} ${label}] ${meta.author || "Unknown"}`);
        seen_sigs.add(sig);
      }
    }
    for (const c_id of Array.from(comments_set).sort()) {
      render_comment(c_id);
    }
  }
  return [...change_lines, ...comment_lines].join("\n");
}
|
|
1455
|
+
|
|
1456
|
+
// src/mapper.ts
|
|
1457
|
+
/**
 * Maps a document to a flat text projection (`full_text`) plus a list of
 * `spans` that tie character ranges of that text back to the runs and
 * paragraphs they came from.
 *
 * A span is `{ start, end, text, run, paragraph, ... }`. "Real" spans carry the
 * `run` that produced the text; "virtual" spans (`run === null`) hold generated
 * text such as separators, prefixes and markup tokens.
 */
var DocumentMapper = class {
  doc;
  clean_view;
  comments_map;
  full_text = "";
  spans = [];
  appendix_start_index = -1;
  _text_chunks = [];

  /**
   * @param doc - Document to map; `doc.pkg` is handed to `extract_comments_data`.
   * @param {boolean} [clean_view=false] - When true, no change/comment markup is
   *   emitted and deleted content is left out of the projection.
   */
  constructor(doc, clean_view = false) {
    this.doc = doc;
    this.clean_view = clean_view;
    this.comments_map = extract_comments_data(doc.pkg);
    this._build_map();
  }

  /**
   * (Re)build `spans`, `_text_chunks` and `full_text` from the document.
   * Parts are separated by "\n\n"; trailing "\n\n" separators are dropped.
   */
  _build_map() {
    let current_offset = 0;
    this.spans = [];
    this._text_chunks = [];
    this.full_text = "";
    for (const part of iter_document_parts(this.doc)) {
      current_offset = this._map_blocks(part, current_offset);
      if (this.spans.length > 0 && this.spans[this.spans.length - 1].text !== "\n\n") {
        this._add_virtual_text("\n\n", current_offset, null);
        current_offset += 2;
      }
    }
    // Strip trailing separators from both parallel lists.
    while (this.spans.length > 0 && this.spans[this.spans.length - 1].text === "\n\n") {
      this.spans.pop();
      this._text_chunks.pop();
    }
    this.full_text = this._text_chunks.join("");
    this.appendix_start_index = -1;
  }

  /**
   * Map every block item (paragraph, table, footnote item) of a container.
   *
   * @param container - A document part, table cell or footnote item.
   * @param {number} offset - Text offset at which the container starts.
   * @returns {number} The offset just past the mapped content.
   */
  _map_blocks(container, offset) {
    let current = offset;
    const c_type = container.constructor.name;
    const part = container.part || container;
    const [style_cache, default_pstyle] = _get_style_cache(part);
    if (c_type === "NotesPart") {
      // Notes sections open with a rule and a heading.
      const header = container.note_type === "fn" ? "## Footnotes" : "## Endnotes";
      const sep = `---\n${header}`;
      this._add_virtual_text(sep, current, null);
      current += sep.length;
      this._add_virtual_text("\n\n", current, null);
      current += 2;
    }
    let is_first_para = true;
    let previous_item = null;
    for (const item of iter_block_items(container)) {
      const i_type = item.constructor.name;
      if (i_type === "FootnoteItem") {
        current = this._map_blocks(item, current);
      } else if (item instanceof Paragraph) {
        if (!is_first_para) {
          // The separator is attributed to the preceding paragraph, if any.
          const prev_para = previous_item instanceof Paragraph ? previous_item : null;
          this._add_virtual_text("\n\n", current, prev_para);
          current += 2;
        }
        let prefix = get_paragraph_prefix(item, style_cache, default_pstyle);
        if (is_first_para && c_type === "FootnoteItem") {
          prefix = `[^${container.note_type}-${container.id}]: ` + prefix;
        }
        if (prefix) {
          this._add_virtual_text(prefix, current, item);
          current += prefix.length;
        }
        current = this._map_paragraph_content(item, current, style_cache, default_pstyle);
        is_first_para = false;
        previous_item = item;
      } else if (item instanceof Table) {
        if (!is_first_para) {
          const prev_para = previous_item instanceof Paragraph ? previous_item : null;
          this._add_virtual_text("\n\n", current, prev_para);
          current += 2;
        }
        current = this._map_table(item, current);
        is_first_para = false;
        previous_item = item;
      }
    }
    return current;
  }

  /**
   * Map a table: rows are separated by "\n" and cells by " | ". Outside clean
   * view, inserted/deleted rows are wrapped in `{++ … ++}` / `{-- … --}` with
   * the change id; in clean view deleted rows are skipped entirely.
   *
   * @returns {number} The offset just past the table.
   */
  _map_table(table, offset) {
    let current = offset;
    let rows_processed = 0;
    for (const row of table.rows) {
      const tr = row._element;
      const trPr = findChild(tr, "w:trPr");
      const ins = trPr ? findChild(trPr, "w:ins") : null;
      const del_node = trPr ? findChild(trPr, "w:del") : null;
      if (this.clean_view && del_node) continue;
      if (rows_processed > 0) {
        this._add_virtual_text("\n", current, null);
        current += 1;
      }
      if (ins && !this.clean_view) {
        this._add_virtual_text("{++ ", current, null);
        current += 4;
      } else if (del_node && !this.clean_view) {
        this._add_virtual_text("{-- ", current, null);
        current += 4;
      }
      // The same cell object can appear more than once in row.cells; map it once.
      const seen_cells = /* @__PURE__ */ new Set();
      let cells_processed = 0;
      for (const cell of row.cells) {
        if (seen_cells.has(cell)) continue;
        seen_cells.add(cell);
        if (cells_processed > 0) {
          this._add_virtual_text(" | ", current, null);
          current += 3;
        }
        current = this._map_blocks(cell, current);
        cells_processed += 1;
      }
      if (ins && !this.clean_view) {
        const suffix = ` |Chg:${ins.getAttribute("w:id")}++}`;
        this._add_virtual_text(suffix, current, null);
        current += suffix.length;
      } else if (del_node && !this.clean_view) {
        const suffix = ` |Chg:${del_node.getAttribute("w:id")}--}`;
        this._add_virtual_text(suffix, current, null);
        current += suffix.length;
      }
      rows_processed += 1;
    }
    return current;
  }

  /**
   * Remove leading `#` heading markers and simple `**`/`__`/`*`/`_` emphasis
   * wrappers around word-like content.
   *
   * @param {string} text
   * @returns {string}
   */
  _strip_markdown_formatting(text) {
    let result = text;
    result = result.replace(/^#+\s*/gm, "");
    result = result.replace(/\*\*(\w[\w\s]*\w|\w{2,})\*\*/g, "$1");
    result = result.replace(/__(\w[\w\s]*\w|\w{2,})__/g, "$1");
    result = result.replace(/(?<!\w)_(\w[\w\s]*\w|\w{2,})_(?!\w)/g, "$1");
    result = result.replace(/(?<!\w)\*(\w[\w\s]*\w|\w{2,})\*(?!\w)/g, "$1");
    return result;
  }

  /**
   * Map the inline content (runs and marker events) of one paragraph.
   *
   * Runs are buffered in `pending_runs` so that consecutive runs sharing the
   * same wrapper tokens are emitted inside a single wrapper; any event or
   * wrapper change flushes the buffer.
   *
   * @returns {number} The offset just past the paragraph content.
   */
  _map_paragraph_content(paragraph, start_offset, style_cache, default_pstyle) {
    let current = start_offset;
    // Zero-length span marking where the paragraph content starts; it adds no
    // text chunk.
    const span = { start: current, end: current, text: "", run: null, paragraph };
    this.spans.push(span);
    const active_ids = /* @__PURE__ */ new Set();
    const active_ins = {};
    const active_del = {};
    const active_fmt = {};
    let deferred_meta_states = [];
    let current_wrappers = ["", ""];
    let current_style = ["", ""];
    let active_hyperlink_id = null;
    let pending_runs = [];

    // Emit the buffered runs, surrounded by the current wrapper tokens.
    const flush_pending_runs = () => {
      if (pending_runs.length === 0) return;
      const [s_tok, e_tok] = current_wrappers;
      if (s_tok) {
        this._add_virtual_text(s_tok, current, paragraph);
        current += s_tok.length;
      }
      for (const [kind, txt, r_obj, i_id, d_id] of pending_runs) {
        if (kind === "virtual") {
          this._add_virtual_text(txt, current, paragraph, active_hyperlink_id);
        } else {
          const s = {
            start: current,
            end: current + txt.length,
            text: txt,
            run: r_obj,
            paragraph,
            ins_id: i_id || void 0,
            del_id: d_id || void 0,
            hyperlink_id: active_hyperlink_id || void 0
          };
          this.spans.push(s);
          this._text_chunks.push(txt);
        }
        current += txt.length;
      }
      if (e_tok) {
        this._add_virtual_text(e_tok, current, paragraph);
        current += e_tok.length;
      }
      pending_runs = [];
    };

    // Flush buffered runs and forget the current wrapper/style context.
    const reset_run_state = () => {
      flush_pending_runs();
      current_wrappers = ["", ""];
      current_style = ["", ""];
    };

    const items = Array.from(iter_paragraph_content(paragraph));
    const is_heading = is_heading_paragraph(paragraph, style_cache, default_pstyle);
    const native_heading = is_native_heading(paragraph, style_cache, default_pstyle);
    // In headings, whitespace-only runs are dropped until the first run with text.
    let leading_strip_active = is_heading;

    for (let i = 0; i < items.length; i++) {
      const item = items[i];
      if (item instanceof Run) {
        const [prefix, suffix] = get_run_style_markers(item, native_heading);
        const run_parts = [];
        const text = get_run_text(item);
        if (leading_strip_active) {
          if (text === "" || /^\s*$/.test(text)) continue;
          leading_strip_active = false;
        }
        if (text.includes("\n") && (prefix || suffix)) {
          // Style markers are applied per line so they never wrap a newline.
          const parts = text.split("\n");
          for (let idx = 0; idx < parts.length; idx++) {
            if (idx > 0) run_parts.push(["real", "\n", item]);
            if (parts[idx]) {
              if (prefix) run_parts.push(["virtual", prefix, null]);
              run_parts.push(["real", parts[idx], item]);
              if (suffix) run_parts.push(["virtual", suffix, null]);
            }
          }
        } else {
          if (prefix) run_parts.push(["virtual", prefix, null]);
          if (text) run_parts.push(["real", text, item]);
          if (suffix) run_parts.push(["virtual", suffix, null]);
        }
        const full_seg_text = run_parts.map((x) => x[1]).join("");
        // NOTE(review): Object.keys lists integer-like keys in ascending numeric
        // order, so with several changes open at once this picks the largest
        // numeric id, not necessarily the most recently opened one - confirm
        // that is intended.
        const curr_ins_id = Object.keys(active_ins).pop() || null;
        const curr_del_id = Object.keys(active_del).pop() || null;
        if (full_seg_text && !(this.clean_view && curr_del_id)) {
          const new_wrappers = this.clean_view ? ["", ""] : this._get_wrappers(curr_ins_id, curr_del_id, active_ids, active_fmt);
          const new_style = [prefix, suffix];
          if (pending_runs.length > 0 && new_wrappers[0] === current_wrappers[0] && new_wrappers[1] === current_wrappers[1]) {
            // Same wrapper as the buffered runs. If the style is unchanged too,
            // merge by dropping the previous closing marker and this run's
            // opening marker.
            let skip_leading_prefix = false;
            const last_pending = pending_runs[pending_runs.length - 1];
            if (new_style[0] === current_style[0] && new_style[1] === current_style[1] && current_style[0] !== "" && last_pending[0] === "virtual" && last_pending[1] === current_style[1]) {
              pending_runs.pop();
              skip_leading_prefix = true;
            }
            for (const [kind, txt, r_obj] of run_parts) {
              if (skip_leading_prefix && kind === "virtual" && txt === new_style[0]) {
                skip_leading_prefix = false;
                continue;
              }
              pending_runs.push([kind, txt, r_obj, curr_ins_id, curr_del_id]);
            }
            current_style = new_style;
          } else {
            flush_pending_runs();
            current_wrappers = new_wrappers;
            current_style = new_style;
            for (const [kind, txt, r_obj] of run_parts) {
              pending_runs.push([kind, txt, r_obj, curr_ins_id, curr_del_id]);
            }
          }
        }
        if (!this.clean_view) {
          const has_meta = Object.keys(active_ins).length > 0 || Object.keys(active_del).length > 0 || active_ids.size > 0 || Object.keys(active_fmt).length > 0;
          if (has_meta) {
            deferred_meta_states.push([{ ...active_ins }, { ...active_del }, new Set(active_ids), { ...active_fmt }]);
          }
          // Hold the metadata block back while the next run that has text is
          // still inside a tracked change, so adjacent redlines share one block.
          let should_defer = false;
          const is_redline = curr_ins_id !== null || curr_del_id !== null || Object.keys(active_fmt).length > 0;
          if (is_redline) {
            let j = i + 1;
            let next_is_redline = false;
            let temp_ins_count = Object.keys(active_ins).length;
            let temp_del_count = Object.keys(active_del).length;
            let temp_fmt_count = Object.keys(active_fmt).length;
            while (j < items.length) {
              const next_item = items[j];
              if (next_item instanceof Run) {
                if (!get_run_text(next_item)) {
                  j++;
                  continue;
                }
                if (temp_ins_count > 0 || temp_del_count > 0 || temp_fmt_count > 0) {
                  next_is_redline = true;
                }
                break;
              } else {
                // Simulate the effect of the intervening events on the counts.
                const ev = next_item;
                if (ev.type === "ins_start") temp_ins_count++;
                else if (ev.type === "ins_end") temp_ins_count = Math.max(0, temp_ins_count - 1);
                else if (ev.type === "del_start") temp_del_count++;
                else if (ev.type === "del_end") temp_del_count = Math.max(0, temp_del_count - 1);
                else if (ev.type === "fmt_start") temp_fmt_count++;
                else if (ev.type === "fmt_end") temp_fmt_count = Math.max(0, temp_fmt_count - 1);
              }
              j++;
            }
            if (next_is_redline) should_defer = true;
          }
          if (!should_defer && deferred_meta_states.length > 0) {
            const meta_block = this._build_merged_meta_block(deferred_meta_states);
            if (meta_block) {
              reset_run_state();
              const full_meta = `{>>${meta_block}<<}`;
              this._add_virtual_text(full_meta, current, paragraph);
              current += full_meta.length;
            }
            deferred_meta_states = [];
          }
        }
      } else {
        const ev = item;
        leading_strip_active = false;
        // Every event ends the current run group, so the branches below can
        // emit their text directly.
        reset_run_state();
        if (ev.type === "start") active_ids.add(ev.id);
        else if (ev.type === "end") active_ids.delete(ev.id);
        else if (ev.type === "ins_start") active_ins[ev.id] = ev;
        else if (ev.type === "ins_end") delete active_ins[ev.id];
        else if (ev.type === "del_start") active_del[ev.id] = ev;
        else if (ev.type === "del_end") delete active_del[ev.id];
        else if (ev.type === "fmt_start") active_fmt[ev.id] = ev;
        else if (ev.type === "fmt_end") delete active_fmt[ev.id];
        else if (ev.type === "footnote" || ev.type === "endnote") {
          const prefix_str = ev.type === "footnote" ? "fn" : "en";
          const txt = `[^${prefix_str}-${ev.id}]`;
          this._add_virtual_text(txt, current, paragraph);
          current += txt.length;
        } else if (ev.type === "hyperlink_start") {
          this._add_virtual_text("[", current, paragraph, ev.id);
          current += 1;
          active_hyperlink_id = ev.id;
        } else if (ev.type === "hyperlink_end") {
          // NOTE(review): the link target is read from ev.date - presumably the
          // event reuses that field for the URL; confirm against the producer.
          const txt = `](${ev.date})`;
          this._add_virtual_text(txt, current, paragraph, ev.id);
          current += txt.length;
          active_hyperlink_id = null;
        } else if (ev.type === "xref_start") {
          this._add_virtual_text("[~", current, paragraph);
          current += 2;
        } else if (ev.type === "xref_end") {
          const txt = `~](#${ev.id})`;
          this._add_virtual_text(txt, current, paragraph);
          current += txt.length;
        } else if (ev.type === "bookmark") {
          const txt = `{#${ev.id}}`;
          this._add_virtual_text(txt, current, paragraph);
          current += txt.length;
        }
      }
    }
    flush_pending_runs();
    if (deferred_meta_states.length > 0) {
      const meta_block = this._build_merged_meta_block(deferred_meta_states);
      if (meta_block) {
        const full_meta = `{>>${meta_block}<<}`;
        this._add_virtual_text(full_meta, current, paragraph);
        current += full_meta.length;
      }
    }
    return current;
  }

  /**
   * Pick the wrapper tokens for a segment: deletion beats insertion, which
   * beats a highlight (any open comment or formatting change).
   *
   * @returns {[string, string]} `[open, close]`, both empty when nothing applies.
   */
  _get_wrappers(ins_id, del_id, active_ids, active_fmt) {
    if (del_id) return ["{--", "--}"];
    if (ins_id) return ["{++", "++}"];
    if (active_ids.size > 0 || Object.keys(active_fmt).length > 0) return ["{==", "==}"];
    return ["", ""];
  }

  /**
   * Build the text of a metadata block from captured states: one line per
   * change and per comment known to `this.comments_map`, each listed once,
   * change lines first, joined with "\n".
   *
   * @param {Array} states_list - Tuples `[ins_map, del_map, comments_set, fmt_map]`.
   * @returns {string}
   */
  _build_merged_meta_block(states_list) {
    const change_lines = [];
    const comment_lines = [];
    const seen_sigs = /* @__PURE__ */ new Set();
    for (const [ins_map, del_map, comments_set, fmt_map] of states_list) {
      for (const [uid, meta] of Object.entries(ins_map)) {
        const sig = `Chg:${uid}`;
        if (!seen_sigs.has(sig)) {
          const auth = meta.author || "Unknown";
          change_lines.push(`[${sig} insert] ${auth}`);
          seen_sigs.add(sig);
        }
      }
      for (const [uid, meta] of Object.entries(del_map)) {
        const sig = `Chg:${uid}`;
        if (!seen_sigs.has(sig)) {
          const auth = meta.author || "Unknown";
          change_lines.push(`[${sig} delete] ${auth}`);
          seen_sigs.add(sig);
        }
      }
      for (const [uid, meta] of Object.entries(fmt_map)) {
        const sig = `Chg:${uid}`;
        if (!seen_sigs.has(sig)) {
          const auth = meta.author || "Unknown";
          change_lines.push(`[${sig} format] ${auth}`);
          seen_sigs.add(sig);
        }
      }
      const sorted_ids = Array.from(comments_set).sort();
      for (const c_id of sorted_ids) {
        if (!this.comments_map[c_id]) continue;
        const sig = `Com:${c_id}`;
        if (!seen_sigs.has(sig)) {
          const data = this.comments_map[c_id];
          let header = `[${sig}] ${data.author}`;
          if (data.date) header += ` @ ${data.date}`;
          if (data.resolved) header += `(RESOLVED)`;
          comment_lines.push(`${header}: ${data.text}`);
          seen_sigs.add(sig);
        }
      }
    }
    return [...change_lines, ...comment_lines].join("\n");
  }

  /**
   * Append generated text (no backing run) as a span and a text chunk.
   *
   * @param {string} text
   * @param {number} offset - Start offset of the text.
   * @param context_paragraph - Paragraph the text is attributed to, or null.
   * @param [hyperlink_id=null] - Id of the enclosing hyperlink, if any.
   */
  _add_virtual_text(text, offset, context_paragraph, hyperlink_id = null) {
    const span = {
      start: offset,
      end: offset + text.length,
      text,
      run: null,
      paragraph: context_paragraph,
      hyperlink_id: hyperlink_id || void 0
    };
    this.spans.push(span);
    this._text_chunks.push(text);
  }

  /**
   * Replace curly quotes with their straight equivalents. The mapping is
   * one character to one character, so offsets and lengths are preserved.
   */
  _replace_smart_quotes(text) {
    return text.replace(/“/g, '"').replace(/”/g, '"').replace(/‘/g, "'").replace(/’/g, "'");
  }

  /** Escape `str` so it matches literally inside a regular expression. */
  _escape_regex(str) {
    return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  }

  /**
   * Build the source of a tolerant regular expression for `target_text`:
   * whitespace matches any whitespace run, quotes match straight or curly
   * variants, `[___]` placeholders match any number of underscores, and
   * optional emphasis markers are allowed around whitespace and punctuation.
   *
   * @param {string} target_text
   * @returns {string} Regex source; "" when the target strips down to nothing.
   */
  _make_fuzzy_regex(target_text) {
    target_text = this._strip_markdown_formatting(target_text);
    target_text = this._replace_smart_quotes(target_text);
    const parts = [];
    const token_pattern = /(\[_+\])|(\s+)|(['"])|([.,;:\/])/g;
    let last_idx = 0;
    let match;
    while ((match = token_pattern.exec(target_text)) !== null) {
      const literal = target_text.substring(last_idx, match.index);
      if (literal) parts.push(this._escape_regex(literal));
      const g_placeholder = match[1];
      const g_space = match[2];
      const g_quote = match[3];
      const g_punct = match[4];
      if (g_placeholder) {
        parts.push("\\[_+\\]");
      } else if (g_space) {
        parts.push("(?:\\*\\*|__|\\*|_)?");
        parts.push("\\s+");
        parts.push("(?:\\*\\*|__|\\*|_)?");
      } else if (g_quote) {
        if (g_quote === "'") parts.push("[\u2018\u2019']");
        else parts.push('["\u201C\u201D]');
      } else if (g_punct) {
        parts.push("(?:\\*\\*|__|\\*|_)?");
        parts.push(this._escape_regex(g_punct));
        parts.push("(?:\\*\\*|__|\\*|_)?");
      }
      last_idx = token_pattern.lastIndex;
    }
    const remaining = target_text.substring(last_idx);
    if (remaining) parts.push(this._escape_regex(remaining));
    return parts.join("");
  }

  /**
   * Find the first occurrence of `target_text` in `full_text`, trying in
   * order: exact match, smart-quote-insensitive match, match with markdown
   * formatting stripped from the target, then the fuzzy regex.
   *
   * @param {string} target_text
   * @returns {[number, number]} `[start, length]`, or `[-1, 0]` when not found.
   */
  find_match_index(target_text) {
    let start_idx = this.full_text.indexOf(target_text);
    if (start_idx !== -1) return [start_idx, target_text.length];
    const norm_full = this._replace_smart_quotes(this.full_text);
    const norm_target = this._replace_smart_quotes(target_text);
    start_idx = norm_full.indexOf(norm_target);
    if (start_idx !== -1) return [start_idx, target_text.length];
    const stripped_target = this._strip_markdown_formatting(target_text);
    // A target made only of markdown markers (e.g. "##") strips to "", which
    // would "match" at offset 0 with length 0; treat that as no match.
    if (stripped_target) {
      start_idx = this.full_text.indexOf(stripped_target);
      if (start_idx !== -1) return [start_idx, stripped_target.length];
    }
    try {
      const source = this._make_fuzzy_regex(target_text);
      if (source) {
        const match = new RegExp(source).exec(this.full_text);
        if (match && match[0].length > 0) return [match.index, match[0].length];
      }
    } catch {
      // Best effort: a pattern that fails to compile means "not found".
    }
    return [-1, 0];
  }

  /**
   * Find every occurrence of `target_text` in `full_text`, using the first of
   * the strategies of `find_match_index` that yields at least one match.
   *
   * @param {string} target_text
   * @returns {Array<[number, number]>} `[start, length]` pairs; empty when the
   *   target is empty or nothing matches.
   */
  find_all_match_indices(target_text) {
    if (!target_text) return [];
    const to_ranges = (found) => found.map((m) => [m.index, m[0].length]);
    const find_literal = (haystack, needle) => [...haystack.matchAll(new RegExp(this._escape_regex(needle), "g"))];

    let matches = find_literal(this.full_text, target_text);
    if (matches.length > 0) return to_ranges(matches);
    const norm_full = this._replace_smart_quotes(this.full_text);
    const norm_target = this._replace_smart_quotes(target_text);
    matches = find_literal(norm_full, norm_target);
    if (matches.length > 0) return to_ranges(matches);
    const stripped_target = this._strip_markdown_formatting(target_text);
    // An empty pattern matches at every offset; skip it rather than report a
    // zero-length "match" everywhere.
    if (stripped_target) {
      matches = find_literal(this.full_text, stripped_target);
      if (matches.length > 0) return to_ranges(matches);
    }
    try {
      const source = this._make_fuzzy_regex(target_text);
      if (source) {
        matches = [...this.full_text.matchAll(new RegExp(source, "g"))].filter((m) => m[0].length > 0);
        if (matches.length > 0) return to_ranges(matches);
      }
    } catch {
      // Best effort: a pattern that fails to compile means "no matches".
    }
    return [];
  }

  /** Resolve the runs covering the first match of `target_text` ([] if none). */
  find_target_runs(target_text) {
    const [start_idx, length] = this.find_match_index(target_text);
    if (start_idx === -1) return [];
    return this._resolve_runs_at_range(start_idx, start_idx + length);
  }

  /** Resolve the runs covering `[start_index, start_index + length)`. */
  find_target_runs_by_index(start_index, length, rebuild_map = true) {
    return this._resolve_runs_at_range(start_index, start_index + length, rebuild_map);
  }

  /** Return the virtual "\n\n" separator spans lying fully inside the range. */
  get_virtual_spans_in_range(start_index, length) {
    const end_index = start_index + length;
    return this.spans.filter((s) => s.run === null && s.text === "\n\n" && s.start >= start_index && s.end <= end_index);
  }

  /**
   * Return the runs overlapping `[start_idx, end_idx)`, splitting the first
   * and last run in the DOM so the returned runs cover the range exactly.
   *
   * @param {number} start_idx
   * @param {number} end_idx
   * @param {boolean} [rebuild_map=true] - Rebuild the map after a split; when
   *   false the existing spans are left as they were before the split.
   * @returns {Array} Runs in document order; [] when no real run is affected.
   */
  _resolve_runs_at_range(start_idx, end_idx, rebuild_map = true) {
    const affected_spans = this.spans.filter((s) => s.end > start_idx && s.start < end_idx);
    if (affected_spans.length === 0) return [];
    const working_runs = affected_spans.filter((s) => s.run !== null).map((s) => s.run);
    if (working_runs.length === 0) return [];
    // NOTE(review): the split offsets below are relative to a span's start,
    // which assumes each run maps to a single span. Styled runs containing
    // newlines are mapped to several spans and may need a run-relative offset
    // - TODO confirm.
    let dom_modified = false;
    const first_real_span = affected_spans.find((s) => s.run !== null);
    let start_split_adjustment = 0;
    if (first_real_span) {
      const local_start = start_idx - first_real_span.start;
      if (local_start > 0) {
        // Range starts inside the first run: keep only its right-hand part.
        const [, right_run] = this._split_run_at_index(working_runs[0], local_start);
        working_runs[0] = right_run;
        dom_modified = true;
        start_split_adjustment = local_start;
      }
    }
    const last_real_span = [...affected_spans].reverse().find((s) => s.run !== null);
    if (last_real_span) {
      const is_same_run = first_real_span === last_real_span;
      const run_to_split = working_runs[working_runs.length - 1];
      const overlap_end = Math.min(last_real_span.end, end_idx);
      let local_end = overlap_end - last_real_span.start;
      if (is_same_run && start_split_adjustment > 0) {
        // The run was already shortened from the left by the split above.
        local_end -= start_split_adjustment;
      }
      const run_text = get_run_text(run_to_split);
      if (local_end > 0 && local_end < run_text.length) {
        // Range ends inside the last run: keep only its left-hand part.
        const [left_run] = this._split_run_at_index(run_to_split, local_end);
        working_runs[working_runs.length - 1] = left_run;
        dom_modified = true;
      }
    }
    if (dom_modified && rebuild_map) {
      this._build_map();
    }
    return working_runs;
  }

  /**
   * Find where to anchor an insertion at text offset `index`.
   *
   * @param {number} index
   * @param {boolean} [rebuild_map=true] - Rebuild the map if a run is split.
   * @returns {[any, any]} `[run, paragraph]`; `run` is null when only a
   *   paragraph could be determined, and both are null when nothing was found.
   */
  get_insertion_anchor(index, rebuild_map = true) {
    // 1. Spans ending exactly at the index: prefer a real run, then a paragraph.
    const preceding = this.spans.filter((s) => s.end === index);
    if (preceding.length > 0) {
      for (let i = preceding.length - 1; i >= 0; i--) {
        if (preceding[i].run) return [preceding[i].run, preceding[i].paragraph];
      }
      for (let i = preceding.length - 1; i >= 0; i--) {
        if (preceding[i].paragraph) return [null, preceding[i].paragraph];
      }
    }
    // 2. The index falls strictly inside a span.
    const containing = this.spans.filter((s) => s.start < index && index < s.end);
    if (containing.length > 0) {
      const span = containing[0];
      if (span.run === null) {
        if (span.paragraph === null) {
          // Virtual text without context: retry at the end of that span.
          return this.get_insertion_anchor(span.end, rebuild_map);
        }
        return [null, span.paragraph];
      } else {
        const offset = index - span.start;
        const [left] = this._split_run_at_index(span.run, offset);
        if (rebuild_map) this._build_map();
        return [left, span.paragraph];
      }
    }
    // 3. Start of the document.
    if (index === 0 && this.spans.length > 0) {
      for (const s of this.spans) if (s.run) return [s.run, s.paragraph];
      for (const s of this.spans) if (s.paragraph) return [null, s.paragraph];
      return [null, null];
    }
    // 4. Fall back to the closest content before the index.
    const preceding_gap = this.spans.filter((s) => s.end < index);
    if (preceding_gap.length > 0) {
      for (let i = preceding_gap.length - 1; i >= 0; i--) {
        if (preceding_gap[i].run) return [preceding_gap[i].run, preceding_gap[i].paragraph];
      }
      for (let i = preceding_gap.length - 1; i >= 0; i--) {
        if (preceding_gap[i].paragraph) return [null, preceding_gap[i].paragraph];
      }
    }
    return [null, null];
  }

  /**
   * Split a run's text at `split_index`. The given run keeps the left part; a
   * clone of its element holding the right part is inserted right after it.
   *
   * @returns {[any, any]} `[left_run, right_run]`; `left_run` is the input run.
   */
  _split_run_at_index(run, split_index) {
    const text = get_run_text(run);
    const left_text = text.substring(0, split_index);
    const right_text = text.substring(split_index);
    this._set_run_text_elements(run._element, left_text);
    // Cloned after the text update; the clone's text is replaced just below.
    const new_r_element = run._element.cloneNode(true);
    this._set_run_text_elements(new_r_element, right_text);
    if (run._element.parentNode) {
      run._element.parentNode.insertBefore(new_r_element, run._element.nextSibling);
    }
    const new_run = new Run(new_r_element, run._parent);
    return [run, new_run];
  }

  /**
   * Replace the text-bearing children of a run element (`w:t`, `w:delText`,
   * `w:br`, `w:cr`, `w:tab`) with a single `w:t` holding `new_text`.
   */
  _set_run_text_elements(r_element, new_text) {
    const to_remove = [];
    for (let i = 0; i < r_element.childNodes.length; i++) {
      const child = r_element.childNodes[i];
      if (child.nodeType === 1 && ["w:t", "w:delText", "w:br", "w:cr", "w:tab"].includes(child.tagName)) {
        to_remove.push(child);
      }
    }
    for (const child of to_remove) {
      r_element.removeChild(child);
    }
    const doc = r_element.ownerDocument;
    if (doc) {
      const new_t = doc.createElement("w:t");
      new_t.textContent = new_text;
      // Text with leading or trailing whitespace is marked xml:space="preserve".
      if (new_text.trim() !== new_text) {
        new_t.setAttribute("xml:space", "preserve");
      }
      r_element.appendChild(new_t);
    }
  }

  /** Return the first real span overlapping `[start_idx, end_idx)`, or null. */
  get_context_at_range(start_idx, end_idx) {
    const first_real = this.spans.find((s) => s.run && s.end > start_idx && s.start < end_idx);
    return first_real || null;
  }
};
|
|
2093
|
+
|
|
2094
|
+
// src/diff.ts
|
|
2095
|
+
var import_diff_match_patch = __toESM(require("diff-match-patch"), 1);
|
|
2096
|
+
/**
 * Computes how many leading and trailing characters `target` and `new_val`
 * share, adjusted so the cut points do not land inside a word, inside an
 * unbalanced `**` / `__` / `_` markdown marker, or after a `#` on the same line.
 *
 * Fix: when the count of single underscores was odd but no isolated `_`
 * existed (for example a `_____` fill-in blank followed by a space), the
 * marker loops made no progress and spun forever. They now fall through in
 * that case.
 *
 * @param {string} target - Original text.
 * @param {string} new_val - Replacement text.
 * @returns {[number, number]} `[prefix_len, suffix_len]`; `[0, 0]` when either
 *   input is empty or missing.
 */
function trim_common_context(target, new_val) {
  if (!target || !new_val) return [0, 0];

  const isSpace = (char) => /\s/.test(char);
  // True when text[i] is "_" with no "_" directly before or after it in text.
  const isLoneUnderscore = (text, i) =>
    text[i] === "_" &&
    (i === 0 || text[i - 1] !== "_") &&
    (i === text.length - 1 || text[i + 1] !== "_");

  // --- Prefix: raw common prefix -------------------------------------------
  let prefix_len = 0;
  const limit = Math.min(target.length, new_val.length);
  while (prefix_len < limit && target[prefix_len] === new_val[prefix_len]) {
    prefix_len++;
  }

  // Back off while the cut would fall between two non-space characters.
  if (prefix_len < target.length && prefix_len < new_val.length) {
    while (prefix_len > 0) {
      const target_split = !isSpace(target[prefix_len - 1]) && !isSpace(target[prefix_len]);
      const new_split = !isSpace(new_val[prefix_len - 1]) && !isSpace(new_val[prefix_len]);
      if (!target_split && !new_split) break;
      prefix_len--;
    }
  }

  // Back off until the prefix holds only balanced markers and no header mark.
  while (prefix_len > 0) {
    if (prefix_len < target.length) {
      // Never cut between the two characters of a "**" or "__" marker.
      const charSeq = target.substring(prefix_len - 1, prefix_len + 1);
      if (charSeq === "**" || charSeq === "__") {
        prefix_len--;
        continue;
      }
    }
    const left = target.substring(0, prefix_len);
    const b_count = (left.match(/\*\*/g) || []).length;
    const u2_count = (left.match(/__/g) || []).length;
    const u1_count = (left.replace(/__/g, "").match(/_/g) || []).length;
    if (b_count % 2 !== 0) {
      prefix_len = left.lastIndexOf("**");
      continue;
    }
    if (u2_count % 2 !== 0) {
      prefix_len = left.lastIndexOf("__");
      continue;
    }
    if (u1_count % 2 !== 0) {
      let lone = -1;
      for (let idx = left.length - 1; idx >= 0; idx--) {
        if (isLoneUnderscore(left, idx)) {
          lone = idx;
          break;
        }
      }
      if (lone !== -1) {
        prefix_len = lone;
        continue;
      }
      // No isolated "_" (e.g. a "_____" run): nothing to back off to, so fall
      // through instead of looping forever.
    }
    // If a "#" precedes the cut on the same line, retreat to that line's start.
    let temp_len = prefix_len;
    let hit_header = false;
    while (temp_len > 0) {
      const char = target[temp_len - 1];
      if (char === "#") {
        prefix_len = temp_len - 1;
        while (prefix_len > 0 && target[prefix_len - 1] !== "\n") {
          prefix_len--;
        }
        hit_header = true;
        break;
      }
      if (char === "\n") break;
      temp_len--;
    }
    if (hit_header) continue;
    break;
  }

  // --- Suffix: raw common suffix, not overlapping the prefix -----------------
  let suffix_len = 0;
  const target_rem_len = target.length - prefix_len;
  const new_rem_len = new_val.length - prefix_len;
  const limit_suffix = Math.min(target_rem_len, new_rem_len);
  while (
    suffix_len < limit_suffix &&
    target[target.length - 1 - suffix_len] === new_val[new_val.length - 1 - suffix_len]
  ) {
    suffix_len++;
  }

  // Shrink while the cut would fall between two non-space characters.
  while (suffix_len > 0) {
    let target_split = false;
    if (suffix_len < target.length) {
      target_split =
        !isSpace(target[target.length - 1 - suffix_len]) &&
        !isSpace(target[target.length - suffix_len]);
    }
    let new_split = false;
    if (suffix_len < new_val.length) {
      new_split =
        !isSpace(new_val[new_val.length - 1 - suffix_len]) &&
        !isSpace(new_val[new_val.length - suffix_len]);
    }
    if (!target_split && !new_split) break;
    suffix_len--;
  }

  // Shrink until the suffix holds only balanced markers.
  while (suffix_len > 0) {
    const idx = target.length - suffix_len;
    if (idx > 0) {
      const charSeq = target.substring(idx - 1, idx + 1);
      if (charSeq === "**" || charSeq === "__") {
        suffix_len--;
        continue;
      }
    }
    const right = target.substring(target.length - suffix_len);
    const b_count = (right.match(/\*\*/g) || []).length;
    const u2_count = (right.match(/__/g) || []).length;
    const u1_count = (right.replace(/__/g, "").match(/_/g) || []).length;
    if (b_count % 2 !== 0) {
      suffix_len -= right.indexOf("**") + 2;
      continue;
    }
    if (u2_count % 2 !== 0) {
      suffix_len -= right.indexOf("__") + 2;
      continue;
    }
    if (u1_count % 2 !== 0) {
      let lone = -1;
      for (let i = 0; i < right.length; i++) {
        if (isLoneUnderscore(right, i)) {
          lone = i;
          break;
        }
      }
      if (lone !== -1) {
        suffix_len -= lone + 1;
        continue;
      }
      // Same guard as on the prefix side: no isolated "_" means no progress
      // is possible, so stop adjusting.
    }
    break;
  }

  // A suffix made only of whitespace is dropped entirely.
  if (suffix_len > 0 && /^\s+$/.test(target.substring(target.length - suffix_len))) {
    suffix_len = 0;
  }

  // When both remainders are wrapped in the same marker, absorb the marker
  // into the shared prefix and suffix.
  for (const marker of ["**", "__", "_"]) {
    const mlen = marker.length;
    const tgt_rem = target.substring(prefix_len, target.length - suffix_len);
    const new_rem = new_val.substring(prefix_len, new_val.length - suffix_len);
    if (
      tgt_rem.startsWith(marker) &&
      new_rem.startsWith(marker) &&
      tgt_rem.endsWith(marker) &&
      new_rem.endsWith(marker) &&
      tgt_rem.length >= 2 * mlen &&
      new_rem.length >= 2 * mlen
    ) {
      prefix_len += mlen;
      suffix_len += mlen;
    }
  }
  return [prefix_len, suffix_len];
}
|
|
2236
|
+
/**
 * Tokenizes two texts and encodes each distinct token as one UTF-16 code unit,
 * so a character-level diff of the encoded strings becomes a token-level diff.
 *
 * Tokens are runs of whitespace, runs of letters/digits/underscores, or any
 * other single character.
 *
 * Fix: the token table was a plain object queried with `in`, so tokens that
 * name `Object.prototype` members (`constructor`, `toString`, `__proto__`, ...)
 * were treated as already known and encoded as code 0. A `Map` has no
 * inherited keys.
 *
 * NOTE(review): `String.fromCharCode` keeps only the low 16 bits, so more than
 * 65,536 distinct tokens would alias. Lifting that limit would also require
 * changing the decoder in the caller.
 *
 * @param {string} text1 - First text.
 * @param {string} text2 - Second text.
 * @returns {[string, string, string[]]} Encoded `text1`, encoded `text2`, and
 *   the token table indexed by character code.
 */
function _words_to_chars(text1, text2) {
  const token_array = [];
  const token_codes = new Map();
  const split_pattern = /(\s+|[\p{L}\p{N}_]+|[^\p{L}\p{N}_\s])/gu;

  const encode_text = (text) => {
    let encoded_chars = "";
    for (const token of text.split(split_pattern)) {
      // split() with a capture group yields empty strings between matches.
      if (!token) continue;
      let code = token_codes.get(token);
      if (code === undefined) {
        code = token_array.length;
        token_codes.set(token, code);
        token_array.push(token);
      }
      encoded_chars += String.fromCharCode(code);
    }
    return encoded_chars;
  };

  return [encode_text(text1), encode_text(text2), token_array];
}
|
|
2257
|
+
/**
 * Diffs two texts token by token and converts the result into a list of
 * `modify` edits positioned by offset into the original text.
 *
 * A deletion immediately followed by an insertion becomes one replacement;
 * a lone deletion or insertion becomes its own edit.
 *
 * @param {string} original_text - Text before the change.
 * @param {string} modified_text - Text after the change.
 * @returns {object[]} Edits with `type`, `target_text`, `new_text`, `comment`
 *   and `_match_start_index`.
 */
function generate_edits_from_text(original_text, modified_text) {
  const differ = new import_diff_match_patch.default.diff_match_patch();
  const [encoded_original, encoded_modified, vocabulary] = _words_to_chars(original_text, modified_text);
  const diffs = differ.diff_main(encoded_original, encoded_modified, false);
  differ.diff_cleanupSemantic(diffs);

  // Expand each encoded code unit back into the token it stands for.
  for (const diff of diffs) {
    const encoded = diff[1];
    let decoded = "";
    for (let k = 0; k < encoded.length; k++) {
      decoded += vocabulary[encoded.charCodeAt(k)];
    }
    diff[1] = decoded;
  }

  const edits = [];
  const emit = (target_text, new_text, comment, index) => {
    edits.push({ type: "modify", target_text, new_text, comment, _match_start_index: index });
  };

  let cursor = 0; // offset into original_text
  let pending_delete = null; // [start offset, deleted text] awaiting a possible insertion

  for (const [op, text] of diffs) {
    switch (op) {
      case 0:
        if (pending_delete) {
          emit(pending_delete[1], "", "Diff: Text deleted", pending_delete[0]);
          pending_delete = null;
        }
        cursor += text.length;
        break;
      case -1:
        pending_delete = [cursor, text];
        cursor += text.length;
        break;
      case 1:
        if (pending_delete) {
          emit(pending_delete[1], text, "Diff: Replacement", pending_delete[0]);
          pending_delete = null;
        } else {
          emit("", text, "Diff: Text inserted", cursor);
        }
        break;
    }
  }

  if (pending_delete) {
    emit(pending_delete[1], "", "Diff: Text deleted", pending_delete[0]);
  }
  return edits;
}
|
|
2298
|
+
|
|
2299
|
+
// src/engine.ts
|
|
2300
|
+
/**
 * Inserts `newNode` as the next sibling of `refNode`.
 * Does nothing when `refNode` has no parent.
 *
 * @param {Node} newNode - Node to insert.
 * @param {Node} refNode - Node after which `newNode` is placed.
 */
function insertAfter(newNode, refNode) {
  const container = refNode.parentNode;
  if (!container) return;
  // A null nextSibling makes insertBefore append at the end.
  container.insertBefore(newNode, refNode.nextSibling);
}
|
|
2305
|
+
/**
 * Error raised when a batch of edits fails validation.
 * `errors` holds the individual failure messages; `message` joins them under
 * a "Batch validation failed:" heading, one per line.
 */
var BatchValidationError = class extends Error {
  errors;
  /**
   * @param {string[]} errors - One message per failed check.
   */
  constructor(errors) {
    const detail = errors.join("\n");
    super(`Batch validation failed:\n${detail}`);
    this.errors = errors;
    this.name = "BatchValidationError";
  }
};
|
|
2313
|
+
/**
 * Checks the `target_text` / `new_text` strings of each edit for content the
 * engine refuses to process. No document access is needed.
 *
 * Rejected: hand-written CriticMarkup tags in `new_text`; adding or removing
 * footnote/endnote markers, hyperlinks or cross-references; changing more than
 * one hyperlink at once; altering cross-reference markers; adding `{#...}`
 * anchors; heading levels above 6; and anything touching the read-only
 * structural appendix.
 *
 * @param {object[]} edits - Edits carrying `type`, `target_text`, `new_text`.
 * @returns {string[]} One message per violation, prefixed with the 1-based
 *   edit number; empty when everything passes.
 */
function validate_edit_strings(edits) {
  const errors = [];
  const occurrences = (list, item) => list.filter((x) => x === item).length;
  const same_list = (a, b) => JSON.stringify(a) === JSON.stringify(b);
  const critic_tags = ["{++", "{--", "{>>", "{=="];
  const readonly_markers = ["READONLY_BOUNDARY_START", "# Document Structure (Read-Only)"];

  for (const [i, edit] of edits.entries()) {
    const t_text = edit.target_text || "";
    const n_text = edit.new_text || "";
    const either_has = (needle) => t_text.includes(needle) || n_text.includes(needle);

    // CriticMarkup must never be typed by hand into the replacement.
    if (critic_tags.some((tag) => n_text.includes(tag))) {
      errors.push(`- Edit ${i + 1} Failed: Do not manually write CriticMarkup tags ({++, {--, {>>, {==) in \`new_text\`. The engine handles redlining automatically. To add a comment, use the \`comment\` parameter.`);
    }

    // Footnote / endnote markers: the multiset must be unchanged.
    if (either_has("[^")) {
      const note_pattern = /\[\^(?:fn|en)-[^\]]+\]/g;
      const t_fns = (t_text.match(note_pattern) || []).sort();
      const n_fns = (n_text.match(note_pattern) || []).sort();
      if (!same_list(t_fns, n_fns)) {
        const adds_marker =
          n_fns.length > t_fns.length ||
          n_fns.some((f) => occurrences(n_fns, f) > occurrences(t_fns, f));
        if (adds_marker) {
          errors.push(`- Edit ${i + 1} Failed: Cannot insert footnote/endnote markers via text replace. Markers like \`[^fn-N]\` are read-only projections. Use Word's References menu.`);
        } else {
          errors.push(`- Edit ${i + 1} Failed: Cannot delete footnote/endnote references via text replace. The marker corresponds to a structural XML element.`);
        }
      }
    }

    // Hyperlinks: same count required; at most one may change per edit.
    if (either_has("](")) {
      const link_pattern = /\[(?!~)[^\]]+\]\([^)]+\)/g;
      const t_links = (t_text.match(link_pattern) || []).sort();
      const n_links = (n_text.match(link_pattern) || []).sort();
      if (n_links.length > t_links.length) {
        errors.push(`- Edit ${i + 1} Failed: Cannot insert hyperlinks via text replace. Use a dedicated structural operation.`);
      } else if (n_links.length < t_links.length) {
        errors.push(`- Edit ${i + 1} Failed: Cannot delete hyperlinks via text replace. The marker corresponds to a structural XML element.`);
      } else if (t_links.length > 1 && !same_list(t_links, n_links)) {
        errors.push(`- Edit ${i + 1} Failed: Can only edit or retarget one hyperlink per text replacement. Please split into multiple edits.`);
      }
    }

    // Cross-references: must be carried over verbatim and in order.
    if (either_has("[~")) {
      const xref_pattern = /\[~[^~]+~\]\(#[^\)]+\)/g;
      const t_xrefs = t_text.match(xref_pattern) || [];
      const n_xrefs = n_text.match(xref_pattern) || [];
      if (n_xrefs.length > t_xrefs.length) {
        errors.push(`- Edit ${i + 1} Failed: Cannot insert cross-references via text replace. Markers are read-only projections.`);
      } else if (n_xrefs.length < t_xrefs.length) {
        errors.push(`- Edit ${i + 1} Failed: Cannot delete cross-references via text replace. The marker corresponds to a structural XML element.`);
      } else if (!same_list(t_xrefs, n_xrefs)) {
        errors.push(`- Edit ${i + 1} Failed: Modifying or retargeting cross-reference markers is disallowed to prevent dependency corruption.`);
      }
    }

    // Anchors: the replacement may not contain more of any anchor than the target.
    if (either_has("{#")) {
      const anchor_pattern = /\{#[^\}]+\}/g;
      const t_anchors = t_text.match(anchor_pattern) || [];
      const n_anchors = n_text.match(anchor_pattern) || [];
      if (n_anchors.some((a) => occurrences(n_anchors, a) > occurrences(t_anchors, a))) {
        errors.push(`- Edit ${i + 1} Failed: Cannot modify or insert internal anchor markers (\`{#...}\`). These represent structural XML bookmarks.`);
      }
    }

    // Headings deeper than level 6; only the first offending line is reported.
    if (edit.type === "modify" && n_text) {
      for (const line of n_text.split("\n")) {
        const stripped = line.trimStart();
        if (!stripped.startsWith("#######")) continue;
        const level = stripped.match(/^#+/)[0].length;
        const rest = stripped.substring(level);
        if (rest === "" || rest.startsWith(" ")) {
          errors.push(`- Edit ${i + 1} Failed: Heading level ${level} is not supported (maximum is 6).`);
          break;
        }
      }
    }

    // The structural appendix is off limits in either string.
    if (readonly_markers.some((marker) => either_has(marker))) {
      errors.push(`- Edit ${i + 1} Failed: Modification targets the read-only boundary (Structural Appendix). This section cannot be edited.`);
    }
  }
  return errors;
}
|
|
2390
|
+
var RedlineEngine = class {
|
|
2391
|
+
doc;
|
|
2392
|
+
author;
|
|
2393
|
+
timestamp;
|
|
2394
|
+
current_id;
|
|
2395
|
+
mapper;
|
|
2396
|
+
comments_manager;
|
|
2397
|
+
clean_mapper = null;
|
|
2398
|
+
skipped_details = [];
|
|
2399
|
+
constructor(doc, author = "Adeu AI (TS)") {
|
|
2400
|
+
this.doc = doc;
|
|
2401
|
+
this.author = author;
|
|
2402
|
+
this.timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/\.\d{3}Z$/, "Z");
|
|
2403
|
+
const w16du_ns = "http://schemas.microsoft.com/office/word/2023/wordml/word16du";
|
|
2404
|
+
for (const part of this.doc.pkg.parts) {
|
|
2405
|
+
if (part === this.doc.part || part.contentType.includes("wordprocessingml") && part.contentType.endsWith("+xml")) {
|
|
2406
|
+
if (!part._element.hasAttribute("xmlns:w16du")) {
|
|
2407
|
+
part._element.setAttribute("xmlns:w16du", w16du_ns);
|
|
2408
|
+
}
|
|
2409
|
+
}
|
|
2410
|
+
}
|
|
2411
|
+
this.current_id = this._scan_existing_ids();
|
|
2412
|
+
this.mapper = new DocumentMapper(this.doc);
|
|
2413
|
+
this.comments_manager = new CommentsManager(this.doc);
|
|
2414
|
+
}
|
|
2415
|
+
_scan_existing_ids() {
|
|
2416
|
+
let maxId = 0;
|
|
2417
|
+
for (const tag of ["w:ins", "w:del"]) {
|
|
2418
|
+
const elements = findAllDescendants(this.doc.element, tag);
|
|
2419
|
+
for (const el of elements) {
|
|
2420
|
+
const val = parseInt(el.getAttribute("w:id") || "0", 10);
|
|
2421
|
+
if (!isNaN(val) && val > maxId) maxId = val;
|
|
2422
|
+
}
|
|
2423
|
+
}
|
|
2424
|
+
return maxId;
|
|
2425
|
+
}
|
|
2426
|
+
accept_all_revisions() {
|
|
2427
|
+
const dels = findAllDescendants(this.doc.element, "w:del");
|
|
2428
|
+
for (const d of dels) {
|
|
2429
|
+
const parent = d.parentNode;
|
|
2430
|
+
if (parent?.tagName === "w:trPr") {
|
|
2431
|
+
const tr = parent.parentNode;
|
|
2432
|
+
tr?.parentNode?.removeChild(tr);
|
|
2433
|
+
} else {
|
|
2434
|
+
parent?.removeChild(d);
|
|
2435
|
+
}
|
|
2436
|
+
}
|
|
2437
|
+
const insNodes = findAllDescendants(this.doc.element, "w:ins");
|
|
2438
|
+
for (const i of insNodes) {
|
|
2439
|
+
const parent = i.parentNode;
|
|
2440
|
+
if (parent?.tagName === "w:trPr") {
|
|
2441
|
+
parent.removeChild(i);
|
|
2442
|
+
} else {
|
|
2443
|
+
while (i.firstChild) parent?.insertBefore(i.firstChild, i);
|
|
2444
|
+
parent?.removeChild(i);
|
|
2445
|
+
}
|
|
2446
|
+
}
|
|
2447
|
+
}
|
|
2448
|
+
_getNextId() {
|
|
2449
|
+
this.current_id++;
|
|
2450
|
+
return this.current_id.toString();
|
|
2451
|
+
}
|
|
2452
|
+
_create_track_change_tag(tagName, author = "", reuseId = null) {
|
|
2453
|
+
const xmlDoc = this.doc.part._element.ownerDocument;
|
|
2454
|
+
const tag = xmlDoc.createElement(tagName);
|
|
2455
|
+
const wid = reuseId !== null ? reuseId : this._getNextId();
|
|
2456
|
+
tag.setAttribute("w:id", wid);
|
|
2457
|
+
tag.setAttribute("w:author", author || this.author);
|
|
2458
|
+
tag.setAttribute("w:date", this.timestamp);
|
|
2459
|
+
tag.setAttribute("w16du:dateUtc", this.timestamp);
|
|
2460
|
+
return tag;
|
|
2461
|
+
}
|
|
2462
|
+
_set_text_content(element, text) {
|
|
2463
|
+
element.textContent = text;
|
|
2464
|
+
if (text.trim() !== text) {
|
|
2465
|
+
element.setAttribute("xml:space", "preserve");
|
|
2466
|
+
}
|
|
2467
|
+
}
|
|
2468
|
+
_parse_markdown_style(text) {
|
|
2469
|
+
const stripped_text = text.trimStart();
|
|
2470
|
+
if (stripped_text.startsWith("#")) {
|
|
2471
|
+
let level = 0;
|
|
2472
|
+
let temp = stripped_text;
|
|
2473
|
+
while (temp.startsWith("#")) {
|
|
2474
|
+
level++;
|
|
2475
|
+
temp = temp.substring(1);
|
|
2476
|
+
}
|
|
2477
|
+
if (temp.startsWith(" ")) return [temp.trim(), `Heading ${level}`];
|
|
2478
|
+
}
|
|
2479
|
+
if (stripped_text.startsWith("* ") || stripped_text.startsWith("- ")) {
|
|
2480
|
+
return [stripped_text.substring(2).trim(), "List Paragraph"];
|
|
2481
|
+
}
|
|
2482
|
+
const match = stripped_text.match(/^\d+\.\s+/);
|
|
2483
|
+
if (match) {
|
|
2484
|
+
return [stripped_text.substring(match[0].length).trim(), "List Number"];
|
|
2485
|
+
}
|
|
2486
|
+
return [text, null];
|
|
2487
|
+
}
|
|
2488
|
+
_parse_inline_markdown(text, baseStyle = {}) {
|
|
2489
|
+
if (!text) return [];
|
|
2490
|
+
const tokenPattern = /(\*\*.*?\*\*)|(_.*?_)/;
|
|
2491
|
+
const match = text.match(tokenPattern);
|
|
2492
|
+
if (!match) return [[text, baseStyle]];
|
|
2493
|
+
const start = match.index;
|
|
2494
|
+
const raw = match[0];
|
|
2495
|
+
const end = start + raw.length;
|
|
2496
|
+
const isBold = raw.startsWith("**");
|
|
2497
|
+
const innerContent = isBold ? raw.substring(2, raw.length - 2) : raw.substring(1, raw.length - 1);
|
|
2498
|
+
const preText = text.substring(0, start);
|
|
2499
|
+
const postText = text.substring(end);
|
|
2500
|
+
const results = [];
|
|
2501
|
+
if (preText) results.push([preText, baseStyle]);
|
|
2502
|
+
const newStyle = { ...baseStyle };
|
|
2503
|
+
if (isBold) newStyle.bold = true;
|
|
2504
|
+
else newStyle.italic = true;
|
|
2505
|
+
results.push(...this._parse_inline_markdown(innerContent, newStyle));
|
|
2506
|
+
results.push(...this._parse_inline_markdown(postText, baseStyle));
|
|
2507
|
+
return results;
|
|
2508
|
+
}
|
|
2509
|
+
_apply_run_props(runElement, props, suppressInherited = false) {
|
|
2510
|
+
if (!props) {
|
|
2511
|
+
if (!suppressInherited) return;
|
|
2512
|
+
props = {};
|
|
2513
|
+
}
|
|
2514
|
+
let rPr = findChild(runElement, "w:rPr");
|
|
2515
|
+
if (!rPr && (props.bold || props.italic || suppressInherited)) {
|
|
2516
|
+
const doc = runElement.ownerDocument;
|
|
2517
|
+
rPr = doc.createElement("w:rPr");
|
|
2518
|
+
runElement.appendChild(rPr);
|
|
2519
|
+
}
|
|
2520
|
+
if (rPr) {
|
|
2521
|
+
const doc = runElement.ownerDocument;
|
|
2522
|
+
if (props.bold) {
|
|
2523
|
+
let b = findChild(rPr, "w:b");
|
|
2524
|
+
if (!b) {
|
|
2525
|
+
b = doc.createElement("w:b");
|
|
2526
|
+
rPr.appendChild(b);
|
|
2527
|
+
}
|
|
2528
|
+
b.setAttribute("w:val", "1");
|
|
2529
|
+
} else if (suppressInherited) {
|
|
2530
|
+
const b = findChild(rPr, "w:b");
|
|
2531
|
+
if (b) rPr.removeChild(b);
|
|
2532
|
+
}
|
|
2533
|
+
if (props.italic) {
|
|
2534
|
+
let i = findChild(rPr, "w:i");
|
|
2535
|
+
if (!i) {
|
|
2536
|
+
i = doc.createElement("w:i");
|
|
2537
|
+
rPr.appendChild(i);
|
|
2538
|
+
}
|
|
2539
|
+
i.setAttribute("w:val", "1");
|
|
2540
|
+
} else if (suppressInherited) {
|
|
2541
|
+
const i = findChild(rPr, "w:i");
|
|
2542
|
+
if (i) rPr.removeChild(i);
|
|
2543
|
+
}
|
|
2544
|
+
}
|
|
2545
|
+
}
|
|
2546
|
+
validate_edits(edits) {
|
|
2547
|
+
const errors = [];
|
|
2548
|
+
if (!this.mapper.full_text) this.mapper["_build_map"]();
|
|
2549
|
+
errors.push(...validate_edit_strings(edits));
|
|
2550
|
+
for (let i = 0; i < edits.length; i++) {
|
|
2551
|
+
const edit = edits[i];
|
|
2552
|
+
if (!edit.target_text) continue;
|
|
2553
|
+
let matches = this.mapper.find_all_match_indices(edit.target_text);
|
|
2554
|
+
let activeText = this.mapper.full_text;
|
|
2555
|
+
if (matches.length === 0) {
|
|
2556
|
+
if (!this.clean_mapper) this.clean_mapper = new DocumentMapper(this.doc, true);
|
|
2557
|
+
matches = this.clean_mapper.find_all_match_indices(edit.target_text);
|
|
2558
|
+
if (matches.length > 0) activeText = this.clean_mapper.full_text;
|
|
2559
|
+
}
|
|
2560
|
+
if (matches.length === 0) {
|
|
2561
|
+
errors.push(`- Edit ${i + 1} Failed: Target text not found in document:
|
|
2562
|
+
"${edit.target_text}"`);
|
|
2563
|
+
} else if (matches.length > 1) {
|
|
2564
|
+
errors.push(`- Edit ${i + 1} Failed: Target text is ambiguous. Found ${matches.length} matches.
|
|
2565
|
+
Provide more context.`);
|
|
2566
|
+
}
|
|
2567
|
+
for (const [start, length] of matches) {
|
|
2568
|
+
const spans = this.mapper.spans.filter((s) => s.end > start && s.start < start + length);
|
|
2569
|
+
const nestedAuthors = /* @__PURE__ */ new Set();
|
|
2570
|
+
for (const s of spans) {
|
|
2571
|
+
if (s.ins_id) {
|
|
2572
|
+
const insNodes = findAllDescendants(this.doc.element, "w:ins").filter((n) => n.getAttribute("w:id") === s.ins_id);
|
|
2573
|
+
if (insNodes.length > 0) {
|
|
2574
|
+
const auth = insNodes[0].getAttribute("w:author");
|
|
2575
|
+
if (auth && auth !== this.author) nestedAuthors.add(auth);
|
|
2576
|
+
}
|
|
2577
|
+
}
|
|
2578
|
+
}
|
|
2579
|
+
if (nestedAuthors.size > 0) {
|
|
2580
|
+
errors.push(`- Edit ${i + 1} Failed: Modification targets an active insertion from another author (${Array.from(nestedAuthors).join(", ")}).`);
|
|
2581
|
+
}
|
|
2582
|
+
}
|
|
2583
|
+
}
|
|
2584
|
+
return errors;
|
|
2585
|
+
}
|
|
2586
|
+
process_batch(changes) {
|
|
2587
|
+
this.skipped_details = [];
|
|
2588
|
+
const actions = changes.filter((c) => ["accept", "reject", "reply"].includes(c.type));
|
|
2589
|
+
const edits = changes.filter((c) => !["accept", "reject", "reply"].includes(c.type));
|
|
2590
|
+
let applied_actions = 0, skipped_actions = 0;
|
|
2591
|
+
if (actions.length > 0) {
|
|
2592
|
+
const res = this.apply_review_actions(actions);
|
|
2593
|
+
applied_actions = res[0];
|
|
2594
|
+
skipped_actions = res[1];
|
|
2595
|
+
if (applied_actions > 0) {
|
|
2596
|
+
this.mapper["_build_map"]();
|
|
2597
|
+
if (this.clean_mapper) this.clean_mapper["_build_map"]();
|
|
2598
|
+
}
|
|
2599
|
+
}
|
|
2600
|
+
if (edits.length > 0) {
|
|
2601
|
+
const errors = this.validate_edits(edits);
|
|
2602
|
+
if (errors.length > 0) throw new BatchValidationError(errors);
|
|
2603
|
+
}
|
|
2604
|
+
let applied_edits = 0, skipped_edits = 0;
|
|
2605
|
+
if (edits.length > 0) {
|
|
2606
|
+
const res = this.apply_edits(edits);
|
|
2607
|
+
applied_edits = res[0];
|
|
2608
|
+
skipped_edits = res[1];
|
|
2609
|
+
}
|
|
2610
|
+
return {
|
|
2611
|
+
actions_applied: applied_actions,
|
|
2612
|
+
actions_skipped: skipped_actions,
|
|
2613
|
+
edits_applied: applied_edits,
|
|
2614
|
+
edits_skipped: skipped_edits,
|
|
2615
|
+
skipped_details: this.skipped_details
|
|
2616
|
+
};
|
|
2617
|
+
}
|
|
2618
|
+
apply_edits(edits) {
|
|
2619
|
+
let applied = 0;
|
|
2620
|
+
let skipped = 0;
|
|
2621
|
+
const resolved_edits = [];
|
|
2622
|
+
for (const edit of edits) {
|
|
2623
|
+
if (edit._match_start_index !== void 0 && edit._match_start_index !== null) {
|
|
2624
|
+
resolved_edits.push([edit, edit.new_text || null]);
|
|
2625
|
+
} else if (edit.type === "insert_row" || edit.type === "delete_row") {
|
|
2626
|
+
const [idx] = this.mapper.find_match_index(edit.target_text);
|
|
2627
|
+
if (idx !== -1) {
|
|
2628
|
+
edit._match_start_index = idx;
|
|
2629
|
+
resolved_edits.push([edit, null]);
|
|
2630
|
+
} else {
|
|
2631
|
+
skipped++;
|
|
2632
|
+
this.skipped_details.push(`- Failed to locate row target: '${(edit.target_text || "").substring(0, 40)}...'`);
|
|
2633
|
+
}
|
|
2634
|
+
} else {
|
|
2635
|
+
const resolved = this._pre_resolve_heuristic_edit(edit);
|
|
2636
|
+
if (resolved) {
|
|
2637
|
+
if (Array.isArray(resolved)) {
|
|
2638
|
+
for (const r of resolved) resolved_edits.push([r, r.new_text]);
|
|
2639
|
+
} else {
|
|
2640
|
+
resolved_edits.push([resolved, resolved.new_text]);
|
|
2641
|
+
}
|
|
2642
|
+
} else {
|
|
2643
|
+
skipped++;
|
|
2644
|
+
this.skipped_details.push(`- Failed to apply edit targeting: '${(edit.target_text || "insertion").substring(0, 40)}...'`);
|
|
2645
|
+
}
|
|
2646
|
+
}
|
|
2647
|
+
}
|
|
2648
|
+
resolved_edits.sort((a, b) => (b[0]._match_start_index || 0) - (a[0]._match_start_index || 0));
|
|
2649
|
+
const occupied_ranges = [];
|
|
2650
|
+
for (const [edit, orig_new] of resolved_edits) {
|
|
2651
|
+
const start = edit._match_start_index || 0;
|
|
2652
|
+
const end = start + (edit.target_text ? edit.target_text.length : 0);
|
|
2653
|
+
const overlaps = occupied_ranges.some(([occ_start, occ_end]) => start < occ_end && end > occ_start);
|
|
2654
|
+
if (overlaps) {
|
|
2655
|
+
skipped++;
|
|
2656
|
+
this.skipped_details.push(`- Skipped overlapping edit targeting: '${(edit.target_text || "insertion").substring(0, 40)}...'`);
|
|
2657
|
+
continue;
|
|
2658
|
+
}
|
|
2659
|
+
let success = false;
|
|
2660
|
+
if (edit.type === "modify") {
|
|
2661
|
+
success = this._apply_single_edit_indexed(edit, orig_new, false);
|
|
2662
|
+
} else if (edit.type === "insert_row" || edit.type === "delete_row") {
|
|
2663
|
+
success = this._apply_table_edit(edit, false);
|
|
2664
|
+
}
|
|
2665
|
+
if (success) {
|
|
2666
|
+
applied++;
|
|
2667
|
+
occupied_ranges.push([start, end]);
|
|
2668
|
+
} else {
|
|
2669
|
+
skipped++;
|
|
2670
|
+
this.skipped_details.push(`- Failed to apply edit targeting: '${(edit.target_text || "insertion").substring(0, 40)}...'`);
|
|
2671
|
+
}
|
|
2672
|
+
}
|
|
2673
|
+
return [applied, skipped];
|
|
2674
|
+
}
|
|
2675
|
+
apply_review_actions(actions) {
|
|
2676
|
+
let applied = 0;
|
|
2677
|
+
let skipped = 0;
|
|
2678
|
+
for (const action of actions) {
|
|
2679
|
+
const type = action.type;
|
|
2680
|
+
if (type === "reply") {
|
|
2681
|
+
const cid = action.target_id.replace("Com:", "");
|
|
2682
|
+
this.comments_manager.addComment(this.author, action.text, cid);
|
|
2683
|
+
applied++;
|
|
2684
|
+
continue;
|
|
2685
|
+
}
|
|
2686
|
+
const target_id = action.target_id.replace("Chg:", "");
|
|
2687
|
+
const all_ins = findAllDescendants(this.doc.element, "w:ins").filter((n) => n.getAttribute("w:id") === target_id);
|
|
2688
|
+
const all_del = findAllDescendants(this.doc.element, "w:del").filter((n) => n.getAttribute("w:id") === target_id);
|
|
2689
|
+
const all_nodes = [...all_ins, ...all_del];
|
|
2690
|
+
if (all_nodes.length === 0) {
|
|
2691
|
+
skipped++;
|
|
2692
|
+
this.skipped_details.push(`- Failed to apply action: Target ID ${action.target_id} not found.`);
|
|
2693
|
+
continue;
|
|
2694
|
+
}
|
|
2695
|
+
for (const node of all_nodes) {
|
|
2696
|
+
const is_ins = node.tagName === "w:ins";
|
|
2697
|
+
const parent_tag = node.parentNode ? node.parentNode.tagName : "";
|
|
2698
|
+
const is_trPr = parent_tag === "w:trPr";
|
|
2699
|
+
if (type === "accept") {
|
|
2700
|
+
if (is_ins) {
|
|
2701
|
+
if (is_trPr) node.parentNode?.removeChild(node);
|
|
2702
|
+
else {
|
|
2703
|
+
while (node.firstChild) node.parentNode?.insertBefore(node.firstChild, node);
|
|
2704
|
+
node.parentNode?.removeChild(node);
|
|
2705
|
+
}
|
|
2706
|
+
} else {
|
|
2707
|
+
if (is_trPr) {
|
|
2708
|
+
const tr = node.parentNode?.parentNode;
|
|
2709
|
+
tr?.parentNode?.removeChild(tr);
|
|
2710
|
+
} else {
|
|
2711
|
+
node.parentNode?.removeChild(node);
|
|
2712
|
+
}
|
|
2713
|
+
}
|
|
2714
|
+
} else if (type === "reject") {
|
|
2715
|
+
if (is_ins) {
|
|
2716
|
+
if (is_trPr) {
|
|
2717
|
+
const tr = node.parentNode?.parentNode;
|
|
2718
|
+
tr?.parentNode?.removeChild(tr);
|
|
2719
|
+
} else node.parentNode?.removeChild(node);
|
|
2720
|
+
} else {
|
|
2721
|
+
if (is_trPr) node.parentNode?.removeChild(node);
|
|
2722
|
+
else {
|
|
2723
|
+
const delTexts = Array.from(node.getElementsByTagName("w:delText"));
|
|
2724
|
+
for (const dt of delTexts) {
|
|
2725
|
+
const t = dt.ownerDocument.createElement("w:t");
|
|
2726
|
+
t.textContent = dt.textContent;
|
|
2727
|
+
if (dt.hasAttribute("xml:space")) t.setAttribute("xml:space", "preserve");
|
|
2728
|
+
dt.parentNode?.replaceChild(t, dt);
|
|
2729
|
+
}
|
|
2730
|
+
while (node.firstChild) node.parentNode?.insertBefore(node.firstChild, node);
|
|
2731
|
+
node.parentNode?.removeChild(node);
|
|
2732
|
+
}
|
|
2733
|
+
}
|
|
2734
|
+
}
|
|
2735
|
+
}
|
|
2736
|
+
applied++;
|
|
2737
|
+
}
|
|
2738
|
+
return [applied, skipped];
|
|
2739
|
+
}
|
|
2740
|
+
_apply_table_edit(edit, rebuild_map) {
|
|
2741
|
+
const start_idx = edit._match_start_index || 0;
|
|
2742
|
+
const [anchor_run, anchor_para] = this.mapper.get_insertion_anchor(start_idx, rebuild_map);
|
|
2743
|
+
let target_element = null;
|
|
2744
|
+
if (anchor_run) target_element = anchor_run._element;
|
|
2745
|
+
else if (anchor_para) target_element = anchor_para._element;
|
|
2746
|
+
if (!target_element) return false;
|
|
2747
|
+
let tr = target_element;
|
|
2748
|
+
while (tr && tr.tagName !== "w:tr") tr = tr.parentNode;
|
|
2749
|
+
if (!tr) return false;
|
|
2750
|
+
if (edit.type === "delete_row") {
|
|
2751
|
+
let trPr = findChild(tr, "w:trPr");
|
|
2752
|
+
if (!trPr) {
|
|
2753
|
+
trPr = tr.ownerDocument.createElement("w:trPr");
|
|
2754
|
+
tr.insertBefore(trPr, tr.firstChild);
|
|
2755
|
+
}
|
|
2756
|
+
trPr.appendChild(this._create_track_change_tag("w:del"));
|
|
2757
|
+
return true;
|
|
2758
|
+
} else if (edit.type === "insert_row") {
|
|
2759
|
+
const new_tr = tr.ownerDocument.createElement("w:tr");
|
|
2760
|
+
const trPr = tr.ownerDocument.createElement("w:trPr");
|
|
2761
|
+
new_tr.appendChild(trPr);
|
|
2762
|
+
trPr.appendChild(this._create_track_change_tag("w:ins"));
|
|
2763
|
+
for (const cellText of edit.cells) {
|
|
2764
|
+
const tc = tr.ownerDocument.createElement("w:tc");
|
|
2765
|
+
const p = tr.ownerDocument.createElement("w:p");
|
|
2766
|
+
const r = tr.ownerDocument.createElement("w:r");
|
|
2767
|
+
const t = tr.ownerDocument.createElement("w:t");
|
|
2768
|
+
t.textContent = cellText;
|
|
2769
|
+
if (cellText.trim() !== cellText) t.setAttribute("xml:space", "preserve");
|
|
2770
|
+
r.appendChild(t);
|
|
2771
|
+
p.appendChild(r);
|
|
2772
|
+
tc.appendChild(p);
|
|
2773
|
+
new_tr.appendChild(tc);
|
|
2774
|
+
}
|
|
2775
|
+
if (edit.position === "above") tr.parentNode?.insertBefore(new_tr, tr);
|
|
2776
|
+
else insertAfter(new_tr, tr);
|
|
2777
|
+
return true;
|
|
2778
|
+
}
|
|
2779
|
+
return false;
|
|
2780
|
+
}
|
|
2781
|
+
_pre_resolve_heuristic_edit(edit) {
|
|
2782
|
+
if (!edit.target_text) return null;
|
|
2783
|
+
let [start_idx, match_len] = this.mapper.find_match_index(edit.target_text);
|
|
2784
|
+
let use_clean_map = false;
|
|
2785
|
+
if (start_idx === -1) {
|
|
2786
|
+
if (!this.clean_mapper) this.clean_mapper = new DocumentMapper(this.doc, true);
|
|
2787
|
+
[start_idx, match_len] = this.clean_mapper.find_match_index(edit.target_text);
|
|
2788
|
+
if (start_idx !== -1) use_clean_map = true;
|
|
2789
|
+
else return null;
|
|
2790
|
+
}
|
|
2791
|
+
const active_mapper = use_clean_map ? this.clean_mapper : this.mapper;
|
|
2792
|
+
const effective_new_text = edit.new_text || "";
|
|
2793
|
+
const actual_doc_text = this.mapper.full_text.substring(start_idx, start_idx + match_len);
|
|
2794
|
+
if (actual_doc_text === effective_new_text || edit.target_text === effective_new_text) {
|
|
2795
|
+
return {
|
|
2796
|
+
type: "modify",
|
|
2797
|
+
target_text: actual_doc_text,
|
|
2798
|
+
new_text: actual_doc_text,
|
|
2799
|
+
comment: edit.comment,
|
|
2800
|
+
_match_start_index: start_idx,
|
|
2801
|
+
_internal_op: "COMMENT_ONLY",
|
|
2802
|
+
_active_mapper_ref: active_mapper
|
|
2803
|
+
};
|
|
2804
|
+
}
|
|
2805
|
+
let effective_op = "";
|
|
2806
|
+
let final_target = "";
|
|
2807
|
+
let final_new = "";
|
|
2808
|
+
let effective_start_idx = start_idx;
|
|
2809
|
+
if (effective_new_text.startsWith(actual_doc_text)) {
|
|
2810
|
+
effective_op = "INSERTION";
|
|
2811
|
+
final_new = effective_new_text.substring(actual_doc_text.length);
|
|
2812
|
+
effective_start_idx = start_idx + match_len;
|
|
2813
|
+
} else {
|
|
2814
|
+
const [prefix_len, suffix_len] = trim_common_context(actual_doc_text, effective_new_text);
|
|
2815
|
+
const t_end = actual_doc_text.length - suffix_len;
|
|
2816
|
+
const n_end = effective_new_text.length - suffix_len;
|
|
2817
|
+
final_target = actual_doc_text.substring(prefix_len, t_end);
|
|
2818
|
+
final_new = effective_new_text.substring(prefix_len, n_end);
|
|
2819
|
+
effective_start_idx = start_idx + prefix_len;
|
|
2820
|
+
if (!final_target && final_new) effective_op = "INSERTION";
|
|
2821
|
+
else if (final_target && !final_new) effective_op = "DELETION";
|
|
2822
|
+
else if (final_target && final_new) effective_op = "MODIFICATION";
|
|
2823
|
+
else effective_op = "COMMENT_ONLY";
|
|
2824
|
+
}
|
|
2825
|
+
return {
|
|
2826
|
+
type: "modify",
|
|
2827
|
+
target_text: final_target,
|
|
2828
|
+
new_text: final_new,
|
|
2829
|
+
comment: edit.comment,
|
|
2830
|
+
_match_start_index: effective_start_idx,
|
|
2831
|
+
_internal_op: effective_op,
|
|
2832
|
+
_active_mapper_ref: active_mapper
|
|
2833
|
+
};
|
|
2834
|
+
}
|
|
2835
|
+
_apply_single_edit_indexed(edit, orig_new, rebuild_map) {
|
|
2836
|
+
let op = edit._internal_op;
|
|
2837
|
+
const active_mapper = edit._active_mapper_ref || this.mapper;
|
|
2838
|
+
const start_idx = edit._match_start_index || 0;
|
|
2839
|
+
const length = edit.target_text ? edit.target_text.length : 0;
|
|
2840
|
+
const del_id = ["DELETION", "MODIFICATION"].includes(op) ? this._getNextId() : null;
|
|
2841
|
+
const ins_id = ["INSERTION", "MODIFICATION"].includes(op) ? this._getNextId() : null;
|
|
2842
|
+
if (op === "COMMENT_ONLY") {
|
|
2843
|
+
return true;
|
|
2844
|
+
}
|
|
2845
|
+
if (op === "INSERTION") {
|
|
2846
|
+
const [anchor_run, anchor_para] = active_mapper.get_insertion_anchor(start_idx, rebuild_map);
|
|
2847
|
+
if (!anchor_run && !anchor_para) return false;
|
|
2848
|
+
const xmlDoc = this.doc.part._element.ownerDocument;
|
|
2849
|
+
const ins = this._create_track_change_tag("w:ins", "", ins_id);
|
|
2850
|
+
const segments = this._parse_inline_markdown(edit.new_text || "");
|
|
2851
|
+
for (const [segText, segProps] of segments) {
|
|
2852
|
+
const r = xmlDoc.createElement("w:r");
|
|
2853
|
+
this._apply_run_props(r, segProps, false);
|
|
2854
|
+
const t = xmlDoc.createElement("w:t");
|
|
2855
|
+
this._set_text_content(t, segText);
|
|
2856
|
+
r.appendChild(t);
|
|
2857
|
+
ins.appendChild(r);
|
|
2858
|
+
}
|
|
2859
|
+
if (anchor_run) {
|
|
2860
|
+
insertAfter(ins, anchor_run._element);
|
|
2861
|
+
} else if (anchor_para) {
|
|
2862
|
+
anchor_para._element.appendChild(ins);
|
|
2863
|
+
}
|
|
2864
|
+
return true;
|
|
2865
|
+
}
|
|
2866
|
+
const target_runs = active_mapper.find_target_runs_by_index(start_idx, length, rebuild_map);
|
|
2867
|
+
if (target_runs.length === 0) return false;
|
|
2868
|
+
let last_del = null;
|
|
2869
|
+
for (const run of target_runs) {
|
|
2870
|
+
const del_tag = this._create_track_change_tag("w:del", "", del_id);
|
|
2871
|
+
const new_run = run._element.cloneNode(true);
|
|
2872
|
+
const tNodes = Array.from(new_run.getElementsByTagName("w:t"));
|
|
2873
|
+
tNodes.forEach((t) => {
|
|
2874
|
+
const delText = new_run.ownerDocument.createElement("w:delText");
|
|
2875
|
+
delText.textContent = t.textContent;
|
|
2876
|
+
if (t.hasAttribute("xml:space")) delText.setAttribute("xml:space", "preserve");
|
|
2877
|
+
new_run.replaceChild(delText, t);
|
|
2878
|
+
});
|
|
2879
|
+
del_tag.appendChild(new_run);
|
|
2880
|
+
run._element.parentNode?.replaceChild(del_tag, run._element);
|
|
2881
|
+
last_del = del_tag;
|
|
2882
|
+
}
|
|
2883
|
+
if (op === "MODIFICATION" && edit.new_text && last_del) {
|
|
2884
|
+
const xmlDoc = this.doc.part._element.ownerDocument;
|
|
2885
|
+
const ins = this._create_track_change_tag("w:ins", "", ins_id);
|
|
2886
|
+
const segments = this._parse_inline_markdown(edit.new_text);
|
|
2887
|
+
for (const [segText, segProps] of segments) {
|
|
2888
|
+
const r = xmlDoc.createElement("w:r");
|
|
2889
|
+
this._apply_run_props(r, segProps, false);
|
|
2890
|
+
const t = xmlDoc.createElement("w:t");
|
|
2891
|
+
this._set_text_content(t, segText);
|
|
2892
|
+
r.appendChild(t);
|
|
2893
|
+
ins.appendChild(r);
|
|
2894
|
+
}
|
|
2895
|
+
insertAfter(ins, last_del);
|
|
2896
|
+
}
|
|
2897
|
+
return true;
|
|
2898
|
+
}
|
|
2899
|
+
};
|
|
2900
|
+
|
|
2901
|
+
// src/markup.ts
|
|
2902
|
+
/**
 * Decides whether `text` is wrapped in a balanced pair of `marker` that can
 * be peeled off as formatting.
 *
 * The inner text must be non-empty, must not contain the marker again and
 * must contain at least one ASCII letter. For "__" the inner text must not
 * consist solely of word characters; for "_" it must not contain another
 * underscore or consist solely of digits/underscores.
 *
 * @param {string} text - Candidate text including the surrounding markers.
 * @param {string} marker - Marker to test, e.g. "**", "__", "*" or "_".
 * @returns {boolean} true when the markers may be stripped.
 */
function _should_strip_markers(text, marker) {
  const width = marker.length;
  const is_wrapped = text.startsWith(marker) && text.endsWith(marker);
  if (!is_wrapped || text.length < width * 2) return false;

  const inner = text.substring(width, text.length - width);
  if (inner === "" || inner.includes(marker)) return false;
  if (!/[a-zA-Z]/.test(inner)) return false;

  switch (marker) {
    case "__":
      return !/^\w+$/.test(inner);
    case "_":
      return !(inner.includes("_") || /^[0-9_]+$/.test(inner));
    default:
      return true;
  }
}
|
|
2916
|
+
/**
 * Peels at most one layer of balanced emphasis markers off `text`.
 *
 * Markers are tried in the order "**", "__", "_", "*"; the first one that
 * `_should_strip_markers` accepts is removed from both ends.
 *
 * @param {string} text - Text that may be wrapped in emphasis markers.
 * @returns {[string, string, string]} `[prefix, inner, suffix]`; prefix and
 *   suffix are empty strings when nothing was stripped.
 */
function _strip_balanced_markers(text) {
  const marker = ["**", "__", "_", "*"].find((candidate) => _should_strip_markers(text, candidate));
  if (marker === undefined) return ["", text, ""];
  const inner = text.substring(marker.length, text.length - marker.length);
  return [marker, inner, marker];
}
|
|
2931
|
+
/**
 * Replaces typographic quotes with their ASCII counterparts.
 *
 * U+201C / U+201D become `"` and U+2018 / U+2019 become `'`. Each character
 * is replaced by exactly one character, so string offsets are unchanged.
 *
 * @param {string} text - Input text.
 * @returns {string} Text with curly quotes normalised.
 */
function _replace_smart_quotes(text) {
  const ascii_for = {
    "\u201C": '"',
    "\u201D": '"',
    "\u2018": "'",
    "\u2019": "'"
  };
  return text.replace(/[\u201C\u201D\u2018\u2019]/g, (quote) => ascii_for[quote]);
}
|
|
2934
|
+
/**
 * Widens the range [start, end) so that it does not cut an emphasis marker
 * pair in half.
 *
 * For each marker ("**", "__", "_", "*", in that order, two passes) the
 * occurrences inside the current range are counted. When the count is odd
 * the range is extended over a marker sitting directly after its end, or
 * otherwise over one sitting directly before its start.
 *
 * @param {string} text - Full text the range refers to.
 * @param {number} start - Inclusive start offset.
 * @param {number} end - Exclusive end offset.
 * @returns {[number, number]} The possibly widened `[start, end]`.
 */
function _find_safe_boundaries(text, start, end) {
  const marker_order = ["**", "__", "_", "*"];
  let lo = start;
  let hi = end;

  const occurrences = (haystack, marker) => {
    const found = haystack.match(new RegExp(marker.replace(/\*/g, "\\*"), "g"));
    return found === null ? 0 : found.length;
  };

  for (let pass = 0; pass < 2; pass += 1) {
    for (const marker of marker_order) {
      if (occurrences(text.substring(lo, hi), marker) % 2 === 0) continue;
      if (text.substring(hi).startsWith(marker)) {
        hi += marker.length;
      } else if (text.substring(0, lo).endsWith(marker)) {
        lo -= marker.length;
      }
    }
  }
  return [lo, hi];
}
|
|
2961
|
+
/**
 * Shrinks the range [start, end) by dropping a dangling emphasis marker at
 * either edge.
 *
 * A leading (then trailing) marker is trimmed only when the range currently
 * holds an odd number of that marker and holds an even number once the edge
 * marker is removed. Markers are tried in the order "**", "__", "*", "_".
 *
 * @param {string} text - Full text the range refers to.
 * @param {number} start - Inclusive start offset.
 * @param {number} end - Exclusive end offset.
 * @returns {[number, number]} The possibly narrowed `[start, end]`.
 */
function _refine_match_boundaries(text, start, end) {
  const marker_order = ["**", "__", "*", "_"];
  const parity = (str, marker) => {
    const hits = str.match(new RegExp(marker.replace(/\*/g, "\\*"), "g"));
    return (hits ? hits.length : 0) % 2;
  };
  // True when removing the edge marker turns an odd count into an even one.
  const trimming_balances = (before, after, marker) => parity(before, marker) === 1 && parity(after, marker) === 0;

  let span = text.substring(start, end);
  let lo = start;
  let hi = end;

  for (const marker of marker_order) {
    if (!span.startsWith(marker)) continue;
    const without_head = span.substring(marker.length);
    if (trimming_balances(span, without_head, marker)) {
      lo += marker.length;
      span = without_head;
    }
  }

  for (const marker of marker_order) {
    if (!span.endsWith(marker)) continue;
    const without_tail = span.substring(0, span.length - marker.length);
    if (trimming_balances(span, without_tail, marker)) {
      hi -= marker.length;
      span = without_tail;
    }
  }

  return [lo, hi];
}
|
|
2991
|
+
/**
 * Builds the source of a tolerant regular expression for `target_text`.
 *
 * The pattern allows an optional list marker at the start, runs of `*` / `_`
 * between tokens, any run of underscores for an underscore run, any
 * whitespace for whitespace (plus list/blockquote markers when the
 * whitespace contained a newline) and straight or curly variants for quotes.
 * Everything else is matched literally.
 *
 * @param {string} target_text - Text to search for.
 * @returns {string} Regular-expression source (not a RegExp object).
 */
function _make_fuzzy_regex(target_text) {
  const normalized = _replace_smart_quotes(target_text);
  const token_pattern = /(_+)|(\s+)|(['"])|([.,;:\/])/g;
  const md_noise = "[*_]*";
  const structural_noise = "(?:\\s*(?:[*+\\->]|\\d+\\.)\\s+|\\s*\\n\\s*)";
  const start_list_marker = "(?:[ \\t]*(?:[*+\\->]|\\d+\\.)\\s+)?";
  const escape_literal = (str) => str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

  const pieces = [start_list_marker, md_noise];
  let consumed = 0;
  for (const hit of normalized.matchAll(token_pattern)) {
    const [whole, underscores, whitespace, quote, punct] = hit;

    // Literal text between the previous token and this one.
    const literal = normalized.substring(consumed, hit.index);
    if (literal) pieces.push(escape_literal(literal), md_noise);

    if (underscores) {
      pieces.push("_+");
    } else if (whitespace) {
      pieces.push(whitespace.includes("\n") ? `(?:${structural_noise}|\\s+)+` : "\\s+");
    } else if (quote) {
      pieces.push(quote === "'" ? "[\u2018\u2019']" : '["\u201C\u201D]');
    } else if (punct) {
      pieces.push(escape_literal(punct));
    }
    pieces.push(md_noise);
    consumed = hit.index + whole.length;
  }

  const tail = normalized.substring(consumed);
  if (tail) pieces.push(escape_literal(tail));
  return pieces.join("");
}
|
|
3034
|
+
/**
 * Finds `target` inside `text`, trying progressively looser strategies.
 *
 * 1. Exact substring search.
 * 2. Substring search after normalising curly quotes on both sides.
 * 3. The tolerant pattern from `_make_fuzzy_regex`, with its raw match
 *    narrowed by `_refine_match_boundaries`.
 *
 * Every hit is passed through `_find_safe_boundaries` before being returned.
 *
 * @param {string} text - Text to search in.
 * @param {string} target - Text to look for.
 * @returns {[number, number]} `[start, end]` offsets into `text`, or
 *   `[-1, -1]` when the target is empty or no strategy matches.
 */
function _find_match_in_text(text, target) {
  if (!target) return [-1, -1];

  const exact_at = text.indexOf(target);
  if (exact_at !== -1) {
    return _find_safe_boundaries(text, exact_at, exact_at + target.length);
  }

  const plain_text = _replace_smart_quotes(text);
  const plain_target = _replace_smart_quotes(target);
  const plain_at = plain_text.indexOf(plain_target);
  if (plain_at !== -1) {
    return _find_safe_boundaries(text, plain_at, plain_at + plain_target.length);
  }

  try {
    const hit = new RegExp(_make_fuzzy_regex(target)).exec(text);
    if (hit) {
      const [narrow_start, narrow_end] = _refine_match_boundaries(text, hit.index, hit.index + hit[0].length);
      return _find_safe_boundaries(text, narrow_start, narrow_end);
    }
  } catch (e) {
    // Any failure in the fuzzy pass is reported as "no match" below.
  }
  return [-1, -1];
}
|
|
3055
|
+
/**
 * Renders one edit as CriticMarkup.
 *
 * Balanced emphasis markers around `target_text` are kept outside the
 * CriticMarkup braces; when `new_text` is wrapped in the same markers they
 * are peeled off it as well. The change is rendered as `{==…==}` in
 * highlight mode, otherwise as `{--…--}`, `{++…++}` or both. A trailing
 * `{>>…<<}` block carries the comment and, when requested, `[Edit:N]`.
 *
 * @param {string} target_text - Text being replaced.
 * @param {string} new_text - Replacement text (may be empty).
 * @param {string} comment - Optional comment.
 * @param {number} edit_index - Index reported in the `[Edit:N]` tag.
 * @param {boolean} include_index - Whether to emit the `[Edit:N]` tag.
 * @param {boolean} highlight_only - Emit a highlight instead of del/ins.
 * @returns {string} The CriticMarkup fragment.
 */
function _build_critic_markup(target_text, new_text, comment, edit_index, include_index, highlight_only) {
  const [prefix_markup, clean_target, suffix_markup] = _strip_balanced_markers(target_text);

  let clean_new = new_text;
  const new_is_wrapped = prefix_markup && new_text && new_text.startsWith(prefix_markup) && new_text.endsWith(suffix_markup);
  if (new_is_wrapped) {
    const marker_len = prefix_markup.length;
    if (new_text.length > marker_len * 2) {
      clean_new = new_text.substring(marker_len, new_text.length - marker_len);
    }
  }

  let change;
  if (highlight_only) {
    change = `{==${clean_target}==}`;
  } else {
    const deletion = clean_target ? `{--${clean_target}--}` : "";
    const insertion = clean_new ? `{++${clean_new}++}` : "";
    change = deletion + insertion;
  }

  const meta_parts = [];
  if (comment) meta_parts.push(comment);
  if (include_index) meta_parts.push(`[Edit:${edit_index}]`);
  const meta = meta_parts.length > 0 ? `{>>${meta_parts.join(" ")}<<}` : "";

  return [prefix_markup, change, suffix_markup, meta].join("");
}
|
|
3084
|
+
/**
 * Annotates `markdown_text` with CriticMarkup for every edit that can be
 * located in it.
 *
 * Edits without target text, or whose target cannot be found, are skipped.
 * When two matches overlap, the edit that comes first in `edits` wins.
 * Replacements are applied from the end of the text towards the start so
 * earlier offsets stay valid. Text shared by the matched span and the new
 * text (common prefix/suffix) is left outside the markup.
 *
 * @param {string} markdown_text - Source markdown.
 * @param {Array<object>} edits - Edits with `target_text`, optional
 *   `new_text` and optional `comment`.
 * @param {boolean} [include_index=false] - Emit `[Edit:N]` tags.
 * @param {boolean} [highlight_only=false] - Highlight instead of del/ins.
 * @returns {string} The annotated markdown.
 */
function apply_edits_to_markdown(markdown_text, edits, include_index = false, highlight_only = false) {
  if (!edits || edits.length === 0) return markdown_text;

  // Matches are collected in input order, which is also the priority order
  // used for overlap resolution below.
  const accepted = [];
  for (let idx = 0; idx < edits.length; idx += 1) {
    const edit = edits[idx];
    const target = edit.target_text || "";
    if (!target) continue;
    const [start, end] = _find_match_in_text(markdown_text, target);
    if (start === -1) continue;
    const collides = accepted.some((taken) => start < taken.end && end > taken.start);
    if (collides) continue;
    accepted.push({ start, end, matched: markdown_text.substring(start, end), edit, idx });
  }

  // Right-to-left application keeps the remaining offsets untouched.
  accepted.sort((a, b) => b.start - a.start);

  return accepted.reduce((result, { start, end, matched, edit, idx }) => {
    const new_txt = edit.new_text || "";
    const [prefix_len, suffix_len] = trim_common_context(matched, new_txt);
    const kept_head = prefix_len > 0 ? matched.substring(0, prefix_len) : "";
    const kept_tail = suffix_len > 0 ? matched.substring(matched.length - suffix_len) : "";
    const markup = _build_critic_markup(
      matched.substring(prefix_len, matched.length - suffix_len),
      new_txt.substring(prefix_len, new_txt.length - suffix_len),
      edit.comment,
      idx,
      include_index,
      highlight_only
    );
    return result.substring(0, start) + (kept_head + markup + kept_tail) + result.substring(end);
  }, markdown_text);
}
|
|
3138
|
+
|
|
3139
|
+
// src/pagination.ts
|
|
3140
|
+
// Character budget per page: _assemble_pages starts a new page when adding
// the next block would push the current page above this size.
var PAGE_TARGET_CHARS = 19000;
// Marker line that split_structural_appendix searches for to separate the
// body from the trailing appendix.
var APPENDIX_MARKER = "<!-- READONLY_BOUNDARY_START -->";
// CriticMarkup opening token -> matching closing token. Key order is the
// order in which the tokens are tried by the paragraph splitter.
var _CRITIC_TOKENS = { "{++": "++}", "{--": "--}", "{==": "==}", "{>>": "<<}" };
// Captures the numeric id of every "Chg:<digits>" tag.
var _CHG_ID_PATTERN = /\bChg:(\d+)\b/g;
|
|
3149
|
+
/**
 * Splits a markdown document into its body and its structural appendix.
 *
 * The appendix starts at the beginning of the line that contains the first
 * `APPENDIX_MARKER`. The body is everything before that line with trailing
 * whitespace removed.
 *
 * @param {string} markdown - Full markdown text.
 * @returns {[string, string]} `[body, appendix]`; the appendix is "" when
 *   the marker is absent, and both are "" for empty input.
 */
function split_structural_appendix(markdown) {
  if (!markdown) return ["", ""];
  const marker_at = markdown.indexOf(APPENDIX_MARKER);
  if (marker_at === -1) return [markdown, ""];
  // Back up to the start of the line holding the marker.
  const appendix_from = markdown.lastIndexOf("\n", marker_at) + 1;
  return [
    markdown.substring(0, appendix_from).trimEnd(),
    markdown.substring(appendix_from)
  ];
}
|
|
3158
|
+
/**
 * Splits a markdown body into pages and attaches the structural appendix to
 * every page.
 *
 * The body is tokenised into atomic blocks and assembled into pages; when a
 * non-blank appendix is given, its trimmed text is appended to each page
 * after a blank line. With an empty body a single page holding only the
 * trimmed appendix is returned.
 *
 * @param {string} markdown_body - Body text without the appendix.
 * @param {string} [structural_appendix=""] - Appendix repeated on each page.
 * @returns {{pages: object[], total_pages: number, body_pages: string[],
 *   body_page_offsets: number[]}} Page descriptors plus the raw body pages
 *   and the offset at which each body page starts.
 */
function paginate(markdown_body, structural_appendix = "") {
  const describe_page = (content, index, total) => ({
    page: index + 1,
    total_pages: total,
    has_next: index + 1 < total,
    has_prev: index + 1 > 1,
    tracked_change_count: _count_tracked_changes(content),
    page_content: content
  });

  if (!markdown_body) {
    const only_content = structural_appendix ? structural_appendix.trim() : "";
    return {
      pages: [describe_page(only_content, 0, 1)],
      total_pages: 1,
      body_pages: [""],
      body_page_offsets: [0]
    };
  }

  const [body_pages, body_page_offsets] = _assemble_pages(_tokenize_into_atomic_blocks(markdown_body));

  const has_appendix = Boolean(structural_appendix && structural_appendix.trim());
  let final_pages;
  if (has_appendix) {
    const appendix = structural_appendix.trim();
    final_pages = body_pages.map((body_page) => (body_page ? `${body_page}\n\n${appendix}` : appendix));
  } else {
    final_pages = body_pages.slice();
  }

  const total = final_pages.length;
  return {
    pages: final_pages.map((content, index) => describe_page(content, index, total)),
    total_pages: total,
    body_pages,
    body_page_offsets
  };
}
|
|
3203
|
+
/**
 * Breaks the body into blocks that must not be split across pages: the text
 * is cut at safe paragraph breaks, then footnote/endnote sections are merged
 * into single blocks.
 *
 * @param {string} markdown_body - Body text.
 * @returns {Array<[string, number]>} `[block_text, start_offset]` records.
 */
function _tokenize_into_atomic_blocks(markdown_body) {
  return _merge_footnote_sections(_split_on_safe_paragraph_breaks(markdown_body));
}
|
|
3207
|
+
/**
 * Splits `text` at blank lines that are not inside a CriticMarkup span.
 *
 * The text is scanned once while tracking, per closing token, how many spans
 * are currently open. A run of two or more newlines ends the current block
 * only when every counter is zero; the whole newline run is then skipped.
 *
 * @param {string} text - Text to split.
 * @returns {Array<[string, number]>} Non-empty `[block_text, start_offset]`
 *   records in document order.
 */
function _split_on_safe_paragraph_breaks(text) {
  const open_spans = { "++}": 0, "--}": 0, "==}": 0, "<<}": 0 };
  const token_pairs = Object.entries(_CRITIC_TOKENS);
  const closing_tokens = Object.values(_CRITIC_TOKENS);
  const total = text.length;
  const blocks = [];

  const emit = (from, to) => {
    const chunk = text.substring(from, to);
    if (chunk) blocks.push([chunk, from]);
  };

  let block_start = 0;
  let pos = 0;
  while (pos < total) {
    const opened = token_pairs.find(([open_tok]) => text.startsWith(open_tok, pos));
    if (opened) {
      const [open_tok, close_tok] = opened;
      open_spans[close_tok]++;
      pos += open_tok.length;
      continue;
    }

    const closed = closing_tokens.find((close_tok) => text.startsWith(close_tok, pos));
    if (closed) {
      // A stray closing token never drives a counter below zero.
      if (open_spans[closed] > 0) open_spans[closed]--;
      pos += closed.length;
      continue;
    }

    const at_blank_line = text[pos] === "\n" && pos + 1 < total && text[pos + 1] === "\n";
    if (at_blank_line && Object.values(open_spans).every((count) => count === 0)) {
      emit(block_start, pos);
      while (pos < total && text[pos] === "\n") pos++;
      block_start = pos;
      continue;
    }

    pos++;
  }

  if (block_start < total) emit(block_start, total);
  return blocks;
}
|
|
3253
|
+
/**
 * Glues a "## Footnotes" / "## Endnotes" heading block together with the
 * note definitions that directly follow it.
 *
 * Following blocks are absorbed while they start (ignoring leading
 * whitespace) with "[^fn-" or "[^en-"; absorbed blocks are joined with a
 * blank line and the merged block keeps the heading's offset.
 *
 * @param {Array<[string, number]>} blocks - `[block_text, start_offset]`.
 * @returns {Array<[string, number]>} Blocks with note sections merged; the
 *   input array itself is returned when it is empty.
 */
function _merge_footnote_sections(blocks) {
  if (!blocks.length) return blocks;

  const starts_with_any = (block_text, prefixes) => {
    const trimmed = block_text.trimStart();
    return prefixes.some((prefix) => trimmed.startsWith(prefix));
  };
  const is_section_header = (block_text) => starts_with_any(block_text, ["## Footnotes", "## Endnotes"]);
  const is_note_definition = (block_text) => starts_with_any(block_text, ["[^fn-", "[^en-"]);

  const merged = [];
  let index = 0;
  while (index < blocks.length) {
    const [block_text, block_offset] = blocks[index];
    index += 1;
    if (!is_section_header(block_text)) {
      merged.push([block_text, block_offset]);
      continue;
    }
    const section = [block_text];
    while (index < blocks.length && is_note_definition(blocks[index][0])) {
      section.push(blocks[index][0]);
      index += 1;
    }
    merged.push([section.join("\n\n"), block_offset]);
  }
  return merged;
}
|
|
3285
|
+
/**
 * Packs atomic blocks into pages of at most `PAGE_TARGET_CHARS` characters.
 *
 * Blocks on the same page are joined with a blank line, and those two
 * separator characters count towards the page size. A block that is larger
 * than the target on its own becomes a page by itself.
 *
 * @param {Array<[string, number]>} block_records - `[block_text, offset]`.
 * @returns {[string[], number[]]} Page texts and, for each page, the offset
 *   of its first block; `[[""], [0]]` when there is nothing to page.
 */
function _assemble_pages(block_records) {
  if (!block_records.length) return [[""], [0]];

  const pages = [];
  const page_starts = [];
  let pending = [];
  let pending_size = 0;
  let pending_start = -1;

  const flush_pending = () => {
    if (pending.length > 0) {
      pages.push(pending.join("\n\n"));
      page_starts.push(pending_start);
    }
    pending = [];
    pending_size = 0;
    pending_start = -1;
  };

  for (const [block_text, block_offset] of block_records) {
    const block_size = block_text.length;
    const joined_size = block_size + (pending.length > 0 ? 2 : 0);

    if (pending.length > 0 && pending_size + joined_size > PAGE_TARGET_CHARS) {
      flush_pending();
    }

    if (pending.length === 0) {
      if (block_size > PAGE_TARGET_CHARS) {
        // Oversized block: it gets a page of its own.
        pages.push(block_text);
        page_starts.push(block_offset);
        continue;
      }
      pending_start = block_offset;
    }

    pending.push(block_text);
    pending_size += pending_size > 0 ? joined_size : block_size;
  }

  flush_pending();
  return pages.length ? [pages, page_starts] : [[""], [0]];
}
|
|
3320
|
+
/**
 * Counts the distinct change ids referenced on a page.
 *
 * @param {string} page_content - Page text.
 * @returns {number} Number of different ids captured by `_CHG_ID_PATTERN`.
 */
function _count_tracked_changes(page_content) {
  const ids = new Set();
  for (const hit of page_content.matchAll(_CHG_ID_PATTERN)) {
    ids.add(hit[1]);
  }
  return ids.size;
}
|
|
3325
|
+
|
|
3326
|
+
// src/outline.ts
|
|
3327
|
+
// Matches text that begins with one to six "#" characters followed by a
// single space; the run of "#" is captured in group 1.
var _HEADING_PREFIX_RE = /^(#{1,6}) /;
|
|
3328
|
+
// NOTE(review): presumably the minimum word count a heuristically detected
// heading must have; its use is not visible in this part of the file, so confirm.
var _HEURISTIC_MIN_WORDS = 3;
|
|
3329
|
+
/**
 * Builds the heading outline of a document.
 *
 * The body is walked into block records; every paragraph that is a heading
 * and passes the quality filter becomes an outline node carrying its level,
 * text, page number, style, whether a table appears in the blocks it owns,
 * and the footnote ids collected from those blocks.
 *
 * @param {object} doc - Document object; `doc.pkg` is read for comments.
 * @param {*} projected_body - Not read by this function.
 * @param {string[]} body_pages - Body pages; only its length is checked.
 * @param {number[]} body_page_offsets - Start offset of each body page.
 * @param {*} [paragraph_offsets=null] - Not read by this function.
 * @returns {object[]} Outline nodes in document order.
 * @throws {Error} When `body_pages` and `body_page_offsets` differ in length.
 */
function extract_outline(doc, projected_body, body_pages, body_page_offsets, paragraph_offsets = null) {
  if (body_pages.length !== body_page_offsets.length) {
    throw new Error("body_pages and body_page_offsets length mismatch");
  }
  const comments_map = extract_comments_data(doc.pkg);
  const block_records = _walk_doc_body(doc, comments_map);

  const heading_indices = [];
  for (const [idx, rec] of block_records.entries()) {
    const is_outline_heading = rec.is_paragraph && _is_heading(rec.item) && _heading_passes_quality_filter(rec.item, comments_map);
    if (is_outline_heading) heading_indices.push(idx);
  }

  return heading_indices.map((rec_idx, h_pos) => {
    const rec = block_records[rec_idx];
    const paragraph = rec.item;
    const level = _heading_level(paragraph);
    const text = _heading_text(paragraph, comments_map);
    const style = _determine_heading_style(paragraph);
    const first_owned = rec_idx + 1;
    const owned_end = _find_owned_end(block_records, heading_indices, h_pos, level);
    const owned_blocks = block_records.slice(first_owned, owned_end);
    const has_table = _direct_has_table(block_records, first_owned, owned_end);
    const footnote_ids = _collect_footnote_ids(owned_blocks);
    const page = _offset_to_page(rec.start_offset, body_page_offsets);
    return { level, text, page, style, has_table, footnote_ids };
  });
}
|
|
3360
|
+
/**
 * Reports whether a table occurs in `block_records[range_start, range_end)`
 * before the next heading paragraph.
 *
 * @param {object[]} block_records - Records with `is_paragraph`, `is_table`
 *   and `item`.
 * @param {number} range_start - First index to inspect.
 * @param {number} range_end - Index after the last one to inspect.
 * @returns {boolean} true when a table record is reached first.
 */
function _direct_has_table(block_records, range_start, range_end) {
  let idx = range_start;
  while (idx < range_end) {
    const rec = block_records[idx];
    const is_heading_paragraph = rec.is_paragraph && _is_heading(rec.item);
    if (is_heading_paragraph) return false;
    if (rec.is_table) return true;
    idx += 1;
  }
  return false;
}
|
|
3368
|
+
/**
 * Walks the top-level blocks of the document body and records where each one
 * starts in the projected text.
 *
 * The starting offset accounts for the projected text of every document part
 * that precedes the body, with two separator characters between non-empty
 * parts. Blocks inside the body are likewise separated by two characters.
 * For tables, the blocks nested in their cells are recorded right after the
 * table's own record.
 *
 * @param {object} doc - Document whose body is walked.
 * @param {*} comments_map - Comment data forwarded to the text builders.
 * @returns {object[]} Records `{ item, is_paragraph, is_table, start_offset,
 *   projected_length }`.
 */
function _walk_doc_body(doc, comments_map) {
  // Measure everything projected ahead of the body part.
  let body_start_offset = 0;
  let body_seen = false;
  for (const part of Array.from(iter_document_parts(doc))) {
    if (part === doc) {
      body_seen = true;
      break;
    }
    const part_text = _project_part(part, comments_map);
    if (!part_text) continue;
    if (body_start_offset > 0) body_start_offset += 2;
    body_start_offset += part_text.length;
  }
  if (!body_seen) {
    body_start_offset = 0;
  } else if (body_start_offset > 0) {
    body_start_offset += 2;
  }

  const records = [];
  let cursor = body_start_offset;
  let any_block_yet = false;
  for (const item of iter_block_items(doc)) {
    const is_paragraph = item instanceof Paragraph;
    const is_table = !is_paragraph && item instanceof Table;
    if (!is_paragraph && !is_table) continue;

    let projected_length;
    if (is_paragraph) {
      const prefix = get_paragraph_prefix(item);
      const p_text = build_paragraph_text(item, comments_map, false);
      projected_length = (prefix + p_text).length;
    } else {
      const table_text = extract_table(item, comments_map, false, 0);
      projected_length = table_text ? table_text.length : 0;
    }

    if (any_block_yet) cursor += 2;
    records.push({ item, is_paragraph, is_table, start_offset: cursor, projected_length });
    if (is_table) _record_table_inner_blocks_lite(item, cursor, records, comments_map);
    cursor += projected_length;
    any_block_yet = true;
  }
  return records;
}
|
|
3414
|
+
/**
 * Computes the projected offset of a paragraph that lives inside `table`.
 *
 * Rows are walked in order, adding one character between rows and three
 * between the distinct cells of a row; each cell is measured by
 * `_walk_cell_for_offset`. Cell objects repeated within a row are visited
 * once.
 *
 * @param {object} table - Table to search.
 * @param {object} target_paragraph - Paragraph whose `_element` is sought.
 * @param {number} table_start_offset - Projected offset of the table start.
 * @param {*} comments_map - Comment data forwarded to the text builders.
 * @returns {number} Offset of the paragraph, or `table_start_offset` when it
 *   is not found.
 */
function _compute_inner_block_offset(table, target_paragraph, table_start_offset, comments_map) {
  const target_el = target_paragraph._element;
  let cursor = table_start_offset;
  let is_first_row = true;
  for (const row of table.rows) {
    if (!is_first_row) cursor += 1;
    is_first_row = false;

    const visited_cells = new Set();
    for (const cell of row.cells) {
      if (visited_cells.has(cell)) continue;
      // Separator before every distinct cell except the first of the row.
      if (visited_cells.size > 0) cursor += 3;
      visited_cells.add(cell);

      const [cursor_after, found] = _walk_cell_for_offset(cell, target_el, cursor, comments_map);
      if (found) return cursor_after;
      cursor = cursor_after;
    }
  }
  return table_start_offset;
}
|
|
3435
|
+
/**
 * Walks the blocks of one table cell looking for the paragraph element
 * `target_el`, accumulating projected text lengths on the way.
 *
 * Blocks within the cell are separated by two characters. A nested table
 * that contains the target is resolved recursively. Containment is checked
 * directly on the DOM, so a target that is the very first block of the
 * nested table (whose offset equals the nested table's start) is still
 * reported as found.
 *
 * @param {object} cell - Cell whose block items are walked.
 * @param {object} target_el - XML element of the paragraph being located.
 * @param {number} cell_start_cursor - Projected offset at the cell start.
 * @param {*} comments_map - Comment data forwarded to the text builders.
 * @returns {[number, boolean]} `[offset, true]` with the target's offset when
 *   found, otherwise `[offset_after_cell, false]`.
 */
function _walk_cell_for_offset(cell, target_el, cell_start_cursor, comments_map) {
  let cursor = cell_start_cursor;
  let is_first_block = true;
  for (const inner_item of iter_block_items(cell)) {
    if (!is_first_block) cursor += 2;
    if (inner_item instanceof Paragraph) {
      if (inner_item._element === target_el) return [cursor, true];
      const prefix = get_paragraph_prefix(inner_item);
      const p_text = build_paragraph_text(inner_item, comments_map, false);
      cursor += (prefix + p_text).length;
    } else if (inner_item instanceof Table) {
      // Decide "found" from the DOM rather than from whether the nested
      // offset moved: an unmoved offset is also what a hit on the nested
      // table's first block looks like.
      if (_element_is_descendant(target_el, inner_item._element)) {
        const nested_offset = _compute_inner_block_offset(inner_item, new Paragraph(target_el, null), cursor, comments_map);
        return [nested_offset, true];
      }
      const table_text = extract_table(inner_item, comments_map, false, 0);
      cursor += table_text ? table_text.length : 0;
    }
    is_first_block = false;
  }
  return [cursor, false];
}
|
|
3457
|
+
/**
 * Report whether `ancestor_el` appears somewhere on the `parentNode` chain of
 * `target_el`. The target itself is not considered its own ancestor.
 *
 * @param {object} target_el Node whose ancestry is inspected.
 * @param {object} ancestor_el Candidate ancestor node.
 * @returns {boolean} True when `ancestor_el` is a strict ancestor of `target_el`.
 */
function _element_is_descendant(target_el, ancestor_el) {
  for (let node = target_el.parentNode; node; node = node.parentNode) {
    if (node === ancestor_el) return true;
  }
  return false;
}
|
|
3465
|
+
/**
 * Append one record per paragraph and per nested table found inside `table`
 * to `records`, recursing into nested tables.
 *
 * Inner records always get `projected_length: 0`. Only paragraphs for which
 * `_is_heading` is true get an individually computed start offset; every
 * other inner block simply inherits `inherited_offset`.
 *
 * @param {object} table Table wrapper exposing `rows` / `cells`.
 * @param {number} inherited_offset Offset assigned to blocks that are not headings.
 * @param {Array<object>} records Output list, mutated in place.
 * @param {object} comments_map Passed through to the offset computation.
 * @returns {void}
 */
function _record_table_inner_blocks_lite(table, inherited_offset, records, comments_map) {
  // Shared across all rows so a cell object reachable from several rows is visited once.
  const visited = new Set();
  for (const row of table.rows) {
    for (const cell of row.cells) {
      if (visited.has(cell)) continue;
      visited.add(cell);

      for (const block of iter_block_items(cell)) {
        if (block instanceof Paragraph) {
          let start_offset = inherited_offset;
          if (_is_heading(block)) {
            start_offset = _compute_inner_block_offset(table, block, inherited_offset, comments_map);
          }
          records.push({
            item: block,
            is_paragraph: true,
            is_table: false,
            start_offset: start_offset,
            projected_length: 0
          });
          continue;
        }
        if (block instanceof Table) {
          records.push({
            item: block,
            is_paragraph: false,
            is_table: true,
            start_offset: inherited_offset,
            projected_length: 0
          });
          _record_table_inner_blocks_lite(block, inherited_offset, records, comments_map);
        }
      }
    }
  }
}
|
|
3483
|
+
/**
 * Render a document part as text: each block item becomes one chunk and the
 * chunks are joined with a blank line.
 *
 * - A part whose constructor is named "NotesPart" gets a leading
 *   "---" / "## Footnotes" (or "## Endnotes") header chunk.
 * - Items whose constructor is named "FootnoteItem" are rendered recursively.
 * - When the part itself is a "FootnoteItem", its first paragraph is prefixed
 *   with a `[^<note_type>-<id>]: ` label, unless a table came first.
 *
 * @param {object} part Part or footnote item accepted by `iter_block_items`.
 * @param {object} comments_map Passed through to the text-building helpers.
 * @returns {string} The projected text ("" when there are no chunks).
 */
function _project_part(part, comments_map) {
  const part_kind = part.constructor.name;
  const is_footnote_item = part_kind === "FootnoteItem";
  const pieces = [];

  if (part_kind === "NotesPart") {
    const title = part.note_type === "fn" ? "## Footnotes" : "## Endnotes";
    pieces.push(`---\n${title}`);
  }

  let awaiting_first_para = true;
  for (const block of iter_block_items(part)) {
    if (block.constructor.name === "FootnoteItem") {
      const nested = _project_part(block, comments_map);
      if (nested) pieces.push(nested);
      continue;
    }
    if (block instanceof Paragraph) {
      const lead = get_paragraph_prefix(block);
      const prefix = awaiting_first_para && is_footnote_item
        ? `[^${part.note_type}-${part.id}]: ${lead}`
        : lead;
      const body = build_paragraph_text(block, comments_map, false);
      pieces.push(prefix + body);
      awaiting_first_para = false;
    } else if (block instanceof Table) {
      const rendered = extract_table(block, comments_map, false, 0);
      if (rendered) pieces.push(rendered);
      // A table also consumes the "first paragraph" slot, even if it rendered empty.
      awaiting_first_para = false;
    }
  }
  return pieces.join("\n\n");
}
|
|
3510
|
+
/**
 * Tell whether a paragraph's projected prefix matches `_HEADING_PREFIX_RE`.
 *
 * @param {object} paragraph Paragraph wrapper.
 * @returns {boolean} True when the prefix matches the heading pattern.
 */
function _is_heading(paragraph) {
  const prefix = get_paragraph_prefix(paragraph);
  return _HEADING_PREFIX_RE.test(prefix);
}
|
|
3513
|
+
/**
 * Decide whether a heading candidate is trustworthy enough to keep.
 *
 * Headings identified by anything other than the "(heuristic)" style always
 * pass. Heuristic headings must have non-empty cleaned text containing at
 * least `_HEURISTIC_MIN_WORDS` words.
 *
 * Words are counted with Unicode-aware classes (letters, combining marks,
 * numbers, underscore). JavaScript's `\w` only matches ASCII `[A-Za-z0-9_]`,
 * so headings written in non-Latin scripts would otherwise count as zero
 * words and be rejected unconditionally.
 *
 * @param {object} paragraph Paragraph wrapper.
 * @param {object} comments_map Passed through to `_heading_text`.
 * @returns {boolean} True when the heading should be kept.
 */
function _heading_passes_quality_filter(paragraph, comments_map) {
  const style = _determine_heading_style(paragraph);
  if (style !== "(heuristic)") return true;
  const text = _heading_text(paragraph, comments_map);
  if (!text) return false;
  const words = text.match(/[\p{L}\p{M}\p{N}_]+/gu) || [];
  return words.length >= _HEURISTIC_MIN_WORDS;
}
|
|
3521
|
+
/**
 * Derive a heading level from the paragraph's projected prefix.
 *
 * The level is the length of the first capture group of
 * `_HEADING_PREFIX_RE`, capped at 6. A prefix that does not match yields 1.
 *
 * @param {object} paragraph Paragraph wrapper.
 * @returns {number} Heading level (never above 6).
 */
function _heading_level(paragraph) {
  const hit = _HEADING_PREFIX_RE.exec(get_paragraph_prefix(paragraph));
  if (!hit) return 1;
  return Math.min(hit[1].length, 6);
}
|
|
3525
|
+
/**
 * Produce the plain display text of a heading paragraph: build its text,
 * drop CriticMarkup, drop inline emphasis markers, then trim whitespace.
 *
 * @param {object} paragraph Paragraph wrapper.
 * @param {object} comments_map Passed through to `build_paragraph_text`.
 * @returns {string} Cleaned, trimmed heading text.
 */
function _heading_text(paragraph, comments_map) {
  const raw = build_paragraph_text(paragraph, comments_map, false);
  const without_markup = _strip_critic_markup(raw);
  return _strip_inline_formatting(without_markup).trim();
}
|
|
3531
|
+
/**
 * Remove CriticMarkup annotations from a string.
 *
 * Deletions (`{--…--}`) and comments (`{>>…<<}`) are removed entirely;
 * insertions (`{++…++}`) and highlights (`{==…==}`) are replaced by their
 * inner text. Spans may cross line breaks.
 *
 * @param {string} text Input text; any falsy value yields "".
 * @returns {string} Text without CriticMarkup wrappers.
 */
function _strip_critic_markup(text) {
  if (!text) return "";
  // Order matters: whole-span removals run before the unwrapping rules.
  const rules = [
    [/\{--[\s\S]*?--\}/g, ""],
    [/\{>>[\s\S]*?<<\}/g, ""],
    [/\{\+\+([\s\S]*?)\+\+\}/g, "$1"],
    [/\{==([\s\S]*?)==\}/g, "$1"]
  ];
  let result = text;
  for (const [pattern, replacement] of rules) {
    result = result.replace(pattern, replacement);
  }
  return result;
}
|
|
3539
|
+
/**
 * Unwrap inline emphasis markers, keeping the wrapped text.
 *
 * Handles `**bold**`, `__underlined__`, and `_italic_`. The single-underscore
 * rule only fires when the underscores are not adjacent to word characters,
 * so identifiers such as `snake_case_name` are left untouched.
 *
 * @param {string} text Input text; any falsy value yields "".
 * @returns {string} Text with the emphasis markers removed.
 */
function _strip_inline_formatting(text) {
  if (!text) return "";
  return text
    .replace(/\*\*(.+?)\*\*/g, "$1")
    .replace(/__(.+?)__/g, "$1")
    .replace(/(?<!\w)_(\S(?:.*?\S)?)_(?!\w)/g, "$1");
}
|
|
3546
|
+
/**
 * Describe how a paragraph qualifies as a heading.
 *
 * Resolution order:
 *  1. If the paragraph properties carry a numeric `w:outlineLvl`, return the
 *     resolved style name when it starts with "Heading" or equals "Title",
 *     otherwise the marker "(outline_level)".
 *  2. Otherwise return the resolved style name when it starts with "Heading",
 *     equals "Title", or contains "Heading" followed by a single digit 1-6.
 *  3. Otherwise return the marker "(heuristic)".
 *
 * @param {object} paragraph Paragraph wrapper (`_element`, `_parent` are read).
 * @returns {string} A style name, "(outline_level)", or "(heuristic)".
 */
function _determine_heading_style(paragraph) {
  const [style_cache, default_pstyle] = _get_style_cache(paragraph._parent.part || paragraph._parent);
  const is_heading_like = (name) => Boolean(name) && (name.startsWith("Heading") || name === "Title");

  // Same pStyle -> style-name lookup in both branches, so resolve it once.
  const resolved_name = _safe_style_name(paragraph, style_cache, default_pstyle);

  const pPr = findChild(paragraph._element, "w:pPr");
  const oLvl = pPr ? findChild(pPr, "w:outlineLvl") : null;
  if (oLvl && /^\d+$/.test(oLvl.getAttribute("w:val") || "")) {
    return is_heading_like(resolved_name) ? resolved_name : "(outline_level)";
  }

  if (is_heading_like(resolved_name)) return resolved_name;
  if (resolved_name && /Heading[ ]?([1-6])(?![0-9])/.test(resolved_name)) return resolved_name;
  return "(heuristic)";
}
|
|
3565
|
+
/**
 * Resolve the display name of a paragraph's style without throwing on
 * missing pieces.
 *
 * The style id is the paragraph's `w:pStyle` value when present and
 * non-empty, otherwise `default_pstyle`.
 *
 * @param {object} paragraph Paragraph wrapper; `_element` is read.
 * @param {object} style_cache Map-like object from style id to an entry with `name`.
 * @param {string} default_pstyle Fallback style id.
 * @returns {string|null} The cached entry's `name`, or null when the id, the
 *   cache, or the entry is missing.
 */
function _safe_style_name(paragraph, style_cache, default_pstyle) {
  let style_id = default_pstyle;
  const pPr = findChild(paragraph._element, "w:pPr");
  const pStyle = pPr ? findChild(pPr, "w:pStyle") : null;
  if (pStyle) style_id = pStyle.getAttribute("w:val") || default_pstyle;

  if (!style_id || !style_cache) return null;
  const entry = style_cache[style_id];
  return entry ? entry.name : null;
}
|
|
3574
|
+
/**
 * Find where the section owned by a heading ends.
 *
 * Scans the headings after position `current_h_pos` and returns the
 * block-record index of the first one whose level is less than or equal to
 * `current_level`.
 *
 * @param {Array<object>} block_records All block records; `item` is read.
 * @param {Array<number>} heading_indices Indices into `block_records` that are headings.
 * @param {number} current_h_pos Position of the current heading within `heading_indices`.
 * @param {number} current_level Level of the current heading.
 * @returns {number} Exclusive end index; `block_records.length` when no such heading follows.
 */
function _find_owned_end(block_records, heading_indices, current_h_pos, current_level) {
  let pos = current_h_pos + 1;
  while (pos < heading_indices.length) {
    const candidate = heading_indices[pos];
    const candidate_level = _heading_level(block_records[candidate].item);
    if (candidate_level <= current_level) return candidate;
    pos += 1;
  }
  return block_records.length;
}
|
|
3581
|
+
/**
 * Gather the footnote and endnote references found in a list of block records.
 *
 * Only records flagged `is_paragraph` are inspected. Footnote events become
 * `fn-<id>`, endnote events become `en-<id>`; every other event is ignored.
 *
 * @param {Array<object>} owned_blocks Block records (`is_paragraph`, `item`).
 * @returns {Array<string>} Distinct ids in order of first appearance.
 */
function _collect_footnote_ids(owned_blocks) {
  // A Set keeps insertion order, giving "unique, first-seen order" directly.
  const unique_ids = new Set();
  for (const rec of owned_blocks) {
    if (!rec.is_paragraph) continue;
    for (const event of iter_paragraph_content(rec.item)) {
      if (!("type" in event)) continue;
      switch (event.type) {
        case "footnote":
          unique_ids.add(`fn-${event.id}`);
          break;
        case "endnote":
          unique_ids.add(`en-${event.id}`);
          break;
        default:
          break;
      }
    }
  }
  return Array.from(unique_ids);
}
|
|
3600
|
+
/**
 * Map a character offset to a 1-based page number.
 *
 * Counts how many leading entries of `body_page_offsets` are less than or
 * equal to `offset`, stopping at the first entry that is greater.
 *
 * @param {number} offset Character offset to look up.
 * @param {Array<number>} body_page_offsets Start offset of each page, in order.
 * @returns {number} Page number, never below 1. A missing or empty list and
 *   an offset before the first page start both yield 1.
 */
function _offset_to_page(offset, body_page_offsets) {
  if (!body_page_offsets || body_page_offsets.length === 0) return 1;
  let reached = 0;
  while (reached < body_page_offsets.length && offset >= body_page_offsets[reached]) {
    reached += 1;
  }
  return reached > 0 ? reached : 1;
}
|
|
3609
|
+
|
|
3610
|
+
// src/index.ts
|
|
3611
|
+
/** Returns the fixed identifier string of this engine build. */
var identifyEngine = () => {
  return "adeu-core-node";
};
|
|
3612
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
3613
|
+
// Static listing of the CommonJS export names. The leading `0 &&`
// short-circuits, so this assignment is never evaluated at runtime.
0 && (module.exports = {
|
|
3614
|
+
BatchValidationError,
|
|
3615
|
+
DocumentMapper,
|
|
3616
|
+
DocumentObject,
|
|
3617
|
+
RedlineEngine,
|
|
3618
|
+
apply_edits_to_markdown,
|
|
3619
|
+
extractTextFromBuffer,
|
|
3620
|
+
extract_outline,
|
|
3621
|
+
generate_edits_from_text,
|
|
3622
|
+
identifyEngine,
|
|
3623
|
+
paginate,
|
|
3624
|
+
split_structural_appendix,
|
|
3625
|
+
trim_common_context
|
|
3626
|
+
});
|
|
3627
|
+
//# sourceMappingURL=index.cjs.map
|