@adeu/core 1.10.0 → 1.11.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +259 -30
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +9 -3
- package/dist/index.d.ts +9 -3
- package/dist/index.js +258 -30
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/src/engine.bugs.test.ts +2 -2
- package/src/engine.ts +179 -49
- package/src/index.ts +1 -1
- package/src/ingest.ts +32 -8
- package/src/mapper.ts +14 -8
- package/src/outline.ts +196 -1
- package/src/parity_gaps.test.ts +98 -0
package/package.json
CHANGED
package/src/engine.bugs.test.ts
CHANGED
|
@@ -251,14 +251,14 @@ describe("Resolved Bugs Core Engine Verification", () => {
|
|
|
251
251
|
const p = addParagraph(doc, "Short heading");
|
|
252
252
|
|
|
253
253
|
const fakeCache = {
|
|
254
|
-
|
|
254
|
+
Heading3: { name: "Heading 3", outline_level: 2, bold: true },
|
|
255
255
|
};
|
|
256
256
|
(doc.pkg as any)._adeu_style_cache = [fakeCache, "Normal"];
|
|
257
257
|
|
|
258
258
|
const docEl = p.ownerDocument!;
|
|
259
259
|
const pPr = docEl.createElement("w:pPr");
|
|
260
260
|
const pStyle = docEl.createElement("w:pStyle");
|
|
261
|
-
pStyle.setAttribute("w:val", "
|
|
261
|
+
pStyle.setAttribute("w:val", "Heading3");
|
|
262
262
|
pPr.appendChild(pStyle);
|
|
263
263
|
p.insertBefore(pPr, p.firstChild);
|
|
264
264
|
|
package/src/engine.ts
CHANGED
|
@@ -221,6 +221,7 @@ export class RedlineEngine {
|
|
|
221
221
|
public mapper: DocumentMapper;
|
|
222
222
|
public comments_manager: CommentsManager;
|
|
223
223
|
public clean_mapper: DocumentMapper | null = null;
|
|
224
|
+
public original_mapper: DocumentMapper | null = null;
|
|
224
225
|
public skipped_details: string[] = [];
|
|
225
226
|
|
|
226
227
|
constructor(doc: DocumentObject, author: string = "Adeu AI (TS)") {
|
|
@@ -255,7 +256,9 @@ export class RedlineEngine {
|
|
|
255
256
|
return null;
|
|
256
257
|
}
|
|
257
258
|
|
|
258
|
-
private _build_edit_context_previews(
|
|
259
|
+
private _build_edit_context_previews(
|
|
260
|
+
edit: any,
|
|
261
|
+
): [string | null, string | null] {
|
|
259
262
|
if (edit.type !== "modify") return [null, null];
|
|
260
263
|
if (edit._resolved_proxy_edit) {
|
|
261
264
|
edit = edit._resolved_proxy_edit;
|
|
@@ -271,7 +274,10 @@ export class RedlineEngine {
|
|
|
271
274
|
|
|
272
275
|
const before_start = Math.max(0, start_idx - 30);
|
|
273
276
|
const context_before = full_text.substring(before_start, start_idx);
|
|
274
|
-
const context_after = full_text.substring(
|
|
277
|
+
const context_after = full_text.substring(
|
|
278
|
+
start_idx + length,
|
|
279
|
+
start_idx + length + 30,
|
|
280
|
+
);
|
|
275
281
|
|
|
276
282
|
const critic_markup = `${context_before}{--${target_text}--}{++${new_text}++}${context_after}`;
|
|
277
283
|
|
|
@@ -337,7 +343,7 @@ export class RedlineEngine {
|
|
|
337
343
|
for (const tag of ["w:t", "w:tab", "w:br"]) {
|
|
338
344
|
for (const child of findAllDescendants(p, tag)) {
|
|
339
345
|
if (tag === "w:t" && !child.textContent) continue;
|
|
340
|
-
|
|
346
|
+
|
|
341
347
|
let is_deleted = false;
|
|
342
348
|
let curr = child.parentNode as Element | null;
|
|
343
349
|
while (curr && curr !== p) {
|
|
@@ -401,7 +407,10 @@ export class RedlineEngine {
|
|
|
401
407
|
if (parent) {
|
|
402
408
|
if (parent.tagName === "w:r" || parent.tagName.endsWith(":r")) {
|
|
403
409
|
const nonRprChildren = Array.from(parent.childNodes).filter(
|
|
404
|
-
(c) =>
|
|
410
|
+
(c) =>
|
|
411
|
+
c.nodeType === 1 &&
|
|
412
|
+
(c as Element).tagName !== "w:rPr" &&
|
|
413
|
+
(c as Element).tagName !== "rPr",
|
|
405
414
|
);
|
|
406
415
|
if (nonRprChildren.length <= 1) {
|
|
407
416
|
parent.parentNode?.removeChild(parent);
|
|
@@ -420,8 +429,12 @@ export class RedlineEngine {
|
|
|
420
429
|
for (const part of pkg.parts) {
|
|
421
430
|
if (part.partname.toLowerCase().includes("comments")) {
|
|
422
431
|
comment_partnames.add(part.partname);
|
|
423
|
-
const withSlash = part.partname.startsWith("/")
|
|
424
|
-
|
|
432
|
+
const withSlash = part.partname.startsWith("/")
|
|
433
|
+
? part.partname
|
|
434
|
+
: "/" + part.partname;
|
|
435
|
+
const withoutSlash = part.partname.startsWith("/")
|
|
436
|
+
? part.partname.substring(1)
|
|
437
|
+
: part.partname;
|
|
425
438
|
comment_partnames.add(withSlash);
|
|
426
439
|
comment_partnames.add(withoutSlash);
|
|
427
440
|
}
|
|
@@ -437,8 +450,10 @@ export class RedlineEngine {
|
|
|
437
450
|
const target = rel.getAttribute("Target") || "";
|
|
438
451
|
if (target.toLowerCase().includes("comments")) {
|
|
439
452
|
toRemove.push(rel);
|
|
440
|
-
|
|
441
|
-
const sourcePath = part.partname
|
|
453
|
+
|
|
454
|
+
const sourcePath = part.partname
|
|
455
|
+
.replace("/_rels/", "/")
|
|
456
|
+
.replace(".rels", "");
|
|
442
457
|
const sourcePart = pkg.getPartByPath(sourcePath);
|
|
443
458
|
if (sourcePart) {
|
|
444
459
|
const relId = rel.getAttribute("Id");
|
|
@@ -459,7 +474,10 @@ export class RedlineEngine {
|
|
|
459
474
|
const toRemove: Element[] = [];
|
|
460
475
|
for (const override of overrides) {
|
|
461
476
|
const partName = override.getAttribute("PartName") || "";
|
|
462
|
-
if (
|
|
477
|
+
if (
|
|
478
|
+
comment_partnames.has(partName) ||
|
|
479
|
+
partName.toLowerCase().includes("comments")
|
|
480
|
+
) {
|
|
463
481
|
toRemove.push(override);
|
|
464
482
|
}
|
|
465
483
|
}
|
|
@@ -469,7 +487,9 @@ export class RedlineEngine {
|
|
|
469
487
|
}
|
|
470
488
|
|
|
471
489
|
// Remove comment parts from pkg.parts
|
|
472
|
-
pkg.parts = pkg.parts.filter(
|
|
490
|
+
pkg.parts = pkg.parts.filter(
|
|
491
|
+
(p) => !p.partname.toLowerCase().includes("comments"),
|
|
492
|
+
);
|
|
473
493
|
|
|
474
494
|
// Remove comment files from pkg.unzipped
|
|
475
495
|
for (const key of Object.keys(pkg.unzipped)) {
|
|
@@ -1176,12 +1196,16 @@ export class RedlineEngine {
|
|
|
1176
1196
|
|
|
1177
1197
|
let matches = this.mapper.find_all_match_indices(edit.target_text);
|
|
1178
1198
|
let activeText = this.mapper.full_text;
|
|
1199
|
+
let target_mapper = this.mapper;
|
|
1179
1200
|
|
|
1180
1201
|
if (matches.length === 0) {
|
|
1181
1202
|
if (!this.clean_mapper)
|
|
1182
1203
|
this.clean_mapper = new DocumentMapper(this.doc, true);
|
|
1183
1204
|
matches = this.clean_mapper.find_all_match_indices(edit.target_text);
|
|
1184
|
-
if (matches.length > 0)
|
|
1205
|
+
if (matches.length > 0) {
|
|
1206
|
+
activeText = this.clean_mapper.full_text;
|
|
1207
|
+
target_mapper = this.clean_mapper;
|
|
1208
|
+
}
|
|
1185
1209
|
}
|
|
1186
1210
|
|
|
1187
1211
|
// BUG-23-5: a copy of the target that lives entirely inside a tracked
|
|
@@ -1189,7 +1213,7 @@ export class RedlineEngine {
|
|
|
1189
1213
|
// count toward ambiguity. Drop matches whose overlapping real text is
|
|
1190
1214
|
// exclusively deleted. Only applies to the raw mapper (the clean mapper
|
|
1191
1215
|
// already omits deleted text).
|
|
1192
|
-
if (activeText === this.mapper.full_text && matches.length >
|
|
1216
|
+
if (activeText === this.mapper.full_text && matches.length > 0) {
|
|
1193
1217
|
const liveMatches = matches.filter(([start, length]) => {
|
|
1194
1218
|
const realSpans = this.mapper.spans.filter(
|
|
1195
1219
|
(s) => s.run !== null && s.end > start && s.start < start + length,
|
|
@@ -1199,13 +1223,55 @@ export class RedlineEngine {
|
|
|
1199
1223
|
// part of a tracked deletion).
|
|
1200
1224
|
return realSpans.some((s) => !s.del_id);
|
|
1201
1225
|
});
|
|
1202
|
-
|
|
1226
|
+
matches = liveMatches;
|
|
1203
1227
|
}
|
|
1204
1228
|
|
|
1229
|
+
let is_deleted_text = false;
|
|
1230
|
+
const deleted_authors = new Set<string>();
|
|
1231
|
+
|
|
1205
1232
|
if (matches.length === 0) {
|
|
1206
|
-
|
|
1207
|
-
|
|
1208
|
-
|
|
1233
|
+
if (!this.original_mapper) {
|
|
1234
|
+
this.original_mapper = new DocumentMapper(this.doc, false, true);
|
|
1235
|
+
}
|
|
1236
|
+
const orig_matches = this.original_mapper.find_all_match_indices(edit.target_text);
|
|
1237
|
+
if (orig_matches.length > 0) {
|
|
1238
|
+
is_deleted_text = true;
|
|
1239
|
+
for (const [start, length] of orig_matches) {
|
|
1240
|
+
const spans = this.original_mapper.spans.filter(
|
|
1241
|
+
(s) => s.end > start && s.start < start + length,
|
|
1242
|
+
);
|
|
1243
|
+
for (const s of spans) {
|
|
1244
|
+
if (s.run !== null) {
|
|
1245
|
+
let parent = s.run._element as Node | null;
|
|
1246
|
+
while (parent) {
|
|
1247
|
+
if (parent.nodeType === 1 && (parent as Element).tagName === "w:del") {
|
|
1248
|
+
const auth = (parent as Element).getAttribute("w:author");
|
|
1249
|
+
if (auth) {
|
|
1250
|
+
deleted_authors.add(auth);
|
|
1251
|
+
}
|
|
1252
|
+
break;
|
|
1253
|
+
}
|
|
1254
|
+
parent = parent.parentNode;
|
|
1255
|
+
}
|
|
1256
|
+
}
|
|
1257
|
+
}
|
|
1258
|
+
}
|
|
1259
|
+
}
|
|
1260
|
+
}
|
|
1261
|
+
|
|
1262
|
+
if (matches.length === 0) {
|
|
1263
|
+
if (is_deleted_text) {
|
|
1264
|
+
const author_phrase = deleted_authors.size > 0
|
|
1265
|
+
? `by ${Array.from(deleted_authors).sort().join(", ")}`
|
|
1266
|
+
: "by an existing revision";
|
|
1267
|
+
errors.push(
|
|
1268
|
+
`- Edit ${i + 1} Failed: Target text matches text inside a tracked deletion ${author_phrase}. Reject/accept that change first or target the active replacement text instead.`,
|
|
1269
|
+
);
|
|
1270
|
+
} else {
|
|
1271
|
+
errors.push(
|
|
1272
|
+
`- Edit ${i + 1} Failed: Target text not found in document:\n "${edit.target_text}"`,
|
|
1273
|
+
);
|
|
1274
|
+
}
|
|
1209
1275
|
} else if (matches.length > 1) {
|
|
1210
1276
|
const positions: [number, number][] = matches.map(([start, length]) => [
|
|
1211
1277
|
start,
|
|
@@ -1230,18 +1296,29 @@ export class RedlineEngine {
|
|
|
1230
1296
|
const [pfx, sfx] = trim_common_context(matched, edit.new_text || "");
|
|
1231
1297
|
const t_end = matched.length - sfx;
|
|
1232
1298
|
const final_target = matched.substring(pfx, t_end);
|
|
1233
|
-
const final_new = (edit.new_text || "").substring(
|
|
1299
|
+
const final_new = (edit.new_text || "").substring(
|
|
1300
|
+
pfx,
|
|
1301
|
+
(edit.new_text || "").length - sfx,
|
|
1302
|
+
);
|
|
1234
1303
|
if (final_target.includes("\n\n")) {
|
|
1235
1304
|
if (final_new.includes("\n\n")) {
|
|
1236
1305
|
const parts = matched.split("\n\n");
|
|
1237
|
-
if (
|
|
1306
|
+
if (
|
|
1307
|
+
parts.length >= 2 &&
|
|
1308
|
+
parts[0].trim() !== "" &&
|
|
1309
|
+
parts[parts.length - 1].trim() !== ""
|
|
1310
|
+
) {
|
|
1238
1311
|
errors.push(
|
|
1239
1312
|
`- Edit ${i + 1} Failed: target_text spans a paragraph boundary with body text on both sides. The paragraph break is a structural element, not literal text, so it cannot be replaced as a single span without corrupting the document. Split this into one edit per paragraph.`,
|
|
1240
1313
|
);
|
|
1241
1314
|
}
|
|
1242
1315
|
} else {
|
|
1243
1316
|
const parts = final_target.split("\n\n");
|
|
1244
|
-
if (
|
|
1317
|
+
if (
|
|
1318
|
+
parts.length >= 2 &&
|
|
1319
|
+
parts[0].trim() !== "" &&
|
|
1320
|
+
parts[parts.length - 1].trim() !== ""
|
|
1321
|
+
) {
|
|
1245
1322
|
errors.push(
|
|
1246
1323
|
`- Edit ${i + 1} Failed: target_text spans a paragraph boundary with body text on both sides. The paragraph break is a structural element, not literal text, so it cannot be replaced as a single span without corrupting the document. Split this into one edit per paragraph.`,
|
|
1247
1324
|
);
|
|
@@ -1318,7 +1395,10 @@ export class RedlineEngine {
|
|
|
1318
1395
|
return errors;
|
|
1319
1396
|
}
|
|
1320
1397
|
|
|
1321
|
-
public process_batch(
|
|
1398
|
+
public process_batch(
|
|
1399
|
+
changes: DocumentChange[],
|
|
1400
|
+
dry_run: boolean = false,
|
|
1401
|
+
): any {
|
|
1322
1402
|
if (dry_run) {
|
|
1323
1403
|
const baselines = new Map<any, Element>();
|
|
1324
1404
|
for (const part of this.doc.pkg.parts) {
|
|
@@ -1345,7 +1425,10 @@ export class RedlineEngine {
|
|
|
1345
1425
|
}
|
|
1346
1426
|
}
|
|
1347
1427
|
|
|
1348
|
-
private _process_batch_internal(
|
|
1428
|
+
private _process_batch_internal(
|
|
1429
|
+
changes: DocumentChange[],
|
|
1430
|
+
dry_run_mode: boolean = false,
|
|
1431
|
+
): any {
|
|
1349
1432
|
this.skipped_details = [];
|
|
1350
1433
|
const actions = changes.filter((c) =>
|
|
1351
1434
|
["accept", "reject", "reply"].includes(c.type),
|
|
@@ -1398,7 +1481,9 @@ export class RedlineEngine {
|
|
|
1398
1481
|
if (dry_run_mode) {
|
|
1399
1482
|
for (const edit of edits) {
|
|
1400
1483
|
const single_errors = this.validate_edits([edit]);
|
|
1401
|
-
const warning = this._check_punctuation_warning(
|
|
1484
|
+
const warning = this._check_punctuation_warning(
|
|
1485
|
+
(edit as any).target_text || "",
|
|
1486
|
+
);
|
|
1402
1487
|
if (single_errors.length > 0) {
|
|
1403
1488
|
skipped_edits++;
|
|
1404
1489
|
edits_reports.push({
|
|
@@ -1428,7 +1513,10 @@ export class RedlineEngine {
|
|
|
1428
1513
|
});
|
|
1429
1514
|
} else {
|
|
1430
1515
|
skipped_edits++;
|
|
1431
|
-
const error_msg =
|
|
1516
|
+
const error_msg =
|
|
1517
|
+
this.skipped_details.length > 0
|
|
1518
|
+
? this.skipped_details[this.skipped_details.length - 1]
|
|
1519
|
+
: "Failed to apply edit";
|
|
1432
1520
|
edits_reports.push({
|
|
1433
1521
|
status: "failed",
|
|
1434
1522
|
target_text: (edit as any).target_text || "",
|
|
@@ -1445,7 +1533,7 @@ export class RedlineEngine {
|
|
|
1445
1533
|
if (errors.length > 0) {
|
|
1446
1534
|
throw new BatchValidationError(errors);
|
|
1447
1535
|
}
|
|
1448
|
-
const cloned_edits = edits.map(e => JSON.parse(JSON.stringify(e)));
|
|
1536
|
+
const cloned_edits = edits.map((e) => JSON.parse(JSON.stringify(e)));
|
|
1449
1537
|
const res = this.apply_edits(cloned_edits);
|
|
1450
1538
|
applied_edits = res[0];
|
|
1451
1539
|
skipped_edits = res[1];
|
|
@@ -1453,7 +1541,9 @@ export class RedlineEngine {
|
|
|
1453
1541
|
for (const edit of cloned_edits) {
|
|
1454
1542
|
const success = (edit as any)._applied_status || false;
|
|
1455
1543
|
const error_msg = (edit as any)._error_msg || null;
|
|
1456
|
-
const warning = this._check_punctuation_warning(
|
|
1544
|
+
const warning = this._check_punctuation_warning(
|
|
1545
|
+
(edit as any).target_text || "",
|
|
1546
|
+
);
|
|
1457
1547
|
let critic_markup = null;
|
|
1458
1548
|
let clean_text = null;
|
|
1459
1549
|
if (success) {
|
|
@@ -1482,7 +1572,7 @@ export class RedlineEngine {
|
|
|
1482
1572
|
skipped_details: this.skipped_details,
|
|
1483
1573
|
edits: edits_reports,
|
|
1484
1574
|
engine: "node",
|
|
1485
|
-
version: "1.
|
|
1575
|
+
version: "1.10.0",
|
|
1486
1576
|
};
|
|
1487
1577
|
}
|
|
1488
1578
|
|
|
@@ -1523,7 +1613,9 @@ export class RedlineEngine {
|
|
|
1523
1613
|
} else {
|
|
1524
1614
|
skipped++;
|
|
1525
1615
|
edit._applied_status = false;
|
|
1526
|
-
const target_snippet = (edit.target_text || "")
|
|
1616
|
+
const target_snippet = (edit.target_text || "")
|
|
1617
|
+
.trim()
|
|
1618
|
+
.substring(0, 40);
|
|
1527
1619
|
const msg = `- Failed to locate row target: '${target_snippet}...'`;
|
|
1528
1620
|
this.skipped_details.push(msg);
|
|
1529
1621
|
edit._error_msg = msg;
|
|
@@ -1535,7 +1627,10 @@ export class RedlineEngine {
|
|
|
1535
1627
|
for (const r of resolved) {
|
|
1536
1628
|
r._resolved_start_idx = r._match_start_index;
|
|
1537
1629
|
r._parent_edit_ref = edit;
|
|
1538
|
-
if (
|
|
1630
|
+
if (
|
|
1631
|
+
edit._resolved_start_idx === undefined ||
|
|
1632
|
+
edit._resolved_start_idx === null
|
|
1633
|
+
) {
|
|
1539
1634
|
edit._resolved_start_idx = r._resolved_start_idx;
|
|
1540
1635
|
}
|
|
1541
1636
|
if (!edit._resolved_proxy_edit) {
|
|
@@ -1563,7 +1658,8 @@ export class RedlineEngine {
|
|
|
1563
1658
|
}
|
|
1564
1659
|
|
|
1565
1660
|
resolved_edits.sort(
|
|
1566
|
-
(a, b) =>
|
|
1661
|
+
(a, b) =>
|
|
1662
|
+
(b[0]._resolved_start_idx || 0) - (a[0]._resolved_start_idx || 0),
|
|
1567
1663
|
);
|
|
1568
1664
|
const occupied_ranges: [number, number][] = [];
|
|
1569
1665
|
|
|
@@ -1722,7 +1818,11 @@ export class RedlineEngine {
|
|
|
1722
1818
|
}
|
|
1723
1819
|
|
|
1724
1820
|
private _apply_table_edit(edit: any, rebuild_map: boolean): boolean {
|
|
1725
|
-
const start_idx =
|
|
1821
|
+
const start_idx =
|
|
1822
|
+
edit._resolved_start_idx !== undefined &&
|
|
1823
|
+
edit._resolved_start_idx !== null
|
|
1824
|
+
? edit._resolved_start_idx
|
|
1825
|
+
: edit._match_start_index || 0;
|
|
1726
1826
|
const [anchor_run, anchor_para] = this.mapper.get_insertion_anchor(
|
|
1727
1827
|
start_idx,
|
|
1728
1828
|
rebuild_map,
|
|
@@ -1877,7 +1977,11 @@ export class RedlineEngine {
|
|
|
1877
1977
|
): boolean {
|
|
1878
1978
|
let op = edit._internal_op;
|
|
1879
1979
|
const active_mapper = edit._active_mapper_ref || this.mapper;
|
|
1880
|
-
const start_idx =
|
|
1980
|
+
const start_idx =
|
|
1981
|
+
edit._resolved_start_idx !== undefined &&
|
|
1982
|
+
edit._resolved_start_idx !== null
|
|
1983
|
+
? edit._resolved_start_idx
|
|
1984
|
+
: edit._match_start_index || 0;
|
|
1881
1985
|
const length = edit.target_text ? edit.target_text.length : 0;
|
|
1882
1986
|
|
|
1883
1987
|
const del_id = ["DELETION", "MODIFICATION"].includes(op)
|
|
@@ -1967,7 +2071,9 @@ export class RedlineEngine {
|
|
|
1967
2071
|
) {
|
|
1968
2072
|
const body = _bug233_target_para.parentNode as Element;
|
|
1969
2073
|
const xmlDoc = this.doc.part._element.ownerDocument!;
|
|
1970
|
-
const lines = _bug233_new
|
|
2074
|
+
const lines = _bug233_new
|
|
2075
|
+
.split(/[\r\n]+/)
|
|
2076
|
+
.filter((l: string) => l !== "");
|
|
1971
2077
|
let firstNew: Element | null = null;
|
|
1972
2078
|
let lastNew: Element | null = null;
|
|
1973
2079
|
let lastIns: Element | null = null;
|
|
@@ -2069,7 +2175,9 @@ export class RedlineEngine {
|
|
|
2069
2175
|
if (start_p) {
|
|
2070
2176
|
let first_anchor_target = result.first_node;
|
|
2071
2177
|
if (result.first_node.tagName === "w:p") {
|
|
2072
|
-
first_anchor_target =
|
|
2178
|
+
first_anchor_target =
|
|
2179
|
+
findAllDescendants(result.first_node, "w:ins")[0] ||
|
|
2180
|
+
result.first_node;
|
|
2073
2181
|
}
|
|
2074
2182
|
const start_anchor = ascend_to_paragraph_child(
|
|
2075
2183
|
first_anchor_target,
|
|
@@ -2086,22 +2194,27 @@ export class RedlineEngine {
|
|
|
2086
2194
|
end_anchor,
|
|
2087
2195
|
edit.comment,
|
|
2088
2196
|
);
|
|
2089
|
-
|
|
2090
|
-
|
|
2091
|
-
|
|
2092
|
-
|
|
2093
|
-
|
|
2197
|
+
}
|
|
2198
|
+
} else {
|
|
2199
|
+
// Inline only: anchor around first_node in its host paragraph.
|
|
2200
|
+
let host_p: Element | null = result.first_node;
|
|
2201
|
+
while (host_p && host_p.tagName !== "w:p")
|
|
2094
2202
|
host_p = host_p.parentNode as Element;
|
|
2095
|
-
|
|
2203
|
+
if (host_p) {
|
|
2096
2204
|
let first_anchor_target = result.first_node;
|
|
2097
2205
|
if (result.first_node.tagName === "w:p") {
|
|
2098
|
-
first_anchor_target =
|
|
2206
|
+
first_anchor_target =
|
|
2207
|
+
findAllDescendants(result.first_node, "w:ins")[0] ||
|
|
2208
|
+
result.first_node;
|
|
2099
2209
|
}
|
|
2100
|
-
const anchor = ascend_to_paragraph_child(
|
|
2210
|
+
const anchor = ascend_to_paragraph_child(
|
|
2211
|
+
first_anchor_target,
|
|
2212
|
+
host_p,
|
|
2213
|
+
);
|
|
2101
2214
|
this._attach_comment(host_p, anchor, anchor, edit.comment);
|
|
2102
|
-
}
|
|
2103
2215
|
}
|
|
2104
2216
|
}
|
|
2217
|
+
}
|
|
2105
2218
|
return true;
|
|
2106
2219
|
}
|
|
2107
2220
|
|
|
@@ -2111,7 +2224,10 @@ export class RedlineEngine {
|
|
|
2111
2224
|
length,
|
|
2112
2225
|
rebuild_map,
|
|
2113
2226
|
);
|
|
2114
|
-
const virtual_spans = active_mapper.get_virtual_spans_in_range(
|
|
2227
|
+
const virtual_spans = active_mapper.get_virtual_spans_in_range(
|
|
2228
|
+
start_idx,
|
|
2229
|
+
length,
|
|
2230
|
+
);
|
|
2115
2231
|
|
|
2116
2232
|
if (target_runs.length === 0 && virtual_spans.length === 0) return false;
|
|
2117
2233
|
|
|
@@ -2187,18 +2303,26 @@ export class RedlineEngine {
|
|
|
2187
2303
|
|
|
2188
2304
|
// PHASE 2: OOXML Paragraph Merge Protocol
|
|
2189
2305
|
if (op === "DELETION" || op === "MODIFICATION") {
|
|
2190
|
-
if (
|
|
2306
|
+
if (
|
|
2307
|
+
op === "MODIFICATION" &&
|
|
2308
|
+
target_runs.length === 0 &&
|
|
2309
|
+
virtual_spans.length > 0 &&
|
|
2310
|
+
edit.new_text
|
|
2311
|
+
) {
|
|
2191
2312
|
const first_span = virtual_spans[0];
|
|
2192
2313
|
if (first_span.paragraph) {
|
|
2193
2314
|
const p1_el = first_span.paragraph._element;
|
|
2194
2315
|
const last_runs = findAllDescendants(p1_el, "w:r");
|
|
2195
|
-
const anchor =
|
|
2196
|
-
|
|
2316
|
+
const anchor =
|
|
2317
|
+
last_runs.length > 0
|
|
2318
|
+
? new Run(last_runs[last_runs.length - 1], first_span.paragraph)
|
|
2319
|
+
: null;
|
|
2320
|
+
|
|
2197
2321
|
const result = this._track_insert_multiline(
|
|
2198
2322
|
edit.new_text,
|
|
2199
2323
|
anchor,
|
|
2200
2324
|
first_span.paragraph,
|
|
2201
|
-
ins_id
|
|
2325
|
+
ins_id!,
|
|
2202
2326
|
);
|
|
2203
2327
|
if (result.first_node) {
|
|
2204
2328
|
p1_el.appendChild(result.first_node);
|
|
@@ -2218,7 +2342,10 @@ export class RedlineEngine {
|
|
|
2218
2342
|
let pPr = findChild(p1_element, "w:pPr");
|
|
2219
2343
|
if (!pPr) {
|
|
2220
2344
|
pPr = p1_element.ownerDocument!.createElement("w:pPr") as Element;
|
|
2221
|
-
p1_element.insertBefore(
|
|
2345
|
+
p1_element.insertBefore(
|
|
2346
|
+
pPr,
|
|
2347
|
+
p1_element.firstChild as Node | null,
|
|
2348
|
+
);
|
|
2222
2349
|
}
|
|
2223
2350
|
let rPr = findChild(pPr!, "w:rPr");
|
|
2224
2351
|
if (!rPr) {
|
|
@@ -2230,7 +2357,10 @@ export class RedlineEngine {
|
|
|
2230
2357
|
|
|
2231
2358
|
const children = Array.from(p2_element.childNodes);
|
|
2232
2359
|
for (const child of children) {
|
|
2233
|
-
if (
|
|
2360
|
+
if (
|
|
2361
|
+
child.nodeType === 1 &&
|
|
2362
|
+
(child as Element).tagName === "w:pPr"
|
|
2363
|
+
) {
|
|
2234
2364
|
continue;
|
|
2235
2365
|
}
|
|
2236
2366
|
p1_element.appendChild(child);
|
package/src/index.ts
CHANGED
|
@@ -9,5 +9,5 @@ export { generate_edits_from_text, trim_common_context, create_unified_diff, cre
|
|
|
9
9
|
export { apply_edits_to_markdown } from './markup.js';
|
|
10
10
|
export { paginate, split_structural_appendix, PaginationResult, PageInfo } from './pagination.js';
|
|
11
11
|
export { extract_outline, OutlineNode } from './outline.js';
|
|
12
|
-
export { extractTextFromBuffer } from './ingest.js';
|
|
12
|
+
export { extractTextFromBuffer, _extractTextFromDoc } from './ingest.js';
|
|
13
13
|
export { finalize_document, FinalizeOptions, FinalizeResult } from './sanitize/core.js';
|
package/src/ingest.ts
CHANGED
|
@@ -11,18 +11,24 @@ import { extract_comments_data } from './comments.js';
|
|
|
11
11
|
|
|
12
12
|
export async function extractTextFromBuffer(buffer: Buffer, cleanView = false): Promise<string> {
|
|
13
13
|
const doc = await DocumentObject.load(buffer);
|
|
14
|
-
return _extractTextFromDoc(doc, cleanView);
|
|
14
|
+
return _extractTextFromDoc(doc, cleanView) as string;
|
|
15
15
|
}
|
|
16
16
|
|
|
17
|
-
export function _extractTextFromDoc(
|
|
17
|
+
export function _extractTextFromDoc(
|
|
18
|
+
doc: DocumentObject,
|
|
19
|
+
cleanView = false,
|
|
20
|
+
includeAppendix = true,
|
|
21
|
+
return_paragraph_offsets = false,
|
|
22
|
+
): string | { text: string; paragraph_offsets: Map<any, [number, number]> } {
|
|
18
23
|
const comments_map = extract_comments_data(doc.pkg);
|
|
19
24
|
|
|
20
25
|
const full_text: string[] = [];
|
|
26
|
+
const paragraph_offsets = new Map<any, [number, number]>();
|
|
21
27
|
let cursor = 0;
|
|
22
28
|
|
|
23
29
|
for (const part of iter_document_parts(doc)) {
|
|
24
30
|
const part_cursor = full_text.length > 0 ? cursor + 2 : cursor;
|
|
25
|
-
const part_text = _extract_blocks(part, comments_map, cleanView, part_cursor);
|
|
31
|
+
const part_text = _extract_blocks(part, comments_map, cleanView, part_cursor, return_paragraph_offsets ? paragraph_offsets : undefined);
|
|
26
32
|
if (part_text) {
|
|
27
33
|
if (full_text.length > 0) cursor += 2;
|
|
28
34
|
full_text.push(part_text);
|
|
@@ -37,10 +43,19 @@ export function _extractTextFromDoc(doc: DocumentObject, cleanView = false, incl
|
|
|
37
43
|
if (appendix) base_text += appendix;
|
|
38
44
|
}
|
|
39
45
|
|
|
46
|
+
if (return_paragraph_offsets) {
|
|
47
|
+
return { text: base_text, paragraph_offsets };
|
|
48
|
+
}
|
|
40
49
|
return base_text;
|
|
41
50
|
}
|
|
42
51
|
|
|
43
|
-
function _extract_blocks(
|
|
52
|
+
function _extract_blocks(
|
|
53
|
+
container: any,
|
|
54
|
+
comments_map: any,
|
|
55
|
+
cleanView: boolean,
|
|
56
|
+
cursor: number,
|
|
57
|
+
paragraph_offsets?: Map<any, [number, number]>
|
|
58
|
+
): string {
|
|
44
59
|
const part = container.part || container;
|
|
45
60
|
const [style_cache, default_pstyle] = _get_style_cache(part);
|
|
46
61
|
|
|
@@ -62,7 +77,7 @@ function _extract_blocks(container: any, comments_map: any, cleanView: boolean,
|
|
|
62
77
|
const block_start = local_cursor;
|
|
63
78
|
|
|
64
79
|
if (item.constructor.name === 'FootnoteItem') {
|
|
65
|
-
const fn_text = _extract_blocks(item, comments_map, cleanView, block_start);
|
|
80
|
+
const fn_text = _extract_blocks(item, comments_map, cleanView, block_start, paragraph_offsets);
|
|
66
81
|
if (fn_text) {
|
|
67
82
|
blocks.push(fn_text);
|
|
68
83
|
local_cursor = block_start + fn_text.length;
|
|
@@ -78,11 +93,14 @@ function _extract_blocks(container: any, comments_map: any, cleanView: boolean,
|
|
|
78
93
|
const p_text = build_paragraph_text(item, comments_map, cleanView, style_cache, default_pstyle);
|
|
79
94
|
const full_block = prefix + p_text;
|
|
80
95
|
blocks.push(full_block);
|
|
96
|
+
if (paragraph_offsets) {
|
|
97
|
+
paragraph_offsets.set(item._element, [block_start, full_block.length]);
|
|
98
|
+
}
|
|
81
99
|
local_cursor = block_start + full_block.length;
|
|
82
100
|
is_first_para = false;
|
|
83
101
|
is_first_block = false;
|
|
84
102
|
} else if (item instanceof Table) {
|
|
85
|
-
const table_text = extract_table(item, comments_map, cleanView, block_start);
|
|
103
|
+
const table_text = extract_table(item, comments_map, cleanView, block_start, paragraph_offsets);
|
|
86
104
|
if (table_text) {
|
|
87
105
|
blocks.push(table_text);
|
|
88
106
|
local_cursor = block_start + table_text.length;
|
|
@@ -97,7 +115,13 @@ function _extract_blocks(container: any, comments_map: any, cleanView: boolean,
|
|
|
97
115
|
return blocks.join('\n\n');
|
|
98
116
|
}
|
|
99
117
|
|
|
100
|
-
export function extract_table(
|
|
118
|
+
export function extract_table(
|
|
119
|
+
table: Table,
|
|
120
|
+
comments_map: any,
|
|
121
|
+
cleanView: boolean,
|
|
122
|
+
cursor: number,
|
|
123
|
+
paragraph_offsets?: Map<any, [number, number]>
|
|
124
|
+
): string {
|
|
101
125
|
const rows_text: string[] = [];
|
|
102
126
|
let rows_processed = 0;
|
|
103
127
|
let local_cursor = cursor;
|
|
@@ -124,7 +148,7 @@ export function extract_table(table: Table, comments_map: any, cleanView: boolea
|
|
|
124
148
|
|
|
125
149
|
if (!first_cell) cell_cursor += 3;
|
|
126
150
|
|
|
127
|
-
const cell_content = _extract_blocks(cell, comments_map, cleanView, cell_cursor);
|
|
151
|
+
const cell_content = _extract_blocks(cell, comments_map, cleanView, cell_cursor, paragraph_offsets);
|
|
128
152
|
cell_texts.push(cell_content);
|
|
129
153
|
cell_cursor += cell_content.length;
|
|
130
154
|
first_cell = false;
|