@adeu/core 1.10.0 → 1.11.2

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@adeu/core",
3
- "version": "1.10.0",
3
+ "version": "1.11.2",
4
4
  "description": "",
5
5
  "main": "./dist/index.js",
6
6
  "types": "./dist/index.d.ts",
@@ -251,14 +251,14 @@ describe("Resolved Bugs Core Engine Verification", () => {
251
251
  const p = addParagraph(doc, "Short heading");
252
252
 
253
253
  const fakeCache = {
254
- CustomHeading: { name: "Custom Heading", outline_level: 2, bold: true },
254
+ Heading3: { name: "Heading 3", outline_level: 2, bold: true },
255
255
  };
256
256
  (doc.pkg as any)._adeu_style_cache = [fakeCache, "Normal"];
257
257
 
258
258
  const docEl = p.ownerDocument!;
259
259
  const pPr = docEl.createElement("w:pPr");
260
260
  const pStyle = docEl.createElement("w:pStyle");
261
- pStyle.setAttribute("w:val", "CustomHeading");
261
+ pStyle.setAttribute("w:val", "Heading3");
262
262
  pPr.appendChild(pStyle);
263
263
  p.insertBefore(pPr, p.firstChild);
264
264
 
package/src/engine.ts CHANGED
@@ -221,6 +221,7 @@ export class RedlineEngine {
221
221
  public mapper: DocumentMapper;
222
222
  public comments_manager: CommentsManager;
223
223
  public clean_mapper: DocumentMapper | null = null;
224
+ public original_mapper: DocumentMapper | null = null;
224
225
  public skipped_details: string[] = [];
225
226
 
226
227
  constructor(doc: DocumentObject, author: string = "Adeu AI (TS)") {
@@ -255,7 +256,9 @@ export class RedlineEngine {
255
256
  return null;
256
257
  }
257
258
 
258
- private _build_edit_context_previews(edit: any): [string | null, string | null] {
259
+ private _build_edit_context_previews(
260
+ edit: any,
261
+ ): [string | null, string | null] {
259
262
  if (edit.type !== "modify") return [null, null];
260
263
  if (edit._resolved_proxy_edit) {
261
264
  edit = edit._resolved_proxy_edit;
@@ -271,7 +274,10 @@ export class RedlineEngine {
271
274
 
272
275
  const before_start = Math.max(0, start_idx - 30);
273
276
  const context_before = full_text.substring(before_start, start_idx);
274
- const context_after = full_text.substring(start_idx + length, start_idx + length + 30);
277
+ const context_after = full_text.substring(
278
+ start_idx + length,
279
+ start_idx + length + 30,
280
+ );
275
281
 
276
282
  const critic_markup = `${context_before}{--${target_text}--}{++${new_text}++}${context_after}`;
277
283
 
@@ -337,7 +343,7 @@ export class RedlineEngine {
337
343
  for (const tag of ["w:t", "w:tab", "w:br"]) {
338
344
  for (const child of findAllDescendants(p, tag)) {
339
345
  if (tag === "w:t" && !child.textContent) continue;
340
-
346
+
341
347
  let is_deleted = false;
342
348
  let curr = child.parentNode as Element | null;
343
349
  while (curr && curr !== p) {
@@ -401,7 +407,10 @@ export class RedlineEngine {
401
407
  if (parent) {
402
408
  if (parent.tagName === "w:r" || parent.tagName.endsWith(":r")) {
403
409
  const nonRprChildren = Array.from(parent.childNodes).filter(
404
- (c) => c.nodeType === 1 && (c as Element).tagName !== "w:rPr" && (c as Element).tagName !== "rPr"
410
+ (c) =>
411
+ c.nodeType === 1 &&
412
+ (c as Element).tagName !== "w:rPr" &&
413
+ (c as Element).tagName !== "rPr",
405
414
  );
406
415
  if (nonRprChildren.length <= 1) {
407
416
  parent.parentNode?.removeChild(parent);
@@ -420,8 +429,12 @@ export class RedlineEngine {
420
429
  for (const part of pkg.parts) {
421
430
  if (part.partname.toLowerCase().includes("comments")) {
422
431
  comment_partnames.add(part.partname);
423
- const withSlash = part.partname.startsWith("/") ? part.partname : "/" + part.partname;
424
- const withoutSlash = part.partname.startsWith("/") ? part.partname.substring(1) : part.partname;
432
+ const withSlash = part.partname.startsWith("/")
433
+ ? part.partname
434
+ : "/" + part.partname;
435
+ const withoutSlash = part.partname.startsWith("/")
436
+ ? part.partname.substring(1)
437
+ : part.partname;
425
438
  comment_partnames.add(withSlash);
426
439
  comment_partnames.add(withoutSlash);
427
440
  }
@@ -437,8 +450,10 @@ export class RedlineEngine {
437
450
  const target = rel.getAttribute("Target") || "";
438
451
  if (target.toLowerCase().includes("comments")) {
439
452
  toRemove.push(rel);
440
-
441
- const sourcePath = part.partname.replace("/_rels/", "/").replace(".rels", "");
453
+
454
+ const sourcePath = part.partname
455
+ .replace("/_rels/", "/")
456
+ .replace(".rels", "");
442
457
  const sourcePart = pkg.getPartByPath(sourcePath);
443
458
  if (sourcePart) {
444
459
  const relId = rel.getAttribute("Id");
@@ -459,7 +474,10 @@ export class RedlineEngine {
459
474
  const toRemove: Element[] = [];
460
475
  for (const override of overrides) {
461
476
  const partName = override.getAttribute("PartName") || "";
462
- if (comment_partnames.has(partName) || partName.toLowerCase().includes("comments")) {
477
+ if (
478
+ comment_partnames.has(partName) ||
479
+ partName.toLowerCase().includes("comments")
480
+ ) {
463
481
  toRemove.push(override);
464
482
  }
465
483
  }
@@ -469,7 +487,9 @@ export class RedlineEngine {
469
487
  }
470
488
 
471
489
  // Remove comment parts from pkg.parts
472
- pkg.parts = pkg.parts.filter(p => !p.partname.toLowerCase().includes("comments"));
490
+ pkg.parts = pkg.parts.filter(
491
+ (p) => !p.partname.toLowerCase().includes("comments"),
492
+ );
473
493
 
474
494
  // Remove comment files from pkg.unzipped
475
495
  for (const key of Object.keys(pkg.unzipped)) {
@@ -1176,12 +1196,16 @@ export class RedlineEngine {
1176
1196
 
1177
1197
  let matches = this.mapper.find_all_match_indices(edit.target_text);
1178
1198
  let activeText = this.mapper.full_text;
1199
+ let target_mapper = this.mapper;
1179
1200
 
1180
1201
  if (matches.length === 0) {
1181
1202
  if (!this.clean_mapper)
1182
1203
  this.clean_mapper = new DocumentMapper(this.doc, true);
1183
1204
  matches = this.clean_mapper.find_all_match_indices(edit.target_text);
1184
- if (matches.length > 0) activeText = this.clean_mapper.full_text;
1205
+ if (matches.length > 0) {
1206
+ activeText = this.clean_mapper.full_text;
1207
+ target_mapper = this.clean_mapper;
1208
+ }
1185
1209
  }
1186
1210
 
1187
1211
  // BUG-23-5: a copy of the target that lives entirely inside a tracked
@@ -1189,7 +1213,7 @@ export class RedlineEngine {
1189
1213
  // count toward ambiguity. Drop matches whose overlapping real text is
1190
1214
  // exclusively deleted. Only applies to the raw mapper (the clean mapper
1191
1215
  // already omits deleted text).
1192
- if (activeText === this.mapper.full_text && matches.length > 1) {
1216
+ if (activeText === this.mapper.full_text && matches.length > 0) {
1193
1217
  const liveMatches = matches.filter(([start, length]) => {
1194
1218
  const realSpans = this.mapper.spans.filter(
1195
1219
  (s) => s.run !== null && s.end > start && s.start < start + length,
@@ -1199,13 +1223,55 @@ export class RedlineEngine {
1199
1223
  // part of a tracked deletion).
1200
1224
  return realSpans.some((s) => !s.del_id);
1201
1225
  });
1202
- if (liveMatches.length > 0) matches = liveMatches;
1226
+ matches = liveMatches;
1203
1227
  }
1204
1228
 
1229
+ let is_deleted_text = false;
1230
+ const deleted_authors = new Set<string>();
1231
+
1205
1232
  if (matches.length === 0) {
1206
- errors.push(
1207
- `- Edit ${i + 1} Failed: Target text not found in document:\n "${edit.target_text}"`,
1208
- );
1233
+ if (!this.original_mapper) {
1234
+ this.original_mapper = new DocumentMapper(this.doc, false, true);
1235
+ }
1236
+ const orig_matches = this.original_mapper.find_all_match_indices(edit.target_text);
1237
+ if (orig_matches.length > 0) {
1238
+ is_deleted_text = true;
1239
+ for (const [start, length] of orig_matches) {
1240
+ const spans = this.original_mapper.spans.filter(
1241
+ (s) => s.end > start && s.start < start + length,
1242
+ );
1243
+ for (const s of spans) {
1244
+ if (s.run !== null) {
1245
+ let parent = s.run._element as Node | null;
1246
+ while (parent) {
1247
+ if (parent.nodeType === 1 && (parent as Element).tagName === "w:del") {
1248
+ const auth = (parent as Element).getAttribute("w:author");
1249
+ if (auth) {
1250
+ deleted_authors.add(auth);
1251
+ }
1252
+ break;
1253
+ }
1254
+ parent = parent.parentNode;
1255
+ }
1256
+ }
1257
+ }
1258
+ }
1259
+ }
1260
+ }
1261
+
1262
+ if (matches.length === 0) {
1263
+ if (is_deleted_text) {
1264
+ const author_phrase = deleted_authors.size > 0
1265
+ ? `by ${Array.from(deleted_authors).sort().join(", ")}`
1266
+ : "by an existing revision";
1267
+ errors.push(
1268
+ `- Edit ${i + 1} Failed: Target text matches text inside a tracked deletion ${author_phrase}. Reject/accept that change first or target the active replacement text instead.`,
1269
+ );
1270
+ } else {
1271
+ errors.push(
1272
+ `- Edit ${i + 1} Failed: Target text not found in document:\n "${edit.target_text}"`,
1273
+ );
1274
+ }
1209
1275
  } else if (matches.length > 1) {
1210
1276
  const positions: [number, number][] = matches.map(([start, length]) => [
1211
1277
  start,
@@ -1230,18 +1296,29 @@ export class RedlineEngine {
1230
1296
  const [pfx, sfx] = trim_common_context(matched, edit.new_text || "");
1231
1297
  const t_end = matched.length - sfx;
1232
1298
  const final_target = matched.substring(pfx, t_end);
1233
- const final_new = (edit.new_text || "").substring(pfx, (edit.new_text || "").length - sfx);
1299
+ const final_new = (edit.new_text || "").substring(
1300
+ pfx,
1301
+ (edit.new_text || "").length - sfx,
1302
+ );
1234
1303
  if (final_target.includes("\n\n")) {
1235
1304
  if (final_new.includes("\n\n")) {
1236
1305
  const parts = matched.split("\n\n");
1237
- if (parts.length >= 2 && parts[0].trim() !== "" && parts[parts.length - 1].trim() !== "") {
1306
+ if (
1307
+ parts.length >= 2 &&
1308
+ parts[0].trim() !== "" &&
1309
+ parts[parts.length - 1].trim() !== ""
1310
+ ) {
1238
1311
  errors.push(
1239
1312
  `- Edit ${i + 1} Failed: target_text spans a paragraph boundary with body text on both sides. The paragraph break is a structural element, not literal text, so it cannot be replaced as a single span without corrupting the document. Split this into one edit per paragraph.`,
1240
1313
  );
1241
1314
  }
1242
1315
  } else {
1243
1316
  const parts = final_target.split("\n\n");
1244
- if (parts.length >= 2 && parts[0].trim() !== "" && parts[parts.length - 1].trim() !== "") {
1317
+ if (
1318
+ parts.length >= 2 &&
1319
+ parts[0].trim() !== "" &&
1320
+ parts[parts.length - 1].trim() !== ""
1321
+ ) {
1245
1322
  errors.push(
1246
1323
  `- Edit ${i + 1} Failed: target_text spans a paragraph boundary with body text on both sides. The paragraph break is a structural element, not literal text, so it cannot be replaced as a single span without corrupting the document. Split this into one edit per paragraph.`,
1247
1324
  );
@@ -1318,7 +1395,10 @@ export class RedlineEngine {
1318
1395
  return errors;
1319
1396
  }
1320
1397
 
1321
- public process_batch(changes: DocumentChange[], dry_run: boolean = false): any {
1398
+ public process_batch(
1399
+ changes: DocumentChange[],
1400
+ dry_run: boolean = false,
1401
+ ): any {
1322
1402
  if (dry_run) {
1323
1403
  const baselines = new Map<any, Element>();
1324
1404
  for (const part of this.doc.pkg.parts) {
@@ -1345,7 +1425,10 @@ export class RedlineEngine {
1345
1425
  }
1346
1426
  }
1347
1427
 
1348
- private _process_batch_internal(changes: DocumentChange[], dry_run_mode: boolean = false): any {
1428
+ private _process_batch_internal(
1429
+ changes: DocumentChange[],
1430
+ dry_run_mode: boolean = false,
1431
+ ): any {
1349
1432
  this.skipped_details = [];
1350
1433
  const actions = changes.filter((c) =>
1351
1434
  ["accept", "reject", "reply"].includes(c.type),
@@ -1398,7 +1481,9 @@ export class RedlineEngine {
1398
1481
  if (dry_run_mode) {
1399
1482
  for (const edit of edits) {
1400
1483
  const single_errors = this.validate_edits([edit]);
1401
- const warning = this._check_punctuation_warning((edit as any).target_text || "");
1484
+ const warning = this._check_punctuation_warning(
1485
+ (edit as any).target_text || "",
1486
+ );
1402
1487
  if (single_errors.length > 0) {
1403
1488
  skipped_edits++;
1404
1489
  edits_reports.push({
@@ -1428,7 +1513,10 @@ export class RedlineEngine {
1428
1513
  });
1429
1514
  } else {
1430
1515
  skipped_edits++;
1431
- const error_msg = this.skipped_details.length > 0 ? this.skipped_details[this.skipped_details.length - 1] : "Failed to apply edit";
1516
+ const error_msg =
1517
+ this.skipped_details.length > 0
1518
+ ? this.skipped_details[this.skipped_details.length - 1]
1519
+ : "Failed to apply edit";
1432
1520
  edits_reports.push({
1433
1521
  status: "failed",
1434
1522
  target_text: (edit as any).target_text || "",
@@ -1445,7 +1533,7 @@ export class RedlineEngine {
1445
1533
  if (errors.length > 0) {
1446
1534
  throw new BatchValidationError(errors);
1447
1535
  }
1448
- const cloned_edits = edits.map(e => JSON.parse(JSON.stringify(e)));
1536
+ const cloned_edits = edits.map((e) => JSON.parse(JSON.stringify(e)));
1449
1537
  const res = this.apply_edits(cloned_edits);
1450
1538
  applied_edits = res[0];
1451
1539
  skipped_edits = res[1];
@@ -1453,7 +1541,9 @@ export class RedlineEngine {
1453
1541
  for (const edit of cloned_edits) {
1454
1542
  const success = (edit as any)._applied_status || false;
1455
1543
  const error_msg = (edit as any)._error_msg || null;
1456
- const warning = this._check_punctuation_warning((edit as any).target_text || "");
1544
+ const warning = this._check_punctuation_warning(
1545
+ (edit as any).target_text || "",
1546
+ );
1457
1547
  let critic_markup = null;
1458
1548
  let clean_text = null;
1459
1549
  if (success) {
@@ -1482,7 +1572,7 @@ export class RedlineEngine {
1482
1572
  skipped_details: this.skipped_details,
1483
1573
  edits: edits_reports,
1484
1574
  engine: "node",
1485
- version: "1.9.0",
1575
+ version: "1.10.0",
1486
1576
  };
1487
1577
  }
1488
1578
 
@@ -1523,7 +1613,9 @@ export class RedlineEngine {
1523
1613
  } else {
1524
1614
  skipped++;
1525
1615
  edit._applied_status = false;
1526
- const target_snippet = (edit.target_text || "").trim().substring(0, 40);
1616
+ const target_snippet = (edit.target_text || "")
1617
+ .trim()
1618
+ .substring(0, 40);
1527
1619
  const msg = `- Failed to locate row target: '${target_snippet}...'`;
1528
1620
  this.skipped_details.push(msg);
1529
1621
  edit._error_msg = msg;
@@ -1535,7 +1627,10 @@ export class RedlineEngine {
1535
1627
  for (const r of resolved) {
1536
1628
  r._resolved_start_idx = r._match_start_index;
1537
1629
  r._parent_edit_ref = edit;
1538
- if (edit._resolved_start_idx === undefined || edit._resolved_start_idx === null) {
1630
+ if (
1631
+ edit._resolved_start_idx === undefined ||
1632
+ edit._resolved_start_idx === null
1633
+ ) {
1539
1634
  edit._resolved_start_idx = r._resolved_start_idx;
1540
1635
  }
1541
1636
  if (!edit._resolved_proxy_edit) {
@@ -1563,7 +1658,8 @@ export class RedlineEngine {
1563
1658
  }
1564
1659
 
1565
1660
  resolved_edits.sort(
1566
- (a, b) => (b[0]._resolved_start_idx || 0) - (a[0]._resolved_start_idx || 0),
1661
+ (a, b) =>
1662
+ (b[0]._resolved_start_idx || 0) - (a[0]._resolved_start_idx || 0),
1567
1663
  );
1568
1664
  const occupied_ranges: [number, number][] = [];
1569
1665
 
@@ -1722,7 +1818,11 @@ export class RedlineEngine {
1722
1818
  }
1723
1819
 
1724
1820
  private _apply_table_edit(edit: any, rebuild_map: boolean): boolean {
1725
- const start_idx = edit._resolved_start_idx !== undefined && edit._resolved_start_idx !== null ? edit._resolved_start_idx : (edit._match_start_index || 0);
1821
+ const start_idx =
1822
+ edit._resolved_start_idx !== undefined &&
1823
+ edit._resolved_start_idx !== null
1824
+ ? edit._resolved_start_idx
1825
+ : edit._match_start_index || 0;
1726
1826
  const [anchor_run, anchor_para] = this.mapper.get_insertion_anchor(
1727
1827
  start_idx,
1728
1828
  rebuild_map,
@@ -1877,7 +1977,11 @@ export class RedlineEngine {
1877
1977
  ): boolean {
1878
1978
  let op = edit._internal_op;
1879
1979
  const active_mapper = edit._active_mapper_ref || this.mapper;
1880
- const start_idx = edit._resolved_start_idx !== undefined && edit._resolved_start_idx !== null ? edit._resolved_start_idx : (edit._match_start_index || 0);
1980
+ const start_idx =
1981
+ edit._resolved_start_idx !== undefined &&
1982
+ edit._resolved_start_idx !== null
1983
+ ? edit._resolved_start_idx
1984
+ : edit._match_start_index || 0;
1881
1985
  const length = edit.target_text ? edit.target_text.length : 0;
1882
1986
 
1883
1987
  const del_id = ["DELETION", "MODIFICATION"].includes(op)
@@ -1967,7 +2071,9 @@ export class RedlineEngine {
1967
2071
  ) {
1968
2072
  const body = _bug233_target_para.parentNode as Element;
1969
2073
  const xmlDoc = this.doc.part._element.ownerDocument!;
1970
- const lines = _bug233_new.split(/[\r\n]+/).filter((l: string) => l !== "");
2074
+ const lines = _bug233_new
2075
+ .split(/[\r\n]+/)
2076
+ .filter((l: string) => l !== "");
1971
2077
  let firstNew: Element | null = null;
1972
2078
  let lastNew: Element | null = null;
1973
2079
  let lastIns: Element | null = null;
@@ -2069,7 +2175,9 @@ export class RedlineEngine {
2069
2175
  if (start_p) {
2070
2176
  let first_anchor_target = result.first_node;
2071
2177
  if (result.first_node.tagName === "w:p") {
2072
- first_anchor_target = findAllDescendants(result.first_node, "w:ins")[0] || result.first_node;
2178
+ first_anchor_target =
2179
+ findAllDescendants(result.first_node, "w:ins")[0] ||
2180
+ result.first_node;
2073
2181
  }
2074
2182
  const start_anchor = ascend_to_paragraph_child(
2075
2183
  first_anchor_target,
@@ -2086,22 +2194,27 @@ export class RedlineEngine {
2086
2194
  end_anchor,
2087
2195
  edit.comment,
2088
2196
  );
2089
- }
2090
- } else {
2091
- // Inline only: anchor around first_node in its host paragraph.
2092
- let host_p: Element | null = result.first_node;
2093
- while (host_p && host_p.tagName !== "w:p")
2197
+ }
2198
+ } else {
2199
+ // Inline only: anchor around first_node in its host paragraph.
2200
+ let host_p: Element | null = result.first_node;
2201
+ while (host_p && host_p.tagName !== "w:p")
2094
2202
  host_p = host_p.parentNode as Element;
2095
- if (host_p) {
2203
+ if (host_p) {
2096
2204
  let first_anchor_target = result.first_node;
2097
2205
  if (result.first_node.tagName === "w:p") {
2098
- first_anchor_target = findAllDescendants(result.first_node, "w:ins")[0] || result.first_node;
2206
+ first_anchor_target =
2207
+ findAllDescendants(result.first_node, "w:ins")[0] ||
2208
+ result.first_node;
2099
2209
  }
2100
- const anchor = ascend_to_paragraph_child(first_anchor_target, host_p);
2210
+ const anchor = ascend_to_paragraph_child(
2211
+ first_anchor_target,
2212
+ host_p,
2213
+ );
2101
2214
  this._attach_comment(host_p, anchor, anchor, edit.comment);
2102
- }
2103
2215
  }
2104
2216
  }
2217
+ }
2105
2218
  return true;
2106
2219
  }
2107
2220
 
@@ -2111,7 +2224,10 @@ export class RedlineEngine {
2111
2224
  length,
2112
2225
  rebuild_map,
2113
2226
  );
2114
- const virtual_spans = active_mapper.get_virtual_spans_in_range(start_idx, length);
2227
+ const virtual_spans = active_mapper.get_virtual_spans_in_range(
2228
+ start_idx,
2229
+ length,
2230
+ );
2115
2231
 
2116
2232
  if (target_runs.length === 0 && virtual_spans.length === 0) return false;
2117
2233
 
@@ -2187,18 +2303,26 @@ export class RedlineEngine {
2187
2303
 
2188
2304
  // PHASE 2: OOXML Paragraph Merge Protocol
2189
2305
  if (op === "DELETION" || op === "MODIFICATION") {
2190
- if (op === "MODIFICATION" && target_runs.length === 0 && virtual_spans.length > 0 && edit.new_text) {
2306
+ if (
2307
+ op === "MODIFICATION" &&
2308
+ target_runs.length === 0 &&
2309
+ virtual_spans.length > 0 &&
2310
+ edit.new_text
2311
+ ) {
2191
2312
  const first_span = virtual_spans[0];
2192
2313
  if (first_span.paragraph) {
2193
2314
  const p1_el = first_span.paragraph._element;
2194
2315
  const last_runs = findAllDescendants(p1_el, "w:r");
2195
- const anchor = last_runs.length > 0 ? new Run(last_runs[last_runs.length - 1], first_span.paragraph) : null;
2196
-
2316
+ const anchor =
2317
+ last_runs.length > 0
2318
+ ? new Run(last_runs[last_runs.length - 1], first_span.paragraph)
2319
+ : null;
2320
+
2197
2321
  const result = this._track_insert_multiline(
2198
2322
  edit.new_text,
2199
2323
  anchor,
2200
2324
  first_span.paragraph,
2201
- ins_id!
2325
+ ins_id!,
2202
2326
  );
2203
2327
  if (result.first_node) {
2204
2328
  p1_el.appendChild(result.first_node);
@@ -2218,7 +2342,10 @@ export class RedlineEngine {
2218
2342
  let pPr = findChild(p1_element, "w:pPr");
2219
2343
  if (!pPr) {
2220
2344
  pPr = p1_element.ownerDocument!.createElement("w:pPr") as Element;
2221
- p1_element.insertBefore(pPr, p1_element.firstChild as Node | null);
2345
+ p1_element.insertBefore(
2346
+ pPr,
2347
+ p1_element.firstChild as Node | null,
2348
+ );
2222
2349
  }
2223
2350
  let rPr = findChild(pPr!, "w:rPr");
2224
2351
  if (!rPr) {
@@ -2230,7 +2357,10 @@ export class RedlineEngine {
2230
2357
 
2231
2358
  const children = Array.from(p2_element.childNodes);
2232
2359
  for (const child of children) {
2233
- if (child.nodeType === 1 && (child as Element).tagName === "w:pPr") {
2360
+ if (
2361
+ child.nodeType === 1 &&
2362
+ (child as Element).tagName === "w:pPr"
2363
+ ) {
2234
2364
  continue;
2235
2365
  }
2236
2366
  p1_element.appendChild(child);
package/src/index.ts CHANGED
@@ -9,5 +9,5 @@ export { generate_edits_from_text, trim_common_context, create_unified_diff, cre
9
9
  export { apply_edits_to_markdown } from './markup.js';
10
10
  export { paginate, split_structural_appendix, PaginationResult, PageInfo } from './pagination.js';
11
11
  export { extract_outline, OutlineNode } from './outline.js';
12
- export { extractTextFromBuffer } from './ingest.js';
12
+ export { extractTextFromBuffer, _extractTextFromDoc } from './ingest.js';
13
13
  export { finalize_document, FinalizeOptions, FinalizeResult } from './sanitize/core.js';
package/src/ingest.ts CHANGED
@@ -11,18 +11,24 @@ import { extract_comments_data } from './comments.js';
11
11
 
12
12
  export async function extractTextFromBuffer(buffer: Buffer, cleanView = false): Promise<string> {
13
13
  const doc = await DocumentObject.load(buffer);
14
- return _extractTextFromDoc(doc, cleanView);
14
+ return _extractTextFromDoc(doc, cleanView) as string;
15
15
  }
16
16
 
17
- export function _extractTextFromDoc(doc: DocumentObject, cleanView = false, includeAppendix = true): string {
17
+ export function _extractTextFromDoc(
18
+ doc: DocumentObject,
19
+ cleanView = false,
20
+ includeAppendix = true,
21
+ return_paragraph_offsets = false,
22
+ ): string | { text: string; paragraph_offsets: Map<any, [number, number]> } {
18
23
  const comments_map = extract_comments_data(doc.pkg);
19
24
 
20
25
  const full_text: string[] = [];
26
+ const paragraph_offsets = new Map<any, [number, number]>();
21
27
  let cursor = 0;
22
28
 
23
29
  for (const part of iter_document_parts(doc)) {
24
30
  const part_cursor = full_text.length > 0 ? cursor + 2 : cursor;
25
- const part_text = _extract_blocks(part, comments_map, cleanView, part_cursor);
31
+ const part_text = _extract_blocks(part, comments_map, cleanView, part_cursor, return_paragraph_offsets ? paragraph_offsets : undefined);
26
32
  if (part_text) {
27
33
  if (full_text.length > 0) cursor += 2;
28
34
  full_text.push(part_text);
@@ -37,10 +43,19 @@ export function _extractTextFromDoc(doc: DocumentObject, cleanView = false, incl
37
43
  if (appendix) base_text += appendix;
38
44
  }
39
45
 
46
+ if (return_paragraph_offsets) {
47
+ return { text: base_text, paragraph_offsets };
48
+ }
40
49
  return base_text;
41
50
  }
42
51
 
43
- function _extract_blocks(container: any, comments_map: any, cleanView: boolean, cursor: number): string {
52
+ function _extract_blocks(
53
+ container: any,
54
+ comments_map: any,
55
+ cleanView: boolean,
56
+ cursor: number,
57
+ paragraph_offsets?: Map<any, [number, number]>
58
+ ): string {
44
59
  const part = container.part || container;
45
60
  const [style_cache, default_pstyle] = _get_style_cache(part);
46
61
 
@@ -62,7 +77,7 @@ function _extract_blocks(container: any, comments_map: any, cleanView: boolean,
62
77
  const block_start = local_cursor;
63
78
 
64
79
  if (item.constructor.name === 'FootnoteItem') {
65
- const fn_text = _extract_blocks(item, comments_map, cleanView, block_start);
80
+ const fn_text = _extract_blocks(item, comments_map, cleanView, block_start, paragraph_offsets);
66
81
  if (fn_text) {
67
82
  blocks.push(fn_text);
68
83
  local_cursor = block_start + fn_text.length;
@@ -78,11 +93,14 @@ function _extract_blocks(container: any, comments_map: any, cleanView: boolean,
78
93
  const p_text = build_paragraph_text(item, comments_map, cleanView, style_cache, default_pstyle);
79
94
  const full_block = prefix + p_text;
80
95
  blocks.push(full_block);
96
+ if (paragraph_offsets) {
97
+ paragraph_offsets.set(item._element, [block_start, full_block.length]);
98
+ }
81
99
  local_cursor = block_start + full_block.length;
82
100
  is_first_para = false;
83
101
  is_first_block = false;
84
102
  } else if (item instanceof Table) {
85
- const table_text = extract_table(item, comments_map, cleanView, block_start);
103
+ const table_text = extract_table(item, comments_map, cleanView, block_start, paragraph_offsets);
86
104
  if (table_text) {
87
105
  blocks.push(table_text);
88
106
  local_cursor = block_start + table_text.length;
@@ -97,7 +115,13 @@ function _extract_blocks(container: any, comments_map: any, cleanView: boolean,
97
115
  return blocks.join('\n\n');
98
116
  }
99
117
 
100
- export function extract_table(table: Table, comments_map: any, cleanView: boolean, cursor: number): string {
118
+ export function extract_table(
119
+ table: Table,
120
+ comments_map: any,
121
+ cleanView: boolean,
122
+ cursor: number,
123
+ paragraph_offsets?: Map<any, [number, number]>
124
+ ): string {
101
125
  const rows_text: string[] = [];
102
126
  let rows_processed = 0;
103
127
  let local_cursor = cursor;
@@ -124,7 +148,7 @@ export function extract_table(table: Table, comments_map: any, cleanView: boolea
124
148
 
125
149
  if (!first_cell) cell_cursor += 3;
126
150
 
127
- const cell_content = _extract_blocks(cell, comments_map, cleanView, cell_cursor);
151
+ const cell_content = _extract_blocks(cell, comments_map, cleanView, cell_cursor, paragraph_offsets);
128
152
  cell_texts.push(cell_content);
129
153
  cell_cursor += cell_content.length;
130
154
  first_cell = false;