@adeu/core 1.10.1 → 1.11.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@adeu/core",
3
- "version": "1.10.1",
3
+ "version": "1.11.2",
4
4
  "description": "",
5
5
  "main": "./dist/index.js",
6
6
  "types": "./dist/index.d.ts",
@@ -251,14 +251,14 @@ describe("Resolved Bugs Core Engine Verification", () => {
251
251
  const p = addParagraph(doc, "Short heading");
252
252
 
253
253
  const fakeCache = {
254
- CustomHeading: { name: "Custom Heading", outline_level: 2, bold: true },
254
+ Heading3: { name: "Heading 3", outline_level: 2, bold: true },
255
255
  };
256
256
  (doc.pkg as any)._adeu_style_cache = [fakeCache, "Normal"];
257
257
 
258
258
  const docEl = p.ownerDocument!;
259
259
  const pPr = docEl.createElement("w:pPr");
260
260
  const pStyle = docEl.createElement("w:pStyle");
261
- pStyle.setAttribute("w:val", "CustomHeading");
261
+ pStyle.setAttribute("w:val", "Heading3");
262
262
  pPr.appendChild(pStyle);
263
263
  p.insertBefore(pPr, p.firstChild);
264
264
 
package/src/engine.ts CHANGED
@@ -221,6 +221,7 @@ export class RedlineEngine {
221
221
  public mapper: DocumentMapper;
222
222
  public comments_manager: CommentsManager;
223
223
  public clean_mapper: DocumentMapper | null = null;
224
+ public original_mapper: DocumentMapper | null = null;
224
225
  public skipped_details: string[] = [];
225
226
 
226
227
  constructor(doc: DocumentObject, author: string = "Adeu AI (TS)") {
@@ -1195,12 +1196,16 @@ export class RedlineEngine {
1195
1196
 
1196
1197
  let matches = this.mapper.find_all_match_indices(edit.target_text);
1197
1198
  let activeText = this.mapper.full_text;
1199
+ let target_mapper = this.mapper;
1198
1200
 
1199
1201
  if (matches.length === 0) {
1200
1202
  if (!this.clean_mapper)
1201
1203
  this.clean_mapper = new DocumentMapper(this.doc, true);
1202
1204
  matches = this.clean_mapper.find_all_match_indices(edit.target_text);
1203
- if (matches.length > 0) activeText = this.clean_mapper.full_text;
1205
+ if (matches.length > 0) {
1206
+ activeText = this.clean_mapper.full_text;
1207
+ target_mapper = this.clean_mapper;
1208
+ }
1204
1209
  }
1205
1210
 
1206
1211
  // BUG-23-5: a copy of the target that lives entirely inside a tracked
@@ -1208,7 +1213,7 @@ export class RedlineEngine {
1208
1213
  // count toward ambiguity. Drop matches whose overlapping real text is
1209
1214
  // exclusively deleted. Only applies to the raw mapper (the clean mapper
1210
1215
  // already omits deleted text).
1211
- if (activeText === this.mapper.full_text && matches.length > 1) {
1216
+ if (activeText === this.mapper.full_text && matches.length > 0) {
1212
1217
  const liveMatches = matches.filter(([start, length]) => {
1213
1218
  const realSpans = this.mapper.spans.filter(
1214
1219
  (s) => s.run !== null && s.end > start && s.start < start + length,
@@ -1218,13 +1223,55 @@ export class RedlineEngine {
1218
1223
  // part of a tracked deletion).
1219
1224
  return realSpans.some((s) => !s.del_id);
1220
1225
  });
1221
- if (liveMatches.length > 0) matches = liveMatches;
1226
+ matches = liveMatches;
1222
1227
  }
1223
1228
 
1229
+ let is_deleted_text = false;
1230
+ const deleted_authors = new Set<string>();
1231
+
1224
1232
  if (matches.length === 0) {
1225
- errors.push(
1226
- `- Edit ${i + 1} Failed: Target text not found in document:\n "${edit.target_text}"`,
1227
- );
1233
+ if (!this.original_mapper) {
1234
+ this.original_mapper = new DocumentMapper(this.doc, false, true);
1235
+ }
1236
+ const orig_matches = this.original_mapper.find_all_match_indices(edit.target_text);
1237
+ if (orig_matches.length > 0) {
1238
+ is_deleted_text = true;
1239
+ for (const [start, length] of orig_matches) {
1240
+ const spans = this.original_mapper.spans.filter(
1241
+ (s) => s.end > start && s.start < start + length,
1242
+ );
1243
+ for (const s of spans) {
1244
+ if (s.run !== null) {
1245
+ let parent = s.run._element as Node | null;
1246
+ while (parent) {
1247
+ if (parent.nodeType === 1 && (parent as Element).tagName === "w:del") {
1248
+ const auth = (parent as Element).getAttribute("w:author");
1249
+ if (auth) {
1250
+ deleted_authors.add(auth);
1251
+ }
1252
+ break;
1253
+ }
1254
+ parent = parent.parentNode;
1255
+ }
1256
+ }
1257
+ }
1258
+ }
1259
+ }
1260
+ }
1261
+
1262
+ if (matches.length === 0) {
1263
+ if (is_deleted_text) {
1264
+ const author_phrase = deleted_authors.size > 0
1265
+ ? `by ${Array.from(deleted_authors).sort().join(", ")}`
1266
+ : "by an existing revision";
1267
+ errors.push(
1268
+ `- Edit ${i + 1} Failed: Target text matches text inside a tracked deletion ${author_phrase}. Reject/accept that change first or target the active replacement text instead.`,
1269
+ );
1270
+ } else {
1271
+ errors.push(
1272
+ `- Edit ${i + 1} Failed: Target text not found in document:\n "${edit.target_text}"`,
1273
+ );
1274
+ }
1228
1275
  } else if (matches.length > 1) {
1229
1276
  const positions: [number, number][] = matches.map(([start, length]) => [
1230
1277
  start,
package/src/index.ts CHANGED
@@ -9,5 +9,5 @@ export { generate_edits_from_text, trim_common_context, create_unified_diff, cre
9
9
  export { apply_edits_to_markdown } from './markup.js';
10
10
  export { paginate, split_structural_appendix, PaginationResult, PageInfo } from './pagination.js';
11
11
  export { extract_outline, OutlineNode } from './outline.js';
12
- export { extractTextFromBuffer } from './ingest.js';
12
+ export { extractTextFromBuffer, _extractTextFromDoc } from './ingest.js';
13
13
  export { finalize_document, FinalizeOptions, FinalizeResult } from './sanitize/core.js';
package/src/ingest.ts CHANGED
@@ -11,18 +11,24 @@ import { extract_comments_data } from './comments.js';
11
11
 
12
12
  export async function extractTextFromBuffer(buffer: Buffer, cleanView = false): Promise<string> {
13
13
  const doc = await DocumentObject.load(buffer);
14
- return _extractTextFromDoc(doc, cleanView);
14
+ return _extractTextFromDoc(doc, cleanView) as string;
15
15
  }
16
16
 
17
- export function _extractTextFromDoc(doc: DocumentObject, cleanView = false, includeAppendix = true): string {
17
+ export function _extractTextFromDoc(
18
+ doc: DocumentObject,
19
+ cleanView = false,
20
+ includeAppendix = true,
21
+ return_paragraph_offsets = false,
22
+ ): string | { text: string; paragraph_offsets: Map<any, [number, number]> } {
18
23
  const comments_map = extract_comments_data(doc.pkg);
19
24
 
20
25
  const full_text: string[] = [];
26
+ const paragraph_offsets = new Map<any, [number, number]>();
21
27
  let cursor = 0;
22
28
 
23
29
  for (const part of iter_document_parts(doc)) {
24
30
  const part_cursor = full_text.length > 0 ? cursor + 2 : cursor;
25
- const part_text = _extract_blocks(part, comments_map, cleanView, part_cursor);
31
+ const part_text = _extract_blocks(part, comments_map, cleanView, part_cursor, return_paragraph_offsets ? paragraph_offsets : undefined);
26
32
  if (part_text) {
27
33
  if (full_text.length > 0) cursor += 2;
28
34
  full_text.push(part_text);
@@ -37,10 +43,19 @@ export function _extractTextFromDoc(doc: DocumentObject, cleanView = false, incl
37
43
  if (appendix) base_text += appendix;
38
44
  }
39
45
 
46
+ if (return_paragraph_offsets) {
47
+ return { text: base_text, paragraph_offsets };
48
+ }
40
49
  return base_text;
41
50
  }
42
51
 
43
- function _extract_blocks(container: any, comments_map: any, cleanView: boolean, cursor: number): string {
52
+ function _extract_blocks(
53
+ container: any,
54
+ comments_map: any,
55
+ cleanView: boolean,
56
+ cursor: number,
57
+ paragraph_offsets?: Map<any, [number, number]>
58
+ ): string {
44
59
  const part = container.part || container;
45
60
  const [style_cache, default_pstyle] = _get_style_cache(part);
46
61
 
@@ -62,7 +77,7 @@ function _extract_blocks(container: any, comments_map: any, cleanView: boolean,
62
77
  const block_start = local_cursor;
63
78
 
64
79
  if (item.constructor.name === 'FootnoteItem') {
65
- const fn_text = _extract_blocks(item, comments_map, cleanView, block_start);
80
+ const fn_text = _extract_blocks(item, comments_map, cleanView, block_start, paragraph_offsets);
66
81
  if (fn_text) {
67
82
  blocks.push(fn_text);
68
83
  local_cursor = block_start + fn_text.length;
@@ -78,11 +93,14 @@ function _extract_blocks(container: any, comments_map: any, cleanView: boolean,
78
93
  const p_text = build_paragraph_text(item, comments_map, cleanView, style_cache, default_pstyle);
79
94
  const full_block = prefix + p_text;
80
95
  blocks.push(full_block);
96
+ if (paragraph_offsets) {
97
+ paragraph_offsets.set(item._element, [block_start, full_block.length]);
98
+ }
81
99
  local_cursor = block_start + full_block.length;
82
100
  is_first_para = false;
83
101
  is_first_block = false;
84
102
  } else if (item instanceof Table) {
85
- const table_text = extract_table(item, comments_map, cleanView, block_start);
103
+ const table_text = extract_table(item, comments_map, cleanView, block_start, paragraph_offsets);
86
104
  if (table_text) {
87
105
  blocks.push(table_text);
88
106
  local_cursor = block_start + table_text.length;
@@ -97,7 +115,13 @@ function _extract_blocks(container: any, comments_map: any, cleanView: boolean,
97
115
  return blocks.join('\n\n');
98
116
  }
99
117
 
100
- export function extract_table(table: Table, comments_map: any, cleanView: boolean, cursor: number): string {
118
+ export function extract_table(
119
+ table: Table,
120
+ comments_map: any,
121
+ cleanView: boolean,
122
+ cursor: number,
123
+ paragraph_offsets?: Map<any, [number, number]>
124
+ ): string {
101
125
  const rows_text: string[] = [];
102
126
  let rows_processed = 0;
103
127
  let local_cursor = cursor;
@@ -124,7 +148,7 @@ export function extract_table(table: Table, comments_map: any, cleanView: boolea
124
148
 
125
149
  if (!first_cell) cell_cursor += 3;
126
150
 
127
- const cell_content = _extract_blocks(cell, comments_map, cleanView, cell_cursor);
151
+ const cell_content = _extract_blocks(cell, comments_map, cleanView, cell_cursor, paragraph_offsets);
128
152
  cell_texts.push(cell_content);
129
153
  cell_cursor += cell_content.length;
130
154
  first_cell = false;
package/src/mapper.ts CHANGED
@@ -95,15 +95,17 @@ export function renumber_snapshot_ids(doc: DocumentObject): [Record<string, stri
95
95
  export class DocumentMapper {
96
96
  public doc: DocumentObject;
97
97
  public clean_view: boolean;
98
+ public original_view: boolean;
98
99
  public comments_map: Record<string, any>;
99
100
  public full_text: string = '';
100
101
  public spans: TextSpan[] = [];
101
102
  public appendix_start_index: number = -1;
102
103
  private _text_chunks: string[] = [];
103
104
 
104
- constructor(doc: DocumentObject, clean_view: boolean = false) {
105
+ constructor(doc: DocumentObject, clean_view: boolean = false, original_view: boolean = false) {
105
106
  this.doc = doc;
106
107
  this.clean_view = clean_view;
108
+ this.original_view = original_view;
107
109
  this.comments_map = extract_comments_data(doc.pkg);
108
110
  this._build_map();
109
111
  }
@@ -200,16 +202,17 @@ export class DocumentMapper {
200
202
  const del_node = trPr ? findChild(trPr, 'w:del') : null;
201
203
 
202
204
  if (this.clean_view && del_node) continue;
205
+ if (this.original_view && ins) continue;
203
206
 
204
207
  if (rows_processed > 0) {
205
208
  this._add_virtual_text('\n', current, null);
206
209
  current += 1;
207
210
  }
208
211
 
209
- if (ins && !this.clean_view) {
212
+ if (ins && !this.clean_view && !this.original_view) {
210
213
  this._add_virtual_text('{++ ', current, null);
211
214
  current += 4;
212
- } else if (del_node && !this.clean_view) {
215
+ } else if (del_node && !this.clean_view && !this.original_view) {
213
216
  this._add_virtual_text('{-- ', current, null);
214
217
  current += 4;
215
218
  }
@@ -230,11 +233,11 @@ export class DocumentMapper {
230
233
  cells_processed += 1;
231
234
  }
232
235
 
233
- if (ins && !this.clean_view) {
236
+ if (ins && !this.clean_view && !this.original_view) {
234
237
  const suffix = ` |Chg:${ins.getAttribute('w:id')}++}`;
235
238
  this._add_virtual_text(suffix, current, null);
236
239
  current += suffix.length;
237
- } else if (del_node && !this.clean_view) {
240
+ } else if (del_node && !this.clean_view && !this.original_view) {
238
241
  const suffix = ` |Chg:${del_node.getAttribute('w:id')}--}`;
239
242
  this._add_virtual_text(suffix, current, null);
240
243
  current += suffix.length;
@@ -343,13 +346,16 @@ export class DocumentMapper {
343
346
  if (this.clean_view && Object.keys(active_del).length > 0) {
344
347
  // pass
345
348
  }
349
+ if (this.original_view && Object.keys(active_ins).length > 0) {
350
+ // pass
351
+ }
346
352
 
347
353
  const full_seg_text = run_parts.map(x => x[1]).join('');
348
354
  const curr_ins_id = Object.keys(active_ins).pop() || null;
349
355
  const curr_del_id = Object.keys(active_del).pop() || null;
350
356
 
351
- if (full_seg_text && !(this.clean_view && curr_del_id)) {
352
- const new_wrappers = this.clean_view ? ['', ''] as [string, string] : this._get_wrappers(curr_ins_id, curr_del_id, active_ids, active_fmt);
357
+ if (full_seg_text && !(this.clean_view && curr_del_id) && !(this.original_view && curr_ins_id)) {
358
+ const new_wrappers = (this.clean_view || this.original_view) ? ['', ''] as [string, string] : this._get_wrappers(curr_ins_id, curr_del_id, active_ids, active_fmt);
353
359
  const new_style: [string, string] = [prefix, suffix];
354
360
 
355
361
  if (pending_runs.length > 0 && new_wrappers[0] === current_wrappers[0] && new_wrappers[1] === current_wrappers[1]) {
@@ -379,7 +385,7 @@ export class DocumentMapper {
379
385
  }
380
386
  }
381
387
 
382
- if (!this.clean_view) {
388
+ if (!this.clean_view && !this.original_view) {
383
389
  const has_meta = Object.keys(active_ins).length > 0 || Object.keys(active_del).length > 0 || active_ids.size > 0 || Object.keys(active_fmt).length > 0;
384
390
  if (has_meta) {
385
391
  deferred_meta_states.push([{...active_ins}, {...active_del}, new Set(active_ids), {...active_fmt}]);
package/src/outline.ts CHANGED
@@ -40,12 +40,16 @@ export function extract_outline(
40
40
  projected_body: string,
41
41
  body_pages: string[],
42
42
  body_page_offsets: number[],
43
- paragraph_offsets: Record<string, [number, number]> | null = null,
43
+ paragraph_offsets: Record<string, [number, number]> | Map<any, [number, number]> | null = null,
44
44
  ): OutlineNode[] {
45
45
  if (body_pages.length !== body_page_offsets.length) {
46
46
  throw new Error("body_pages and body_page_offsets length mismatch");
47
47
  }
48
48
 
49
+ if (paragraph_offsets) {
50
+ return _extract_outline_fast(doc, projected_body, body_page_offsets, paragraph_offsets);
51
+ }
52
+
49
53
  const comments_map = extract_comments_data(doc.pkg);
50
54
  const block_records = _walk_doc_body(doc, comments_map);
51
55
 
@@ -397,6 +401,7 @@ function _determine_heading_style(paragraph: Paragraph): string {
397
401
  }
398
402
 
399
403
  let outline_level: number | null = null;
404
+ let outline_level_from_style = false;
400
405
  if (pPr) {
401
406
  const oLvl = findChild(pPr, "w:outlineLvl");
402
407
  if (oLvl && /^\d+$/.test(oLvl.getAttribute("w:val") || "")) {
@@ -406,6 +411,7 @@ function _determine_heading_style(paragraph: Paragraph): string {
406
411
 
407
412
  if (outline_level === null && style_id && style_cache && style_cache[style_id]) {
408
413
  outline_level = style_cache[style_id].outline_level;
414
+ outline_level_from_style = true;
409
415
  }
410
416
 
411
417
  const style_name =
@@ -422,6 +428,15 @@ function _determine_heading_style(paragraph: Paragraph): string {
422
428
  }
423
429
  }
424
430
 
431
+ if (outline_level_from_style && outline_level !== null) {
432
+ const is_heading_or_title =
433
+ normalized_style_name &&
434
+ (normalized_style_name.startsWith("Heading") || normalized_style_name === "Title");
435
+ if (!is_heading_or_title) {
436
+ outline_level = null;
437
+ }
438
+ }
439
+
425
440
  if (outline_level !== null && outline_level >= 0 && outline_level <= 8) {
426
441
  if (normalized_style_name && (normalized_style_name.startsWith("Heading") || normalized_style_name === "Title")) {
427
442
  return normalized_style_name;
@@ -505,3 +520,183 @@ function _offset_to_page(offset: number, body_page_offsets: number[]): number {
505
520
  }
506
521
  return page;
507
522
  }
523
+
524
+ function _extract_outline_fast(
525
+ doc: DocumentObject,
526
+ projected_body: string,
527
+ body_page_offsets: number[],
528
+ paragraph_offsets: Map<any, [number, number]> | Record<string, [number, number]>,
529
+ ): OutlineNode[] {
530
+ const paragraphs_and_tables: ["p" | "t", any][] = [];
531
+ const seen_cells = new Set<any>();
532
+
533
+ function walk(container: any) {
534
+ for (const item of iter_block_items(container)) {
535
+ const i_type = item.constructor.name;
536
+ if (i_type === "FootnoteItem") {
537
+ walk(item);
538
+ } else if (item instanceof Paragraph) {
539
+ paragraphs_and_tables.push(["p", item]);
540
+ } else if (item instanceof Table) {
541
+ paragraphs_and_tables.push(["t", item]);
542
+ for (const row of item.rows) {
543
+ for (const cell of row.cells) {
544
+ if (seen_cells.has(cell._element)) {
545
+ continue;
546
+ }
547
+ seen_cells.add(cell._element);
548
+ walk(cell);
549
+ }
550
+ }
551
+ }
552
+ }
553
+ }
554
+
555
+ walk(doc);
556
+
557
+ const heading_indices: number[] = [];
558
+ for (let idx = 0; idx < paragraphs_and_tables.length; idx++) {
559
+ const [kind, item] = paragraphs_and_tables[idx];
560
+ if (kind !== "p") continue;
561
+
562
+ let hasOffset = false;
563
+ if (paragraph_offsets instanceof Map) {
564
+ hasOffset = paragraph_offsets.has(item._element);
565
+ } else {
566
+ hasOffset = item._element in (paragraph_offsets as any);
567
+ }
568
+ if (!hasOffset) {
569
+ continue;
570
+ }
571
+
572
+ if (!_is_heading(item)) continue;
573
+ if (!_heading_passes_quality_filter_fast(item, projected_body, paragraph_offsets)) continue;
574
+
575
+ heading_indices.push(idx);
576
+ }
577
+
578
+ if (heading_indices.length === 0) return [];
579
+
580
+ const nodes: OutlineNode[] = [];
581
+ for (let h_pos = 0; h_pos < heading_indices.length; h_pos++) {
582
+ const item_idx = heading_indices[h_pos];
583
+ const paragraph = paragraphs_and_tables[item_idx][1] as Paragraph;
584
+ const level = _heading_level(paragraph);
585
+ const text = _heading_text_fast(paragraph, projected_body, paragraph_offsets);
586
+ const style = _determine_heading_style(paragraph);
587
+
588
+ // Owned range: items strictly between this heading and the next equal-or-higher heading.
589
+ let owned_end = item_idx;
590
+ for (let next_h_pos = h_pos + 1; next_h_pos < heading_indices.length; next_h_pos++) {
591
+ const next_idx = heading_indices[next_h_pos];
592
+ const next_paragraph = paragraphs_and_tables[next_idx][1] as Paragraph;
593
+ if (_heading_level(next_paragraph) <= level) {
594
+ owned_end = next_idx;
595
+ break;
596
+ }
597
+ }
598
+ if (owned_end === item_idx) {
599
+ owned_end = paragraphs_and_tables.length;
600
+ }
601
+
602
+ const owned = paragraphs_and_tables.slice(item_idx + 1, owned_end);
603
+
604
+ // has_table: nearest-claim semantics (no bubbling to ancestors).
605
+ let has_table = false;
606
+ for (const [kind2, item2] of owned) {
607
+ if (kind2 === "p" && _is_heading(item2)) {
608
+ break;
609
+ }
610
+ if (kind2 === "t") {
611
+ has_table = true;
612
+ break;
613
+ }
614
+ }
615
+
616
+ // Footnote IDs in document order, deduped.
617
+ const footnote_ids = _collect_footnote_ids_fast(owned);
618
+
619
+ // Page resolution from the paragraph's known offset.
620
+ let para_offset: [number, number] | undefined;
621
+ if (paragraph_offsets instanceof Map) {
622
+ para_offset = paragraph_offsets.get(paragraph._element);
623
+ } else {
624
+ para_offset = paragraph_offsets[paragraph._element as any];
625
+ }
626
+
627
+ let page_num = 1;
628
+ if (para_offset !== undefined) {
629
+ const [start_offset] = para_offset;
630
+ page_num = _offset_to_page(start_offset, body_page_offsets);
631
+ }
632
+
633
+ nodes.push({
634
+ level,
635
+ text,
636
+ page: page_num,
637
+ style,
638
+ has_table,
639
+ footnote_ids,
640
+ });
641
+ }
642
+
643
+ return nodes;
644
+ }
645
+
646
+ function _heading_passes_quality_filter_fast(
647
+ paragraph: Paragraph,
648
+ projected_body: string,
649
+ paragraph_offsets: Map<any, [number, number]> | Record<string, [number, number]>,
650
+ ): boolean {
651
+ const style = _determine_heading_style(paragraph);
652
+ if (style !== "(heuristic)") return true;
653
+
654
+ const text = _heading_text_fast(paragraph, projected_body, paragraph_offsets);
655
+ if (!text) return false;
656
+ const words = text.match(/\w+/g) || [];
657
+ return words.length >= _HEURISTIC_MIN_WORDS;
658
+ }
659
+
660
+ function _heading_text_fast(
661
+ paragraph: Paragraph,
662
+ projected_body: string,
663
+ paragraph_offsets: Map<any, [number, number]> | Record<string, [number, number]>,
664
+ ): string {
665
+ let offset: [number, number] | undefined;
666
+ if (paragraph_offsets instanceof Map) {
667
+ offset = paragraph_offsets.get(paragraph._element);
668
+ } else {
669
+ offset = paragraph_offsets[paragraph._element as any];
670
+ }
671
+
672
+ if (offset === undefined) {
673
+ return "";
674
+ }
675
+ const [start, length] = offset;
676
+ const raw = projected_body.substring(start, start + length);
677
+ let cleaned = _strip_critic_markup(raw);
678
+ cleaned = _strip_inline_formatting(cleaned);
679
+ cleaned = cleaned.replace(/^#+\s+/, "");
680
+ return cleaned.trim();
681
+ }
682
+
683
+ function _collect_footnote_ids_fast(owned_items: ["p" | "t", any][]): string[] {
684
+ const seen = new Set<string>();
685
+ const ordered: string[] = [];
686
+ for (const [kind, item] of owned_items) {
687
+ if (kind !== "p") continue;
688
+ for (const event of iter_paragraph_content(item)) {
689
+ if (!("type" in event)) continue;
690
+ let fn_id = "";
691
+ if (event.type === "footnote") fn_id = `fn-${event.id}`;
692
+ else if (event.type === "endnote") fn_id = `en-${event.id}`;
693
+ else continue;
694
+
695
+ if (!seen.has(fn_id)) {
696
+ seen.add(fn_id);
697
+ ordered.push(fn_id);
698
+ }
699
+ }
700
+ }
701
+ return ordered;
702
+ }
@@ -0,0 +1,98 @@
1
+ import { describe, it, expect } from 'vitest';
2
+ import { createTestDocument } from './test-utils.js';
3
+ import { RedlineEngine } from './engine.js';
4
+ import { DocumentMapper } from './mapper.js';
5
+ import { extract_outline } from './outline.js';
6
+ import { _extractTextFromDoc } from './ingest.js';
7
+ import { paginate } from './pagination.js';
8
+
9
+ describe('Parity Gaps (TDD)', () => {
10
+ it('GAP 2: original_view maps deleted text and validate_edits throws actionable deletion error', async () => {
11
+ const doc = await createTestDocument();
12
+ const xmlDoc = doc.element.ownerDocument!;
13
+
14
+ // Create a paragraph with a tracked deletion: <w:p><w:del w:id="1" w:author="Test Negotiator"><w:r><w:t>Deleted sentence.</w:t></w:r></w:del></w:p>
15
+ const p = xmlDoc.createElement('w:p');
16
+ const del = xmlDoc.createElement('w:del');
17
+ del.setAttribute('w:id', '1');
18
+ del.setAttribute('w:author', 'Test Negotiator');
19
+
20
+ const r = xmlDoc.createElement('w:r');
21
+ const t = xmlDoc.createElement('w:t');
22
+ t.textContent = 'Deleted sentence.';
23
+
24
+ r.appendChild(t);
25
+ del.appendChild(r);
26
+ p.appendChild(del);
27
+ doc.element.appendChild(p);
28
+
29
+ // 1. Verify original_view mapping
30
+ const mapperOrig = new DocumentMapper(doc, false, true);
31
+ expect(mapperOrig.full_text).toContain('Deleted sentence.');
32
+
33
+ const mapperRaw = new DocumentMapper(doc, false, false);
34
+ expect(mapperRaw.full_text).toContain('{--Deleted sentence.--}');
35
+
36
+ // 2. Validate modification targeting deleted text
37
+ const engine = new RedlineEngine(doc);
38
+ const errors = engine.validate_edits([
39
+ {
40
+ target_text: 'Deleted sentence.',
41
+ new_text: 'Active replacement text.',
42
+ }
43
+ ]);
44
+
45
+ expect(errors.length).toBe(1);
46
+ expect(errors[0]).toContain('Target text matches text inside a tracked deletion by Test Negotiator.');
47
+ expect(errors[0]).toContain('Reject/accept that change first or target the active replacement text instead.');
48
+ });
49
+
50
+ it('GAP 1: heading inside a deleted region is filtered out when using paragraph_offsets', async () => {
51
+ const doc = await createTestDocument();
52
+ const xmlDoc = doc.element.ownerDocument!;
53
+
54
+ const p1 = xmlDoc.createElement('w:p');
55
+ const p1Pr = xmlDoc.createElement('w:pPr');
56
+ const p1Style = xmlDoc.createElement('w:pStyle');
57
+ p1Style.setAttribute('w:val', 'Heading1');
58
+ p1Pr.appendChild(p1Style);
59
+ p1.appendChild(p1Pr);
60
+ const r1 = xmlDoc.createElement('w:r');
61
+ const t1 = xmlDoc.createElement('w:t');
62
+ t1.textContent = 'Active Heading';
63
+ r1.appendChild(t1);
64
+ p1.appendChild(r1);
65
+ doc.element.appendChild(p1);
66
+
67
+ const p2 = xmlDoc.createElement('w:p');
68
+ const p2Pr = xmlDoc.createElement('w:pPr');
69
+ const p2Style = xmlDoc.createElement('w:pStyle');
70
+ p2Style.setAttribute('w:val', 'Heading1');
71
+ p2Pr.appendChild(p2Style);
72
+ p2.appendChild(p2Pr);
73
+ const r2 = xmlDoc.createElement('w:r');
74
+ const t2 = xmlDoc.createElement('w:t');
75
+ t2.textContent = 'Deleted Heading';
76
+ r2.appendChild(t2);
77
+ p2.appendChild(r2);
78
+ doc.element.appendChild(p2);
79
+
80
+ const extract_res = _extractTextFromDoc(doc, false, false, true) as { text: string; paragraph_offsets: Map<any, [number, number]> };
81
+
82
+ // Simulate deletion/skipping of p2 during projection
83
+ extract_res.paragraph_offsets.delete(p2);
84
+
85
+ const pages = paginate(extract_res.text, '');
86
+ const nodes = extract_outline(
87
+ doc,
88
+ extract_res.text,
89
+ pages.body_pages,
90
+ pages.body_page_offsets,
91
+ extract_res.paragraph_offsets as any,
92
+ );
93
+
94
+ // Only Active Heading should be in the outline; Deleted Heading must be skipped because it is not in paragraph_offsets.
95
+ expect(nodes.length).toBe(1);
96
+ expect(nodes[0].text).toBe('Active Heading');
97
+ });
98
+ });