@adeu/core 1.10.0 → 1.11.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/mapper.ts CHANGED
@@ -95,15 +95,17 @@ export function renumber_snapshot_ids(doc: DocumentObject): [Record<string, stri
95
95
  export class DocumentMapper {
96
96
  public doc: DocumentObject;
97
97
  public clean_view: boolean;
98
+ public original_view: boolean;
98
99
  public comments_map: Record<string, any>;
99
100
  public full_text: string = '';
100
101
  public spans: TextSpan[] = [];
101
102
  public appendix_start_index: number = -1;
102
103
  private _text_chunks: string[] = [];
103
104
 
104
- constructor(doc: DocumentObject, clean_view: boolean = false) {
105
+ constructor(doc: DocumentObject, clean_view: boolean = false, original_view: boolean = false) {
105
106
  this.doc = doc;
106
107
  this.clean_view = clean_view;
108
+ this.original_view = original_view;
107
109
  this.comments_map = extract_comments_data(doc.pkg);
108
110
  this._build_map();
109
111
  }
@@ -200,16 +202,17 @@ export class DocumentMapper {
200
202
  const del_node = trPr ? findChild(trPr, 'w:del') : null;
201
203
 
202
204
  if (this.clean_view && del_node) continue;
205
+ if (this.original_view && ins) continue;
203
206
 
204
207
  if (rows_processed > 0) {
205
208
  this._add_virtual_text('\n', current, null);
206
209
  current += 1;
207
210
  }
208
211
 
209
- if (ins && !this.clean_view) {
212
+ if (ins && !this.clean_view && !this.original_view) {
210
213
  this._add_virtual_text('{++ ', current, null);
211
214
  current += 4;
212
- } else if (del_node && !this.clean_view) {
215
+ } else if (del_node && !this.clean_view && !this.original_view) {
213
216
  this._add_virtual_text('{-- ', current, null);
214
217
  current += 4;
215
218
  }
@@ -230,11 +233,11 @@ export class DocumentMapper {
230
233
  cells_processed += 1;
231
234
  }
232
235
 
233
- if (ins && !this.clean_view) {
236
+ if (ins && !this.clean_view && !this.original_view) {
234
237
  const suffix = ` |Chg:${ins.getAttribute('w:id')}++}`;
235
238
  this._add_virtual_text(suffix, current, null);
236
239
  current += suffix.length;
237
- } else if (del_node && !this.clean_view) {
240
+ } else if (del_node && !this.clean_view && !this.original_view) {
238
241
  const suffix = ` |Chg:${del_node.getAttribute('w:id')}--}`;
239
242
  this._add_virtual_text(suffix, current, null);
240
243
  current += suffix.length;
@@ -343,13 +346,16 @@ export class DocumentMapper {
343
346
  if (this.clean_view && Object.keys(active_del).length > 0) {
344
347
  // pass
345
348
  }
349
+ if (this.original_view && Object.keys(active_ins).length > 0) {
350
+ // pass
351
+ }
346
352
 
347
353
  const full_seg_text = run_parts.map(x => x[1]).join('');
348
354
  const curr_ins_id = Object.keys(active_ins).pop() || null;
349
355
  const curr_del_id = Object.keys(active_del).pop() || null;
350
356
 
351
- if (full_seg_text && !(this.clean_view && curr_del_id)) {
352
- const new_wrappers = this.clean_view ? ['', ''] as [string, string] : this._get_wrappers(curr_ins_id, curr_del_id, active_ids, active_fmt);
357
+ if (full_seg_text && !(this.clean_view && curr_del_id) && !(this.original_view && curr_ins_id)) {
358
+ const new_wrappers = (this.clean_view || this.original_view) ? ['', ''] as [string, string] : this._get_wrappers(curr_ins_id, curr_del_id, active_ids, active_fmt);
353
359
  const new_style: [string, string] = [prefix, suffix];
354
360
 
355
361
  if (pending_runs.length > 0 && new_wrappers[0] === current_wrappers[0] && new_wrappers[1] === current_wrappers[1]) {
@@ -379,7 +385,7 @@ export class DocumentMapper {
379
385
  }
380
386
  }
381
387
 
382
- if (!this.clean_view) {
388
+ if (!this.clean_view && !this.original_view) {
383
389
  const has_meta = Object.keys(active_ins).length > 0 || Object.keys(active_del).length > 0 || active_ids.size > 0 || Object.keys(active_fmt).length > 0;
384
390
  if (has_meta) {
385
391
  deferred_meta_states.push([{...active_ins}, {...active_del}, new Set(active_ids), {...active_fmt}]);
package/src/outline.ts CHANGED
@@ -40,12 +40,16 @@ export function extract_outline(
40
40
  projected_body: string,
41
41
  body_pages: string[],
42
42
  body_page_offsets: number[],
43
- paragraph_offsets: Record<string, [number, number]> | null = null,
43
+ paragraph_offsets: Record<string, [number, number]> | Map<any, [number, number]> | null = null,
44
44
  ): OutlineNode[] {
45
45
  if (body_pages.length !== body_page_offsets.length) {
46
46
  throw new Error("body_pages and body_page_offsets length mismatch");
47
47
  }
48
48
 
49
+ if (paragraph_offsets) {
50
+ return _extract_outline_fast(doc, projected_body, body_page_offsets, paragraph_offsets);
51
+ }
52
+
49
53
  const comments_map = extract_comments_data(doc.pkg);
50
54
  const block_records = _walk_doc_body(doc, comments_map);
51
55
 
@@ -397,6 +401,7 @@ function _determine_heading_style(paragraph: Paragraph): string {
397
401
  }
398
402
 
399
403
  let outline_level: number | null = null;
404
+ let outline_level_from_style = false;
400
405
  if (pPr) {
401
406
  const oLvl = findChild(pPr, "w:outlineLvl");
402
407
  if (oLvl && /^\d+$/.test(oLvl.getAttribute("w:val") || "")) {
@@ -406,6 +411,7 @@ function _determine_heading_style(paragraph: Paragraph): string {
406
411
 
407
412
  if (outline_level === null && style_id && style_cache && style_cache[style_id]) {
408
413
  outline_level = style_cache[style_id].outline_level;
414
+ outline_level_from_style = true;
409
415
  }
410
416
 
411
417
  const style_name =
@@ -422,6 +428,15 @@ function _determine_heading_style(paragraph: Paragraph): string {
422
428
  }
423
429
  }
424
430
 
431
+ if (outline_level_from_style && outline_level !== null) {
432
+ const is_heading_or_title =
433
+ normalized_style_name &&
434
+ (normalized_style_name.startsWith("Heading") || normalized_style_name === "Title");
435
+ if (!is_heading_or_title) {
436
+ outline_level = null;
437
+ }
438
+ }
439
+
425
440
  if (outline_level !== null && outline_level >= 0 && outline_level <= 8) {
426
441
  if (normalized_style_name && (normalized_style_name.startsWith("Heading") || normalized_style_name === "Title")) {
427
442
  return normalized_style_name;
@@ -505,3 +520,183 @@ function _offset_to_page(offset: number, body_page_offsets: number[]): number {
505
520
  }
506
521
  return page;
507
522
  }
523
+
524
+ function _extract_outline_fast(
525
+ doc: DocumentObject,
526
+ projected_body: string,
527
+ body_page_offsets: number[],
528
+ paragraph_offsets: Map<any, [number, number]> | Record<string, [number, number]>,
529
+ ): OutlineNode[] {
530
+ const paragraphs_and_tables: ["p" | "t", any][] = [];
531
+ const seen_cells = new Set<any>();
532
+
533
+ function walk(container: any) {
534
+ for (const item of iter_block_items(container)) {
535
+ const i_type = item.constructor.name;
536
+ if (i_type === "FootnoteItem") {
537
+ walk(item);
538
+ } else if (item instanceof Paragraph) {
539
+ paragraphs_and_tables.push(["p", item]);
540
+ } else if (item instanceof Table) {
541
+ paragraphs_and_tables.push(["t", item]);
542
+ for (const row of item.rows) {
543
+ for (const cell of row.cells) {
544
+ if (seen_cells.has(cell._element)) {
545
+ continue;
546
+ }
547
+ seen_cells.add(cell._element);
548
+ walk(cell);
549
+ }
550
+ }
551
+ }
552
+ }
553
+ }
554
+
555
+ walk(doc);
556
+
557
+ const heading_indices: number[] = [];
558
+ for (let idx = 0; idx < paragraphs_and_tables.length; idx++) {
559
+ const [kind, item] = paragraphs_and_tables[idx];
560
+ if (kind !== "p") continue;
561
+
562
+ let hasOffset = false;
563
+ if (paragraph_offsets instanceof Map) {
564
+ hasOffset = paragraph_offsets.has(item._element);
565
+ } else {
566
+ hasOffset = item._element in (paragraph_offsets as any);
567
+ }
568
+ if (!hasOffset) {
569
+ continue;
570
+ }
571
+
572
+ if (!_is_heading(item)) continue;
573
+ if (!_heading_passes_quality_filter_fast(item, projected_body, paragraph_offsets)) continue;
574
+
575
+ heading_indices.push(idx);
576
+ }
577
+
578
+ if (heading_indices.length === 0) return [];
579
+
580
+ const nodes: OutlineNode[] = [];
581
+ for (let h_pos = 0; h_pos < heading_indices.length; h_pos++) {
582
+ const item_idx = heading_indices[h_pos];
583
+ const paragraph = paragraphs_and_tables[item_idx][1] as Paragraph;
584
+ const level = _heading_level(paragraph);
585
+ const text = _heading_text_fast(paragraph, projected_body, paragraph_offsets);
586
+ const style = _determine_heading_style(paragraph);
587
+
588
+ // Owned range: items strictly between this heading and the next equal-or-higher heading.
589
+ let owned_end = item_idx;
590
+ for (let next_h_pos = h_pos + 1; next_h_pos < heading_indices.length; next_h_pos++) {
591
+ const next_idx = heading_indices[next_h_pos];
592
+ const next_paragraph = paragraphs_and_tables[next_idx][1] as Paragraph;
593
+ if (_heading_level(next_paragraph) <= level) {
594
+ owned_end = next_idx;
595
+ break;
596
+ }
597
+ }
598
+ if (owned_end === item_idx) {
599
+ owned_end = paragraphs_and_tables.length;
600
+ }
601
+
602
+ const owned = paragraphs_and_tables.slice(item_idx + 1, owned_end);
603
+
604
+ // has_table: nearest-claim semantics (no bubbling to ancestors).
605
+ let has_table = false;
606
+ for (const [kind2, item2] of owned) {
607
+ if (kind2 === "p" && _is_heading(item2)) {
608
+ break;
609
+ }
610
+ if (kind2 === "t") {
611
+ has_table = true;
612
+ break;
613
+ }
614
+ }
615
+
616
+ // Footnote IDs in document order, deduped.
617
+ const footnote_ids = _collect_footnote_ids_fast(owned);
618
+
619
+ // Page resolution from the paragraph's known offset.
620
+ let para_offset: [number, number] | undefined;
621
+ if (paragraph_offsets instanceof Map) {
622
+ para_offset = paragraph_offsets.get(paragraph._element);
623
+ } else {
624
+ para_offset = paragraph_offsets[paragraph._element as any];
625
+ }
626
+
627
+ let page_num = 1;
628
+ if (para_offset !== undefined) {
629
+ const [start_offset] = para_offset;
630
+ page_num = _offset_to_page(start_offset, body_page_offsets);
631
+ }
632
+
633
+ nodes.push({
634
+ level,
635
+ text,
636
+ page: page_num,
637
+ style,
638
+ has_table,
639
+ footnote_ids,
640
+ });
641
+ }
642
+
643
+ return nodes;
644
+ }
645
+
646
+ function _heading_passes_quality_filter_fast(
647
+ paragraph: Paragraph,
648
+ projected_body: string,
649
+ paragraph_offsets: Map<any, [number, number]> | Record<string, [number, number]>,
650
+ ): boolean {
651
+ const style = _determine_heading_style(paragraph);
652
+ if (style !== "(heuristic)") return true;
653
+
654
+ const text = _heading_text_fast(paragraph, projected_body, paragraph_offsets);
655
+ if (!text) return false;
656
+ const words = text.match(/\w+/g) || [];
657
+ return words.length >= _HEURISTIC_MIN_WORDS;
658
+ }
659
+
660
+ function _heading_text_fast(
661
+ paragraph: Paragraph,
662
+ projected_body: string,
663
+ paragraph_offsets: Map<any, [number, number]> | Record<string, [number, number]>,
664
+ ): string {
665
+ let offset: [number, number] | undefined;
666
+ if (paragraph_offsets instanceof Map) {
667
+ offset = paragraph_offsets.get(paragraph._element);
668
+ } else {
669
+ offset = paragraph_offsets[paragraph._element as any];
670
+ }
671
+
672
+ if (offset === undefined) {
673
+ return "";
674
+ }
675
+ const [start, length] = offset;
676
+ const raw = projected_body.substring(start, start + length);
677
+ let cleaned = _strip_critic_markup(raw);
678
+ cleaned = _strip_inline_formatting(cleaned);
679
+ cleaned = cleaned.replace(/^#+\s+/, "");
680
+ return cleaned.trim();
681
+ }
682
+
683
+ function _collect_footnote_ids_fast(owned_items: ["p" | "t", any][]): string[] {
684
+ const seen = new Set<string>();
685
+ const ordered: string[] = [];
686
+ for (const [kind, item] of owned_items) {
687
+ if (kind !== "p") continue;
688
+ for (const event of iter_paragraph_content(item)) {
689
+ if (!("type" in event)) continue;
690
+ let fn_id = "";
691
+ if (event.type === "footnote") fn_id = `fn-${event.id}`;
692
+ else if (event.type === "endnote") fn_id = `en-${event.id}`;
693
+ else continue;
694
+
695
+ if (!seen.has(fn_id)) {
696
+ seen.add(fn_id);
697
+ ordered.push(fn_id);
698
+ }
699
+ }
700
+ }
701
+ return ordered;
702
+ }
@@ -0,0 +1,98 @@
1
+ import { describe, it, expect } from 'vitest';
2
+ import { createTestDocument } from './test-utils.js';
3
+ import { RedlineEngine } from './engine.js';
4
+ import { DocumentMapper } from './mapper.js';
5
+ import { extract_outline } from './outline.js';
6
+ import { _extractTextFromDoc } from './ingest.js';
7
+ import { paginate } from './pagination.js';
8
+
9
+ describe('Parity Gaps (TDD)', () => {
10
+ it('GAP 2: original_view maps deleted text and validate_edits throws actionable deletion error', async () => {
11
+ const doc = await createTestDocument();
12
+ const xmlDoc = doc.element.ownerDocument!;
13
+
14
+ // Create a paragraph with a tracked deletion: <w:p><w:del w:id="1" w:author="Test Negotiator"><w:r><w:t>Deleted sentence.</w:t></w:r></w:del></w:p>
15
+ const p = xmlDoc.createElement('w:p');
16
+ const del = xmlDoc.createElement('w:del');
17
+ del.setAttribute('w:id', '1');
18
+ del.setAttribute('w:author', 'Test Negotiator');
19
+
20
+ const r = xmlDoc.createElement('w:r');
21
+ const t = xmlDoc.createElement('w:t');
22
+ t.textContent = 'Deleted sentence.';
23
+
24
+ r.appendChild(t);
25
+ del.appendChild(r);
26
+ p.appendChild(del);
27
+ doc.element.appendChild(p);
28
+
29
+ // 1. Verify original_view mapping
30
+ const mapperOrig = new DocumentMapper(doc, false, true);
31
+ expect(mapperOrig.full_text).toContain('Deleted sentence.');
32
+
33
+ const mapperRaw = new DocumentMapper(doc, false, false);
34
+ expect(mapperRaw.full_text).toContain('{--Deleted sentence.--}');
35
+
36
+ // 2. Validate modification targeting deleted text
37
+ const engine = new RedlineEngine(doc);
38
+ const errors = engine.validate_edits([
39
+ {
40
+ target_text: 'Deleted sentence.',
41
+ new_text: 'Active replacement text.',
42
+ }
43
+ ]);
44
+
45
+ expect(errors.length).toBe(1);
46
+ expect(errors[0]).toContain('Target text matches text inside a tracked deletion by Test Negotiator.');
47
+ expect(errors[0]).toContain('Reject/accept that change first or target the active replacement text instead.');
48
+ });
49
+
50
+ it('GAP 1: heading inside a deleted region is filtered out when using paragraph_offsets', async () => {
51
+ const doc = await createTestDocument();
52
+ const xmlDoc = doc.element.ownerDocument!;
53
+
54
+ const p1 = xmlDoc.createElement('w:p');
55
+ const p1Pr = xmlDoc.createElement('w:pPr');
56
+ const p1Style = xmlDoc.createElement('w:pStyle');
57
+ p1Style.setAttribute('w:val', 'Heading1');
58
+ p1Pr.appendChild(p1Style);
59
+ p1.appendChild(p1Pr);
60
+ const r1 = xmlDoc.createElement('w:r');
61
+ const t1 = xmlDoc.createElement('w:t');
62
+ t1.textContent = 'Active Heading';
63
+ r1.appendChild(t1);
64
+ p1.appendChild(r1);
65
+ doc.element.appendChild(p1);
66
+
67
+ const p2 = xmlDoc.createElement('w:p');
68
+ const p2Pr = xmlDoc.createElement('w:pPr');
69
+ const p2Style = xmlDoc.createElement('w:pStyle');
70
+ p2Style.setAttribute('w:val', 'Heading1');
71
+ p2Pr.appendChild(p2Style);
72
+ p2.appendChild(p2Pr);
73
+ const r2 = xmlDoc.createElement('w:r');
74
+ const t2 = xmlDoc.createElement('w:t');
75
+ t2.textContent = 'Deleted Heading';
76
+ r2.appendChild(t2);
77
+ p2.appendChild(r2);
78
+ doc.element.appendChild(p2);
79
+
80
+ const extract_res = _extractTextFromDoc(doc, false, false, true) as { text: string; paragraph_offsets: Map<any, [number, number]> };
81
+
82
+ // Simulate deletion/skipping of p2 during projection
83
+ extract_res.paragraph_offsets.delete(p2);
84
+
85
+ const pages = paginate(extract_res.text, '');
86
+ const nodes = extract_outline(
87
+ doc,
88
+ extract_res.text,
89
+ pages.body_pages,
90
+ pages.body_page_offsets,
91
+ extract_res.paragraph_offsets as any,
92
+ );
93
+
94
+ // Only "Active Heading" should be in the outline; "Deleted Heading" must be skipped because it is not in paragraph_offsets.
95
+ expect(nodes.length).toBe(1);
96
+ expect(nodes[0].text).toBe('Active Heading');
97
+ });
98
+ });