@adeu/core 1.10.1 → 1.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +229 -21
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +9 -3
- package/dist/index.d.ts +9 -3
- package/dist/index.js +228 -21
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/src/engine.bugs.test.ts +2 -2
- package/src/engine.ts +53 -6
- package/src/index.ts +1 -1
- package/src/ingest.ts +32 -8
- package/src/mapper.ts +14 -8
- package/src/outline.ts +196 -1
- package/src/parity_gaps.test.ts +98 -0
package/package.json
CHANGED
package/src/engine.bugs.test.ts
CHANGED
|
@@ -251,14 +251,14 @@ describe("Resolved Bugs Core Engine Verification", () => {
|
|
|
251
251
|
const p = addParagraph(doc, "Short heading");
|
|
252
252
|
|
|
253
253
|
const fakeCache = {
|
|
254
|
-
|
|
254
|
+
Heading3: { name: "Heading 3", outline_level: 2, bold: true },
|
|
255
255
|
};
|
|
256
256
|
(doc.pkg as any)._adeu_style_cache = [fakeCache, "Normal"];
|
|
257
257
|
|
|
258
258
|
const docEl = p.ownerDocument!;
|
|
259
259
|
const pPr = docEl.createElement("w:pPr");
|
|
260
260
|
const pStyle = docEl.createElement("w:pStyle");
|
|
261
|
-
pStyle.setAttribute("w:val", "
|
|
261
|
+
pStyle.setAttribute("w:val", "Heading3");
|
|
262
262
|
pPr.appendChild(pStyle);
|
|
263
263
|
p.insertBefore(pPr, p.firstChild);
|
|
264
264
|
|
package/src/engine.ts
CHANGED
|
@@ -221,6 +221,7 @@ export class RedlineEngine {
|
|
|
221
221
|
public mapper: DocumentMapper;
|
|
222
222
|
public comments_manager: CommentsManager;
|
|
223
223
|
public clean_mapper: DocumentMapper | null = null;
|
|
224
|
+
public original_mapper: DocumentMapper | null = null;
|
|
224
225
|
public skipped_details: string[] = [];
|
|
225
226
|
|
|
226
227
|
constructor(doc: DocumentObject, author: string = "Adeu AI (TS)") {
|
|
@@ -1195,12 +1196,16 @@ export class RedlineEngine {
|
|
|
1195
1196
|
|
|
1196
1197
|
let matches = this.mapper.find_all_match_indices(edit.target_text);
|
|
1197
1198
|
let activeText = this.mapper.full_text;
|
|
1199
|
+
let target_mapper = this.mapper;
|
|
1198
1200
|
|
|
1199
1201
|
if (matches.length === 0) {
|
|
1200
1202
|
if (!this.clean_mapper)
|
|
1201
1203
|
this.clean_mapper = new DocumentMapper(this.doc, true);
|
|
1202
1204
|
matches = this.clean_mapper.find_all_match_indices(edit.target_text);
|
|
1203
|
-
if (matches.length > 0)
|
|
1205
|
+
if (matches.length > 0) {
|
|
1206
|
+
activeText = this.clean_mapper.full_text;
|
|
1207
|
+
target_mapper = this.clean_mapper;
|
|
1208
|
+
}
|
|
1204
1209
|
}
|
|
1205
1210
|
|
|
1206
1211
|
// BUG-23-5: a copy of the target that lives entirely inside a tracked
|
|
@@ -1208,7 +1213,7 @@ export class RedlineEngine {
|
|
|
1208
1213
|
// count toward ambiguity. Drop matches whose overlapping real text is
|
|
1209
1214
|
// exclusively deleted. Only applies to the raw mapper (the clean mapper
|
|
1210
1215
|
// already omits deleted text).
|
|
1211
|
-
if (activeText === this.mapper.full_text && matches.length >
|
|
1216
|
+
if (activeText === this.mapper.full_text && matches.length > 0) {
|
|
1212
1217
|
const liveMatches = matches.filter(([start, length]) => {
|
|
1213
1218
|
const realSpans = this.mapper.spans.filter(
|
|
1214
1219
|
(s) => s.run !== null && s.end > start && s.start < start + length,
|
|
@@ -1218,13 +1223,55 @@ export class RedlineEngine {
|
|
|
1218
1223
|
// part of a tracked deletion).
|
|
1219
1224
|
return realSpans.some((s) => !s.del_id);
|
|
1220
1225
|
});
|
|
1221
|
-
|
|
1226
|
+
matches = liveMatches;
|
|
1222
1227
|
}
|
|
1223
1228
|
|
|
1229
|
+
let is_deleted_text = false;
|
|
1230
|
+
const deleted_authors = new Set<string>();
|
|
1231
|
+
|
|
1224
1232
|
if (matches.length === 0) {
|
|
1225
|
-
|
|
1226
|
-
|
|
1227
|
-
|
|
1233
|
+
if (!this.original_mapper) {
|
|
1234
|
+
this.original_mapper = new DocumentMapper(this.doc, false, true);
|
|
1235
|
+
}
|
|
1236
|
+
const orig_matches = this.original_mapper.find_all_match_indices(edit.target_text);
|
|
1237
|
+
if (orig_matches.length > 0) {
|
|
1238
|
+
is_deleted_text = true;
|
|
1239
|
+
for (const [start, length] of orig_matches) {
|
|
1240
|
+
const spans = this.original_mapper.spans.filter(
|
|
1241
|
+
(s) => s.end > start && s.start < start + length,
|
|
1242
|
+
);
|
|
1243
|
+
for (const s of spans) {
|
|
1244
|
+
if (s.run !== null) {
|
|
1245
|
+
let parent = s.run._element as Node | null;
|
|
1246
|
+
while (parent) {
|
|
1247
|
+
if (parent.nodeType === 1 && (parent as Element).tagName === "w:del") {
|
|
1248
|
+
const auth = (parent as Element).getAttribute("w:author");
|
|
1249
|
+
if (auth) {
|
|
1250
|
+
deleted_authors.add(auth);
|
|
1251
|
+
}
|
|
1252
|
+
break;
|
|
1253
|
+
}
|
|
1254
|
+
parent = parent.parentNode;
|
|
1255
|
+
}
|
|
1256
|
+
}
|
|
1257
|
+
}
|
|
1258
|
+
}
|
|
1259
|
+
}
|
|
1260
|
+
}
|
|
1261
|
+
|
|
1262
|
+
if (matches.length === 0) {
|
|
1263
|
+
if (is_deleted_text) {
|
|
1264
|
+
const author_phrase = deleted_authors.size > 0
|
|
1265
|
+
? `by ${Array.from(deleted_authors).sort().join(", ")}`
|
|
1266
|
+
: "by an existing revision";
|
|
1267
|
+
errors.push(
|
|
1268
|
+
`- Edit ${i + 1} Failed: Target text matches text inside a tracked deletion ${author_phrase}. Reject/accept that change first or target the active replacement text instead.`,
|
|
1269
|
+
);
|
|
1270
|
+
} else {
|
|
1271
|
+
errors.push(
|
|
1272
|
+
`- Edit ${i + 1} Failed: Target text not found in document:\n "${edit.target_text}"`,
|
|
1273
|
+
);
|
|
1274
|
+
}
|
|
1228
1275
|
} else if (matches.length > 1) {
|
|
1229
1276
|
const positions: [number, number][] = matches.map(([start, length]) => [
|
|
1230
1277
|
start,
|
package/src/index.ts
CHANGED
|
@@ -9,5 +9,5 @@ export { generate_edits_from_text, trim_common_context, create_unified_diff, cre
|
|
|
9
9
|
export { apply_edits_to_markdown } from './markup.js';
|
|
10
10
|
export { paginate, split_structural_appendix, PaginationResult, PageInfo } from './pagination.js';
|
|
11
11
|
export { extract_outline, OutlineNode } from './outline.js';
|
|
12
|
-
export { extractTextFromBuffer } from './ingest.js';
|
|
12
|
+
export { extractTextFromBuffer, _extractTextFromDoc } from './ingest.js';
|
|
13
13
|
export { finalize_document, FinalizeOptions, FinalizeResult } from './sanitize/core.js';
|
package/src/ingest.ts
CHANGED
|
@@ -11,18 +11,24 @@ import { extract_comments_data } from './comments.js';
|
|
|
11
11
|
|
|
12
12
|
/**
 * Loads a document from a raw buffer and returns its extracted text.
 *
 * @param buffer - Bytes handed to `DocumentObject.load`.
 * @param cleanView - Forwarded unchanged to `_extractTextFromDoc`.
 * @returns The extracted text as a plain string.
 */
export async function extractTextFromBuffer(buffer: Buffer, cleanView = false): Promise<string> {
  const doc = await DocumentObject.load(buffer);
  // `_extractTextFromDoc` returns either a string or `{ text, paragraph_offsets }`.
  // Narrow at runtime instead of asserting, so a future change to the defaults
  // cannot leak an object through a `Promise<string>` signature.
  const extracted = _extractTextFromDoc(doc, cleanView);
  return typeof extracted === 'string' ? extracted : extracted.text;
}
|
|
16
16
|
|
|
17
|
-
export function _extractTextFromDoc(
|
|
17
|
+
export function _extractTextFromDoc(
|
|
18
|
+
doc: DocumentObject,
|
|
19
|
+
cleanView = false,
|
|
20
|
+
includeAppendix = true,
|
|
21
|
+
return_paragraph_offsets = false,
|
|
22
|
+
): string | { text: string; paragraph_offsets: Map<any, [number, number]> } {
|
|
18
23
|
const comments_map = extract_comments_data(doc.pkg);
|
|
19
24
|
|
|
20
25
|
const full_text: string[] = [];
|
|
26
|
+
const paragraph_offsets = new Map<any, [number, number]>();
|
|
21
27
|
let cursor = 0;
|
|
22
28
|
|
|
23
29
|
for (const part of iter_document_parts(doc)) {
|
|
24
30
|
const part_cursor = full_text.length > 0 ? cursor + 2 : cursor;
|
|
25
|
-
const part_text = _extract_blocks(part, comments_map, cleanView, part_cursor);
|
|
31
|
+
const part_text = _extract_blocks(part, comments_map, cleanView, part_cursor, return_paragraph_offsets ? paragraph_offsets : undefined);
|
|
26
32
|
if (part_text) {
|
|
27
33
|
if (full_text.length > 0) cursor += 2;
|
|
28
34
|
full_text.push(part_text);
|
|
@@ -37,10 +43,19 @@ export function _extractTextFromDoc(doc: DocumentObject, cleanView = false, incl
|
|
|
37
43
|
if (appendix) base_text += appendix;
|
|
38
44
|
}
|
|
39
45
|
|
|
46
|
+
if (return_paragraph_offsets) {
|
|
47
|
+
return { text: base_text, paragraph_offsets };
|
|
48
|
+
}
|
|
40
49
|
return base_text;
|
|
41
50
|
}
|
|
42
51
|
|
|
43
|
-
function _extract_blocks(
|
|
52
|
+
function _extract_blocks(
|
|
53
|
+
container: any,
|
|
54
|
+
comments_map: any,
|
|
55
|
+
cleanView: boolean,
|
|
56
|
+
cursor: number,
|
|
57
|
+
paragraph_offsets?: Map<any, [number, number]>
|
|
58
|
+
): string {
|
|
44
59
|
const part = container.part || container;
|
|
45
60
|
const [style_cache, default_pstyle] = _get_style_cache(part);
|
|
46
61
|
|
|
@@ -62,7 +77,7 @@ function _extract_blocks(container: any, comments_map: any, cleanView: boolean,
|
|
|
62
77
|
const block_start = local_cursor;
|
|
63
78
|
|
|
64
79
|
if (item.constructor.name === 'FootnoteItem') {
|
|
65
|
-
const fn_text = _extract_blocks(item, comments_map, cleanView, block_start);
|
|
80
|
+
const fn_text = _extract_blocks(item, comments_map, cleanView, block_start, paragraph_offsets);
|
|
66
81
|
if (fn_text) {
|
|
67
82
|
blocks.push(fn_text);
|
|
68
83
|
local_cursor = block_start + fn_text.length;
|
|
@@ -78,11 +93,14 @@ function _extract_blocks(container: any, comments_map: any, cleanView: boolean,
|
|
|
78
93
|
const p_text = build_paragraph_text(item, comments_map, cleanView, style_cache, default_pstyle);
|
|
79
94
|
const full_block = prefix + p_text;
|
|
80
95
|
blocks.push(full_block);
|
|
96
|
+
if (paragraph_offsets) {
|
|
97
|
+
paragraph_offsets.set(item._element, [block_start, full_block.length]);
|
|
98
|
+
}
|
|
81
99
|
local_cursor = block_start + full_block.length;
|
|
82
100
|
is_first_para = false;
|
|
83
101
|
is_first_block = false;
|
|
84
102
|
} else if (item instanceof Table) {
|
|
85
|
-
const table_text = extract_table(item, comments_map, cleanView, block_start);
|
|
103
|
+
const table_text = extract_table(item, comments_map, cleanView, block_start, paragraph_offsets);
|
|
86
104
|
if (table_text) {
|
|
87
105
|
blocks.push(table_text);
|
|
88
106
|
local_cursor = block_start + table_text.length;
|
|
@@ -97,7 +115,13 @@ function _extract_blocks(container: any, comments_map: any, cleanView: boolean,
|
|
|
97
115
|
return blocks.join('\n\n');
|
|
98
116
|
}
|
|
99
117
|
|
|
100
|
-
export function extract_table(
|
|
118
|
+
export function extract_table(
|
|
119
|
+
table: Table,
|
|
120
|
+
comments_map: any,
|
|
121
|
+
cleanView: boolean,
|
|
122
|
+
cursor: number,
|
|
123
|
+
paragraph_offsets?: Map<any, [number, number]>
|
|
124
|
+
): string {
|
|
101
125
|
const rows_text: string[] = [];
|
|
102
126
|
let rows_processed = 0;
|
|
103
127
|
let local_cursor = cursor;
|
|
@@ -124,7 +148,7 @@ export function extract_table(table: Table, comments_map: any, cleanView: boolea
|
|
|
124
148
|
|
|
125
149
|
if (!first_cell) cell_cursor += 3;
|
|
126
150
|
|
|
127
|
-
const cell_content = _extract_blocks(cell, comments_map, cleanView, cell_cursor);
|
|
151
|
+
const cell_content = _extract_blocks(cell, comments_map, cleanView, cell_cursor, paragraph_offsets);
|
|
128
152
|
cell_texts.push(cell_content);
|
|
129
153
|
cell_cursor += cell_content.length;
|
|
130
154
|
first_cell = false;
|
package/src/mapper.ts
CHANGED
|
@@ -95,15 +95,17 @@ export function renumber_snapshot_ids(doc: DocumentObject): [Record<string, stri
|
|
|
95
95
|
export class DocumentMapper {
|
|
96
96
|
public doc: DocumentObject;
|
|
97
97
|
public clean_view: boolean;
|
|
98
|
+
public original_view: boolean;
|
|
98
99
|
public comments_map: Record<string, any>;
|
|
99
100
|
public full_text: string = '';
|
|
100
101
|
public spans: TextSpan[] = [];
|
|
101
102
|
public appendix_start_index: number = -1;
|
|
102
103
|
private _text_chunks: string[] = [];
|
|
103
104
|
|
|
104
|
-
constructor(doc: DocumentObject, clean_view: boolean = false) {
|
|
105
|
+
constructor(doc: DocumentObject, clean_view: boolean = false, original_view: boolean = false) {
|
|
105
106
|
this.doc = doc;
|
|
106
107
|
this.clean_view = clean_view;
|
|
108
|
+
this.original_view = original_view;
|
|
107
109
|
this.comments_map = extract_comments_data(doc.pkg);
|
|
108
110
|
this._build_map();
|
|
109
111
|
}
|
|
@@ -200,16 +202,17 @@ export class DocumentMapper {
|
|
|
200
202
|
const del_node = trPr ? findChild(trPr, 'w:del') : null;
|
|
201
203
|
|
|
202
204
|
if (this.clean_view && del_node) continue;
|
|
205
|
+
if (this.original_view && ins) continue;
|
|
203
206
|
|
|
204
207
|
if (rows_processed > 0) {
|
|
205
208
|
this._add_virtual_text('\n', current, null);
|
|
206
209
|
current += 1;
|
|
207
210
|
}
|
|
208
211
|
|
|
209
|
-
if (ins && !this.clean_view) {
|
|
212
|
+
if (ins && !this.clean_view && !this.original_view) {
|
|
210
213
|
this._add_virtual_text('{++ ', current, null);
|
|
211
214
|
current += 4;
|
|
212
|
-
} else if (del_node && !this.clean_view) {
|
|
215
|
+
} else if (del_node && !this.clean_view && !this.original_view) {
|
|
213
216
|
this._add_virtual_text('{-- ', current, null);
|
|
214
217
|
current += 4;
|
|
215
218
|
}
|
|
@@ -230,11 +233,11 @@ export class DocumentMapper {
|
|
|
230
233
|
cells_processed += 1;
|
|
231
234
|
}
|
|
232
235
|
|
|
233
|
-
if (ins && !this.clean_view) {
|
|
236
|
+
if (ins && !this.clean_view && !this.original_view) {
|
|
234
237
|
const suffix = ` |Chg:${ins.getAttribute('w:id')}++}`;
|
|
235
238
|
this._add_virtual_text(suffix, current, null);
|
|
236
239
|
current += suffix.length;
|
|
237
|
-
} else if (del_node && !this.clean_view) {
|
|
240
|
+
} else if (del_node && !this.clean_view && !this.original_view) {
|
|
238
241
|
const suffix = ` |Chg:${del_node.getAttribute('w:id')}--}`;
|
|
239
242
|
this._add_virtual_text(suffix, current, null);
|
|
240
243
|
current += suffix.length;
|
|
@@ -343,13 +346,16 @@ export class DocumentMapper {
|
|
|
343
346
|
if (this.clean_view && Object.keys(active_del).length > 0) {
|
|
344
347
|
// pass
|
|
345
348
|
}
|
|
349
|
+
if (this.original_view && Object.keys(active_ins).length > 0) {
|
|
350
|
+
// pass
|
|
351
|
+
}
|
|
346
352
|
|
|
347
353
|
const full_seg_text = run_parts.map(x => x[1]).join('');
|
|
348
354
|
const curr_ins_id = Object.keys(active_ins).pop() || null;
|
|
349
355
|
const curr_del_id = Object.keys(active_del).pop() || null;
|
|
350
356
|
|
|
351
|
-
if (full_seg_text && !(this.clean_view && curr_del_id)) {
|
|
352
|
-
const new_wrappers = this.clean_view ? ['', ''] as [string, string] : this._get_wrappers(curr_ins_id, curr_del_id, active_ids, active_fmt);
|
|
357
|
+
if (full_seg_text && !(this.clean_view && curr_del_id) && !(this.original_view && curr_ins_id)) {
|
|
358
|
+
const new_wrappers = (this.clean_view || this.original_view) ? ['', ''] as [string, string] : this._get_wrappers(curr_ins_id, curr_del_id, active_ids, active_fmt);
|
|
353
359
|
const new_style: [string, string] = [prefix, suffix];
|
|
354
360
|
|
|
355
361
|
if (pending_runs.length > 0 && new_wrappers[0] === current_wrappers[0] && new_wrappers[1] === current_wrappers[1]) {
|
|
@@ -379,7 +385,7 @@ export class DocumentMapper {
|
|
|
379
385
|
}
|
|
380
386
|
}
|
|
381
387
|
|
|
382
|
-
if (!this.clean_view) {
|
|
388
|
+
if (!this.clean_view && !this.original_view) {
|
|
383
389
|
const has_meta = Object.keys(active_ins).length > 0 || Object.keys(active_del).length > 0 || active_ids.size > 0 || Object.keys(active_fmt).length > 0;
|
|
384
390
|
if (has_meta) {
|
|
385
391
|
deferred_meta_states.push([{...active_ins}, {...active_del}, new Set(active_ids), {...active_fmt}]);
|
package/src/outline.ts
CHANGED
|
@@ -40,12 +40,16 @@ export function extract_outline(
|
|
|
40
40
|
projected_body: string,
|
|
41
41
|
body_pages: string[],
|
|
42
42
|
body_page_offsets: number[],
|
|
43
|
-
paragraph_offsets: Record<string, [number, number]> | null = null,
|
|
43
|
+
paragraph_offsets: Record<string, [number, number]> | Map<any, [number, number]> | null = null,
|
|
44
44
|
): OutlineNode[] {
|
|
45
45
|
if (body_pages.length !== body_page_offsets.length) {
|
|
46
46
|
throw new Error("body_pages and body_page_offsets length mismatch");
|
|
47
47
|
}
|
|
48
48
|
|
|
49
|
+
if (paragraph_offsets) {
|
|
50
|
+
return _extract_outline_fast(doc, projected_body, body_page_offsets, paragraph_offsets);
|
|
51
|
+
}
|
|
52
|
+
|
|
49
53
|
const comments_map = extract_comments_data(doc.pkg);
|
|
50
54
|
const block_records = _walk_doc_body(doc, comments_map);
|
|
51
55
|
|
|
@@ -397,6 +401,7 @@ function _determine_heading_style(paragraph: Paragraph): string {
|
|
|
397
401
|
}
|
|
398
402
|
|
|
399
403
|
let outline_level: number | null = null;
|
|
404
|
+
let outline_level_from_style = false;
|
|
400
405
|
if (pPr) {
|
|
401
406
|
const oLvl = findChild(pPr, "w:outlineLvl");
|
|
402
407
|
if (oLvl && /^\d+$/.test(oLvl.getAttribute("w:val") || "")) {
|
|
@@ -406,6 +411,7 @@ function _determine_heading_style(paragraph: Paragraph): string {
|
|
|
406
411
|
|
|
407
412
|
if (outline_level === null && style_id && style_cache && style_cache[style_id]) {
|
|
408
413
|
outline_level = style_cache[style_id].outline_level;
|
|
414
|
+
outline_level_from_style = true;
|
|
409
415
|
}
|
|
410
416
|
|
|
411
417
|
const style_name =
|
|
@@ -422,6 +428,15 @@ function _determine_heading_style(paragraph: Paragraph): string {
|
|
|
422
428
|
}
|
|
423
429
|
}
|
|
424
430
|
|
|
431
|
+
if (outline_level_from_style && outline_level !== null) {
|
|
432
|
+
const is_heading_or_title =
|
|
433
|
+
normalized_style_name &&
|
|
434
|
+
(normalized_style_name.startsWith("Heading") || normalized_style_name === "Title");
|
|
435
|
+
if (!is_heading_or_title) {
|
|
436
|
+
outline_level = null;
|
|
437
|
+
}
|
|
438
|
+
}
|
|
439
|
+
|
|
425
440
|
if (outline_level !== null && outline_level >= 0 && outline_level <= 8) {
|
|
426
441
|
if (normalized_style_name && (normalized_style_name.startsWith("Heading") || normalized_style_name === "Title")) {
|
|
427
442
|
return normalized_style_name;
|
|
@@ -505,3 +520,183 @@ function _offset_to_page(offset: number, body_page_offsets: number[]): number {
|
|
|
505
520
|
}
|
|
506
521
|
return page;
|
|
507
522
|
}
|
|
523
|
+
|
|
524
|
+
/**
 * Builds the outline from precomputed paragraph offsets instead of re-walking
 * and re-projecting the document body.
 *
 * Paragraphs with no entry in `paragraph_offsets` are never treated as
 * headings, so a caller can exclude a paragraph simply by omitting it.
 *
 * @param doc - Document whose block items are walked (footnote items and table cells included).
 * @param projected_body - Text that the `[start, length]` offsets index into.
 * @param body_page_offsets - Passed to `_offset_to_page` to resolve a heading's page.
 * @param paragraph_offsets - `[start, length]` per paragraph element, as a Map or a Record.
 * @returns One node per accepted heading, in walk order; empty when there are none.
 */
function _extract_outline_fast(
  doc: DocumentObject,
  projected_body: string,
  body_page_offsets: number[],
  paragraph_offsets: Map<any, [number, number]> | Record<string, [number, number]>,
): OutlineNode[] {
  const blocks: ["p" | "t", any][] = [];
  const seen_cells = new Set<any>();

  // Flatten paragraphs and tables into walk order. A cell element that shows
  // up more than once while iterating rows is only descended into once.
  const walk = (container: any): void => {
    for (const item of iter_block_items(container)) {
      if (item.constructor.name === "FootnoteItem") {
        walk(item);
      } else if (item instanceof Paragraph) {
        blocks.push(["p", item]);
      } else if (item instanceof Table) {
        blocks.push(["t", item]);
        for (const row of item.rows) {
          for (const cell of row.cells) {
            if (seen_cells.has(cell._element)) {
              continue;
            }
            seen_cells.add(cell._element);
            walk(cell);
          }
        }
      }
    }
  };

  walk(doc);

  // Single place that knows about the two container shapes.
  // NOTE(review): in the Record form the element object is coerced to a string
  // property key — confirm that callers key the record compatibly.
  const has_offset = (element: any): boolean =>
    paragraph_offsets instanceof Map
      ? paragraph_offsets.has(element)
      : element in (paragraph_offsets as any);
  const offset_of = (element: any): [number, number] | undefined =>
    paragraph_offsets instanceof Map
      ? paragraph_offsets.get(element)
      : paragraph_offsets[element as any];

  // Accepted headings, with their level computed once so the owned-range scan
  // below does not call `_heading_level` again for every later heading.
  const headings: { index: number; paragraph: Paragraph; level: number }[] = [];
  blocks.forEach(([kind, item], index) => {
    if (kind !== "p") return;
    if (!has_offset(item._element)) return;
    if (!_is_heading(item)) return;
    if (!_heading_passes_quality_filter_fast(item, projected_body, paragraph_offsets)) return;
    headings.push({ index, paragraph: item as Paragraph, level: _heading_level(item) });
  });

  if (headings.length === 0) return [];

  return headings.map(({ index, paragraph, level }, h_pos) => {
    const text = _heading_text_fast(paragraph, projected_body, paragraph_offsets);
    const style = _determine_heading_style(paragraph);

    // Owned range: items strictly between this heading and the next
    // equal-or-higher heading, or the end of the document if there is none.
    const boundary = headings.find((h, pos) => pos > h_pos && h.level <= level);
    const owned_end = boundary ? boundary.index : blocks.length;
    const owned = blocks.slice(index + 1, owned_end);

    // has_table: nearest-claim semantics (no bubbling to ancestors).
    // NOTE(review): the scan stops at any paragraph `_is_heading` accepts, even
    // one that was filtered out above — confirm that is the intended boundary.
    let has_table = false;
    for (const [owned_kind, owned_item] of owned) {
      if (owned_kind === "t") {
        has_table = true;
        break;
      }
      if (_is_heading(owned_item)) {
        break;
      }
    }

    // Footnote IDs in document order, deduped.
    const footnote_ids = _collect_footnote_ids_fast(owned);

    // Page resolution from the paragraph's known offset; falls back to page 1.
    const para_offset = offset_of(paragraph._element);
    const page =
      para_offset !== undefined ? _offset_to_page(para_offset[0], body_page_offsets) : 1;

    return { level, text, page, style, has_table, footnote_ids };
  });
}
|
|
645
|
+
|
|
646
|
+
/**
 * Decides whether a heading candidate is kept in the outline.
 *
 * Any paragraph whose resolved style is not "(heuristic)" is accepted outright.
 * Heuristic headings must have non-empty text containing at least
 * `_HEURISTIC_MIN_WORDS` word tokens.
 *
 * @param paragraph - Candidate heading paragraph.
 * @param projected_body - Text the paragraph offsets index into.
 * @param paragraph_offsets - `[start, length]` per paragraph element.
 * @returns `true` when the paragraph should be treated as a heading.
 */
function _heading_passes_quality_filter_fast(
  paragraph: Paragraph,
  projected_body: string,
  paragraph_offsets: Map<any, [number, number]> | Record<string, [number, number]>,
): boolean {
  if (_determine_heading_style(paragraph) !== "(heuristic)") {
    return true;
  }

  const heading_text = _heading_text_fast(paragraph, projected_body, paragraph_offsets);
  if (heading_text.length === 0) {
    return false;
  }

  const tokens = heading_text.match(/\w+/g);
  const word_count = tokens === null ? 0 : tokens.length;
  return word_count >= _HEURISTIC_MIN_WORDS;
}
|
|
659
|
+
|
|
660
|
+
/**
 * Returns a heading's display text by slicing it out of the projected body.
 *
 * The slice is passed through `_strip_critic_markup` and
 * `_strip_inline_formatting`, a leading run of `#` followed by whitespace is
 * removed, and the result is trimmed.
 *
 * @param paragraph - Paragraph whose element is the lookup key.
 * @param projected_body - Text the `[start, length]` offsets index into.
 * @param paragraph_offsets - `[start, length]` per paragraph element.
 * @returns The cleaned text, or `""` when the paragraph has no recorded offset.
 */
function _heading_text_fast(
  paragraph: Paragraph,
  projected_body: string,
  paragraph_offsets: Map<any, [number, number]> | Record<string, [number, number]>,
): string {
  const span: [number, number] | undefined =
    paragraph_offsets instanceof Map
      ? paragraph_offsets.get(paragraph._element)
      : paragraph_offsets[paragraph._element as any];

  if (span === undefined) {
    return "";
  }

  const begin = span[0];
  const size = span[1];
  const slice = projected_body.substring(begin, begin + size);
  const without_markup = _strip_inline_formatting(_strip_critic_markup(slice));
  return without_markup.replace(/^#+\s+/, "").trim();
}
|
|
682
|
+
|
|
683
|
+
/**
 * Collects footnote and endnote reference ids from the paragraphs in
 * `owned_items`, in first-seen order and without duplicates.
 *
 * Footnote ids are emitted as `fn-<id>` and endnote ids as `en-<id>`.
 * Table entries are skipped.
 *
 * @param owned_items - `["p" | "t", item]` pairs belonging to one heading.
 * @returns The prefixed ids in encounter order.
 */
function _collect_footnote_ids_fast(owned_items: ["p" | "t", any][]): string[] {
  // A Set iterates in insertion order, so it handles both dedupe and ordering.
  const unique_ids = new Set<string>();

  for (const [kind, item] of owned_items) {
    if (kind !== "p") continue;

    for (const event of iter_paragraph_content(item)) {
      if (!("type" in event)) continue;

      switch (event.type) {
        case "footnote":
          unique_ids.add(`fn-${event.id}`);
          break;
        case "endnote":
          unique_ids.add(`en-${event.id}`);
          break;
        default:
          break;
      }
    }
  }

  return Array.from(unique_ids);
}
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest';
|
|
2
|
+
import { createTestDocument } from './test-utils.js';
|
|
3
|
+
import { RedlineEngine } from './engine.js';
|
|
4
|
+
import { DocumentMapper } from './mapper.js';
|
|
5
|
+
import { extract_outline } from './outline.js';
|
|
6
|
+
import { _extractTextFromDoc } from './ingest.js';
|
|
7
|
+
import { paginate } from './pagination.js';
|
|
8
|
+
|
|
9
|
+
describe('Parity Gaps (TDD)', () => {
  it('GAP 2: original_view maps deleted text and validate_edits throws actionable deletion error', async () => {
    const doc = await createTestDocument();
    const xmlDoc = doc.element.ownerDocument!;

    // Create a paragraph with a tracked deletion:
    // <w:p><w:del w:id="1" w:author="Test Negotiator"><w:r><w:t>Deleted sentence.</w:t></w:r></w:del></w:p>
    const p = xmlDoc.createElement('w:p');
    const del = xmlDoc.createElement('w:del');
    del.setAttribute('w:id', '1');
    del.setAttribute('w:author', 'Test Negotiator');

    const r = xmlDoc.createElement('w:r');
    const t = xmlDoc.createElement('w:t');
    t.textContent = 'Deleted sentence.';

    r.appendChild(t);
    del.appendChild(r);
    p.appendChild(del);
    doc.element.appendChild(p);

    // 1. Verify original_view mapping: the original view shows the deleted
    //    text bare, the raw view wraps it in deletion markup.
    const mapperOrig = new DocumentMapper(doc, false, true);
    expect(mapperOrig.full_text).toContain('Deleted sentence.');

    const mapperRaw = new DocumentMapper(doc, false, false);
    expect(mapperRaw.full_text).toContain('{--Deleted sentence.--}');

    // 2. Validate a modification targeting deleted text
    const engine = new RedlineEngine(doc);
    const errors = engine.validate_edits([
      {
        target_text: 'Deleted sentence.',
        new_text: 'Active replacement text.',
      }
    ]);

    expect(errors.length).toBe(1);
    expect(errors[0]).toContain('Target text matches text inside a tracked deletion by Test Negotiator.');
    expect(errors[0]).toContain('Reject/accept that change first or target the active replacement text instead.');
  });

  it('GAP 1: heading inside a deleted region is filtered out when using paragraph_offsets', async () => {
    const doc = await createTestDocument();
    const xmlDoc = doc.element.ownerDocument!;

    // Appends <w:p><w:pPr><w:pStyle w:val="Heading1"/></w:pPr><w:r><w:t>text</w:t></w:r></w:p>
    // to the document and returns the new paragraph element.
    const appendHeading1 = (text: string) => {
      const paragraph = xmlDoc.createElement('w:p');
      const pPr = xmlDoc.createElement('w:pPr');
      const pStyle = xmlDoc.createElement('w:pStyle');
      pStyle.setAttribute('w:val', 'Heading1');
      pPr.appendChild(pStyle);
      paragraph.appendChild(pPr);

      const run = xmlDoc.createElement('w:r');
      const runText = xmlDoc.createElement('w:t');
      runText.textContent = text;
      run.appendChild(runText);
      paragraph.appendChild(run);

      doc.element.appendChild(paragraph);
      return paragraph;
    };

    appendHeading1('Active Heading');
    const p2 = appendHeading1('Deleted Heading');

    // Requesting paragraph offsets makes the return value an object; narrow at
    // runtime rather than asserting the shape.
    const extract_res = _extractTextFromDoc(doc, false, false, true);
    if (typeof extract_res === 'string') {
      throw new Error('expected _extractTextFromDoc to return text with paragraph_offsets');
    }

    // Simulate deletion/skipping of p2 during projection
    extract_res.paragraph_offsets.delete(p2);

    const pages = paginate(extract_res.text, '');
    const nodes = extract_outline(
      doc,
      extract_res.text,
      pages.body_pages,
      pages.body_page_offsets,
      extract_res.paragraph_offsets,
    );

    // Only Active Heading should be in the outline, Deleted Heading must be skipped because it is not in paragraph_offsets!
    expect(nodes.length).toBe(1);
    expect(nodes[0].text).toBe('Active Heading');
  });
});
|