@adeu/core 1.10.0 → 1.11.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -68,12 +68,13 @@ interface TextSpan {
68
68
  declare class DocumentMapper {
69
69
  doc: DocumentObject;
70
70
  clean_view: boolean;
71
+ original_view: boolean;
71
72
  comments_map: Record<string, any>;
72
73
  full_text: string;
73
74
  spans: TextSpan[];
74
75
  appendix_start_index: number;
75
76
  private _text_chunks;
76
- constructor(doc: DocumentObject, clean_view?: boolean);
77
+ constructor(doc: DocumentObject, clean_view?: boolean, original_view?: boolean);
77
78
  private _build_map;
78
79
  private _map_blocks;
79
80
  private _map_table;
@@ -178,6 +179,7 @@ declare class RedlineEngine {
178
179
  mapper: DocumentMapper;
179
180
  comments_manager: CommentsManager;
180
181
  clean_mapper: DocumentMapper | null;
182
+ original_mapper: DocumentMapper | null;
181
183
  skipped_details: string[];
182
184
  constructor(doc: DocumentObject, author?: string);
183
185
  private _check_punctuation_warning;
@@ -321,9 +323,13 @@ interface OutlineNode {
321
323
  has_table: boolean;
322
324
  footnote_ids: string[];
323
325
  }
324
- declare function extract_outline(doc: DocumentObject, projected_body: string, body_pages: string[], body_page_offsets: number[], paragraph_offsets?: Record<string, [number, number]> | null): OutlineNode[];
326
+ declare function extract_outline(doc: DocumentObject, projected_body: string, body_pages: string[], body_page_offsets: number[], paragraph_offsets?: Record<string, [number, number]> | Map<any, [number, number]> | null): OutlineNode[];
325
327
 
326
328
  declare function extractTextFromBuffer(buffer: Buffer, cleanView?: boolean): Promise<string>;
329
+ declare function _extractTextFromDoc(doc: DocumentObject, cleanView?: boolean, includeAppendix?: boolean, return_paragraph_offsets?: boolean): string | {
330
+ text: string;
331
+ paragraph_offsets: Map<any, [number, number]>;
332
+ };
327
333
 
328
334
  interface FinalizeOptions {
329
335
  filename: string;
@@ -342,4 +348,4 @@ declare function finalize_document(doc: DocumentObject, options: FinalizeOptions
342
348
 
343
349
  declare function identifyEngine(): string;
344
350
 
345
- export { BatchValidationError, DocumentMapper, DocumentObject, type FinalizeOptions, type FinalizeResult, type OutlineNode, type PageInfo, type PaginationResult, RedlineEngine, type TextSpan, apply_edits_to_markdown, create_unified_diff, create_word_patch_diff, extractTextFromBuffer, extract_outline, finalize_document, generate_edits_from_text, identifyEngine, paginate, split_structural_appendix, trim_common_context };
351
+ export { BatchValidationError, DocumentMapper, DocumentObject, type FinalizeOptions, type FinalizeResult, type OutlineNode, type PageInfo, type PaginationResult, RedlineEngine, type TextSpan, _extractTextFromDoc, apply_edits_to_markdown, create_unified_diff, create_word_patch_diff, extractTextFromBuffer, extract_outline, finalize_document, generate_edits_from_text, identifyEngine, paginate, split_structural_appendix, trim_common_context };
package/dist/index.d.ts CHANGED
@@ -68,12 +68,13 @@ interface TextSpan {
68
68
  declare class DocumentMapper {
69
69
  doc: DocumentObject;
70
70
  clean_view: boolean;
71
+ original_view: boolean;
71
72
  comments_map: Record<string, any>;
72
73
  full_text: string;
73
74
  spans: TextSpan[];
74
75
  appendix_start_index: number;
75
76
  private _text_chunks;
76
- constructor(doc: DocumentObject, clean_view?: boolean);
77
+ constructor(doc: DocumentObject, clean_view?: boolean, original_view?: boolean);
77
78
  private _build_map;
78
79
  private _map_blocks;
79
80
  private _map_table;
@@ -178,6 +179,7 @@ declare class RedlineEngine {
178
179
  mapper: DocumentMapper;
179
180
  comments_manager: CommentsManager;
180
181
  clean_mapper: DocumentMapper | null;
182
+ original_mapper: DocumentMapper | null;
181
183
  skipped_details: string[];
182
184
  constructor(doc: DocumentObject, author?: string);
183
185
  private _check_punctuation_warning;
@@ -321,9 +323,13 @@ interface OutlineNode {
321
323
  has_table: boolean;
322
324
  footnote_ids: string[];
323
325
  }
324
- declare function extract_outline(doc: DocumentObject, projected_body: string, body_pages: string[], body_page_offsets: number[], paragraph_offsets?: Record<string, [number, number]> | null): OutlineNode[];
326
+ declare function extract_outline(doc: DocumentObject, projected_body: string, body_pages: string[], body_page_offsets: number[], paragraph_offsets?: Record<string, [number, number]> | Map<any, [number, number]> | null): OutlineNode[];
325
327
 
326
328
  declare function extractTextFromBuffer(buffer: Buffer, cleanView?: boolean): Promise<string>;
329
+ declare function _extractTextFromDoc(doc: DocumentObject, cleanView?: boolean, includeAppendix?: boolean, return_paragraph_offsets?: boolean): string | {
330
+ text: string;
331
+ paragraph_offsets: Map<any, [number, number]>;
332
+ };
327
333
 
328
334
  interface FinalizeOptions {
329
335
  filename: string;
@@ -342,4 +348,4 @@ declare function finalize_document(doc: DocumentObject, options: FinalizeOptions
342
348
 
343
349
  declare function identifyEngine(): string;
344
350
 
345
- export { BatchValidationError, DocumentMapper, DocumentObject, type FinalizeOptions, type FinalizeResult, type OutlineNode, type PageInfo, type PaginationResult, RedlineEngine, type TextSpan, apply_edits_to_markdown, create_unified_diff, create_word_patch_diff, extractTextFromBuffer, extract_outline, finalize_document, generate_edits_from_text, identifyEngine, paginate, split_structural_appendix, trim_common_context };
351
+ export { BatchValidationError, DocumentMapper, DocumentObject, type FinalizeOptions, type FinalizeResult, type OutlineNode, type PageInfo, type PaginationResult, RedlineEngine, type TextSpan, _extractTextFromDoc, apply_edits_to_markdown, create_unified_diff, create_word_patch_diff, extractTextFromBuffer, extract_outline, finalize_document, generate_edits_from_text, identifyEngine, paginate, split_structural_appendix, trim_common_context };
package/dist/index.js CHANGED
@@ -1216,14 +1216,16 @@ function* iter_paragraph_content(paragraph) {
1216
1216
  var DocumentMapper = class {
1217
1217
  doc;
1218
1218
  clean_view;
1219
+ original_view;
1219
1220
  comments_map;
1220
1221
  full_text = "";
1221
1222
  spans = [];
1222
1223
  appendix_start_index = -1;
1223
1224
  _text_chunks = [];
1224
- constructor(doc, clean_view = false) {
1225
+ constructor(doc, clean_view = false, original_view = false) {
1225
1226
  this.doc = doc;
1226
1227
  this.clean_view = clean_view;
1228
+ this.original_view = original_view;
1227
1229
  this.comments_map = extract_comments_data(doc.pkg);
1228
1230
  this._build_map();
1229
1231
  }
@@ -1305,14 +1307,15 @@ ${header}`;
1305
1307
  const ins = trPr ? findChild(trPr, "w:ins") : null;
1306
1308
  const del_node = trPr ? findChild(trPr, "w:del") : null;
1307
1309
  if (this.clean_view && del_node) continue;
1310
+ if (this.original_view && ins) continue;
1308
1311
  if (rows_processed > 0) {
1309
1312
  this._add_virtual_text("\n", current, null);
1310
1313
  current += 1;
1311
1314
  }
1312
- if (ins && !this.clean_view) {
1315
+ if (ins && !this.clean_view && !this.original_view) {
1313
1316
  this._add_virtual_text("{++ ", current, null);
1314
1317
  current += 4;
1315
- } else if (del_node && !this.clean_view) {
1318
+ } else if (del_node && !this.clean_view && !this.original_view) {
1316
1319
  this._add_virtual_text("{-- ", current, null);
1317
1320
  current += 4;
1318
1321
  }
@@ -1328,11 +1331,11 @@ ${header}`;
1328
1331
  current = this._map_blocks(cell, current);
1329
1332
  cells_processed += 1;
1330
1333
  }
1331
- if (ins && !this.clean_view) {
1334
+ if (ins && !this.clean_view && !this.original_view) {
1332
1335
  const suffix = ` |Chg:${ins.getAttribute("w:id")}++}`;
1333
1336
  this._add_virtual_text(suffix, current, null);
1334
1337
  current += suffix.length;
1335
- } else if (del_node && !this.clean_view) {
1338
+ } else if (del_node && !this.clean_view && !this.original_view) {
1336
1339
  const suffix = ` |Chg:${del_node.getAttribute("w:id")}--}`;
1337
1340
  this._add_virtual_text(suffix, current, null);
1338
1341
  current += suffix.length;
@@ -1426,11 +1429,13 @@ ${header}`;
1426
1429
  }
1427
1430
  if (this.clean_view && Object.keys(active_del).length > 0) {
1428
1431
  }
1432
+ if (this.original_view && Object.keys(active_ins).length > 0) {
1433
+ }
1429
1434
  const full_seg_text = run_parts.map((x) => x[1]).join("");
1430
1435
  const curr_ins_id = Object.keys(active_ins).pop() || null;
1431
1436
  const curr_del_id = Object.keys(active_del).pop() || null;
1432
- if (full_seg_text && !(this.clean_view && curr_del_id)) {
1433
- const new_wrappers = this.clean_view ? ["", ""] : this._get_wrappers(curr_ins_id, curr_del_id, active_ids, active_fmt);
1437
+ if (full_seg_text && !(this.clean_view && curr_del_id) && !(this.original_view && curr_ins_id)) {
1438
+ const new_wrappers = this.clean_view || this.original_view ? ["", ""] : this._get_wrappers(curr_ins_id, curr_del_id, active_ids, active_fmt);
1434
1439
  const new_style = [prefix, suffix];
1435
1440
  if (pending_runs.length > 0 && new_wrappers[0] === current_wrappers[0] && new_wrappers[1] === current_wrappers[1]) {
1436
1441
  let skip_leading_prefix = false;
@@ -1455,7 +1460,7 @@ ${header}`;
1455
1460
  }
1456
1461
  }
1457
1462
  }
1458
- if (!this.clean_view) {
1463
+ if (!this.clean_view && !this.original_view) {
1459
1464
  const has_meta = Object.keys(active_ins).length > 0 || Object.keys(active_del).length > 0 || active_ids.size > 0 || Object.keys(active_fmt).length > 0;
1460
1465
  if (has_meta) {
1461
1466
  deferred_meta_states.push([{ ...active_ins }, { ...active_del }, new Set(active_ids), { ...active_fmt }]);
@@ -2631,6 +2636,7 @@ var RedlineEngine = class {
2631
2636
  mapper;
2632
2637
  comments_manager;
2633
2638
  clean_mapper = null;
2639
+ original_mapper = null;
2634
2640
  skipped_details = [];
2635
2641
  constructor(doc, author = "Adeu AI (TS)") {
2636
2642
  this.doc = doc;
@@ -2670,7 +2676,10 @@ var RedlineEngine = class {
2670
2676
  if (!full_text) return [null, null];
2671
2677
  const before_start = Math.max(0, start_idx - 30);
2672
2678
  const context_before = full_text.substring(before_start, start_idx);
2673
- const context_after = full_text.substring(start_idx + length, start_idx + length + 30);
2679
+ const context_after = full_text.substring(
2680
+ start_idx + length,
2681
+ start_idx + length + 30
2682
+ );
2674
2683
  const critic_markup = `${context_before}{--${target_text}--}{++${new_text}++}${context_after}`;
2675
2684
  let clean_text = critic_markup;
2676
2685
  clean_text = clean_text.replace(/\{>>.*?<<\}/gs, "");
@@ -2842,7 +2851,9 @@ var RedlineEngine = class {
2842
2851
  overrideEl.parentNode?.removeChild(overrideEl);
2843
2852
  }
2844
2853
  }
2845
- pkg.parts = pkg.parts.filter((p) => !p.partname.toLowerCase().includes("comments"));
2854
+ pkg.parts = pkg.parts.filter(
2855
+ (p) => !p.partname.toLowerCase().includes("comments")
2856
+ );
2846
2857
  for (const key of Object.keys(pkg.unzipped)) {
2847
2858
  if (key.toLowerCase().includes("comments")) {
2848
2859
  delete pkg.unzipped[key];
@@ -3387,13 +3398,17 @@ var RedlineEngine = class {
3387
3398
  if (!edit.target_text) continue;
3388
3399
  let matches = this.mapper.find_all_match_indices(edit.target_text);
3389
3400
  let activeText = this.mapper.full_text;
3401
+ let target_mapper = this.mapper;
3390
3402
  if (matches.length === 0) {
3391
3403
  if (!this.clean_mapper)
3392
3404
  this.clean_mapper = new DocumentMapper(this.doc, true);
3393
3405
  matches = this.clean_mapper.find_all_match_indices(edit.target_text);
3394
- if (matches.length > 0) activeText = this.clean_mapper.full_text;
3406
+ if (matches.length > 0) {
3407
+ activeText = this.clean_mapper.full_text;
3408
+ target_mapper = this.clean_mapper;
3409
+ }
3395
3410
  }
3396
- if (activeText === this.mapper.full_text && matches.length > 1) {
3411
+ if (activeText === this.mapper.full_text && matches.length > 0) {
3397
3412
  const liveMatches = matches.filter(([start, length]) => {
3398
3413
  const realSpans = this.mapper.spans.filter(
3399
3414
  (s) => s.run !== null && s.end > start && s.start < start + length
@@ -3401,13 +3416,51 @@ var RedlineEngine = class {
3401
3416
  if (realSpans.length === 0) return true;
3402
3417
  return realSpans.some((s) => !s.del_id);
3403
3418
  });
3404
- if (liveMatches.length > 0) matches = liveMatches;
3419
+ matches = liveMatches;
3405
3420
  }
3421
+ let is_deleted_text = false;
3422
+ const deleted_authors = /* @__PURE__ */ new Set();
3406
3423
  if (matches.length === 0) {
3407
- errors.push(
3408
- `- Edit ${i + 1} Failed: Target text not found in document:
3424
+ if (!this.original_mapper) {
3425
+ this.original_mapper = new DocumentMapper(this.doc, false, true);
3426
+ }
3427
+ const orig_matches = this.original_mapper.find_all_match_indices(edit.target_text);
3428
+ if (orig_matches.length > 0) {
3429
+ is_deleted_text = true;
3430
+ for (const [start, length] of orig_matches) {
3431
+ const spans = this.original_mapper.spans.filter(
3432
+ (s) => s.end > start && s.start < start + length
3433
+ );
3434
+ for (const s of spans) {
3435
+ if (s.run !== null) {
3436
+ let parent = s.run._element;
3437
+ while (parent) {
3438
+ if (parent.nodeType === 1 && parent.tagName === "w:del") {
3439
+ const auth = parent.getAttribute("w:author");
3440
+ if (auth) {
3441
+ deleted_authors.add(auth);
3442
+ }
3443
+ break;
3444
+ }
3445
+ parent = parent.parentNode;
3446
+ }
3447
+ }
3448
+ }
3449
+ }
3450
+ }
3451
+ }
3452
+ if (matches.length === 0) {
3453
+ if (is_deleted_text) {
3454
+ const author_phrase = deleted_authors.size > 0 ? `by ${Array.from(deleted_authors).sort().join(", ")}` : "by an existing revision";
3455
+ errors.push(
3456
+ `- Edit ${i + 1} Failed: Target text matches text inside a tracked deletion ${author_phrase}. Reject/accept that change first or target the active replacement text instead.`
3457
+ );
3458
+ } else {
3459
+ errors.push(
3460
+ `- Edit ${i + 1} Failed: Target text not found in document:
3409
3461
  "${edit.target_text}"`
3410
- );
3462
+ );
3463
+ }
3411
3464
  } else if (matches.length > 1) {
3412
3465
  const positions = matches.map(([start, length]) => [
3413
3466
  start,
@@ -3428,7 +3481,10 @@ var RedlineEngine = class {
3428
3481
  const [pfx, sfx] = trim_common_context(matched, edit.new_text || "");
3429
3482
  const t_end = matched.length - sfx;
3430
3483
  const final_target = matched.substring(pfx, t_end);
3431
- const final_new = (edit.new_text || "").substring(pfx, (edit.new_text || "").length - sfx);
3484
+ const final_new = (edit.new_text || "").substring(
3485
+ pfx,
3486
+ (edit.new_text || "").length - sfx
3487
+ );
3432
3488
  if (final_target.includes("\n\n")) {
3433
3489
  if (final_new.includes("\n\n")) {
3434
3490
  const parts = matched.split("\n\n");
@@ -3584,7 +3640,9 @@ var RedlineEngine = class {
3584
3640
  if (dry_run_mode) {
3585
3641
  for (const edit of edits) {
3586
3642
  const single_errors = this.validate_edits([edit]);
3587
- const warning = this._check_punctuation_warning(edit.target_text || "");
3643
+ const warning = this._check_punctuation_warning(
3644
+ edit.target_text || ""
3645
+ );
3588
3646
  if (single_errors.length > 0) {
3589
3647
  skipped_edits++;
3590
3648
  edits_reports.push({
@@ -3638,7 +3696,9 @@ var RedlineEngine = class {
3638
3696
  for (const edit of cloned_edits) {
3639
3697
  const success = edit._applied_status || false;
3640
3698
  const error_msg = edit._error_msg || null;
3641
- const warning = this._check_punctuation_warning(edit.target_text || "");
3699
+ const warning = this._check_punctuation_warning(
3700
+ edit.target_text || ""
3701
+ );
3642
3702
  let critic_markup = null;
3643
3703
  let clean_text = null;
3644
3704
  if (success) {
@@ -3666,7 +3726,7 @@ var RedlineEngine = class {
3666
3726
  skipped_details: this.skipped_details,
3667
3727
  edits: edits_reports,
3668
3728
  engine: "node",
3669
- version: "1.9.0"
3729
+ version: "1.10.0"
3670
3730
  };
3671
3731
  }
3672
3732
  apply_edits(edits) {
@@ -4189,7 +4249,10 @@ var RedlineEngine = class {
4189
4249
  if (result.first_node.tagName === "w:p") {
4190
4250
  first_anchor_target = findAllDescendants(result.first_node, "w:ins")[0] || result.first_node;
4191
4251
  }
4192
- const anchor = ascend_to_paragraph_child(first_anchor_target, host_p);
4252
+ const anchor = ascend_to_paragraph_child(
4253
+ first_anchor_target,
4254
+ host_p
4255
+ );
4193
4256
  this._attach_comment(host_p, anchor, anchor, edit.comment);
4194
4257
  }
4195
4258
  }
@@ -4201,7 +4264,10 @@ var RedlineEngine = class {
4201
4264
  length,
4202
4265
  rebuild_map
4203
4266
  );
4204
- const virtual_spans = active_mapper.get_virtual_spans_in_range(start_idx, length);
4267
+ const virtual_spans = active_mapper.get_virtual_spans_in_range(
4268
+ start_idx,
4269
+ length
4270
+ );
4205
4271
  if (target_runs.length === 0 && virtual_spans.length === 0) return false;
4206
4272
  const affected_ps = /* @__PURE__ */ new Set();
4207
4273
  for (const run of target_runs) {
@@ -4284,7 +4350,10 @@ var RedlineEngine = class {
4284
4350
  let pPr = findChild(p1_element, "w:pPr");
4285
4351
  if (!pPr) {
4286
4352
  pPr = p1_element.ownerDocument.createElement("w:pPr");
4287
- p1_element.insertBefore(pPr, p1_element.firstChild);
4353
+ p1_element.insertBefore(
4354
+ pPr,
4355
+ p1_element.firstChild
4356
+ );
4288
4357
  }
4289
4358
  let rPr = findChild(pPr, "w:rPr");
4290
4359
  if (!rPr) {
@@ -4813,13 +4882,14 @@ async function extractTextFromBuffer(buffer, cleanView = false) {
4813
4882
  const doc = await DocumentObject.load(buffer);
4814
4883
  return _extractTextFromDoc(doc, cleanView);
4815
4884
  }
4816
- function _extractTextFromDoc(doc, cleanView = false, includeAppendix = true) {
4885
+ function _extractTextFromDoc(doc, cleanView = false, includeAppendix = true, return_paragraph_offsets = false) {
4817
4886
  const comments_map = extract_comments_data(doc.pkg);
4818
4887
  const full_text = [];
4888
+ const paragraph_offsets = /* @__PURE__ */ new Map();
4819
4889
  let cursor = 0;
4820
4890
  for (const part of iter_document_parts(doc)) {
4821
4891
  const part_cursor = full_text.length > 0 ? cursor + 2 : cursor;
4822
- const part_text = _extract_blocks(part, comments_map, cleanView, part_cursor);
4892
+ const part_text = _extract_blocks(part, comments_map, cleanView, part_cursor, return_paragraph_offsets ? paragraph_offsets : void 0);
4823
4893
  if (part_text) {
4824
4894
  if (full_text.length > 0) cursor += 2;
4825
4895
  full_text.push(part_text);
@@ -4831,9 +4901,12 @@ function _extractTextFromDoc(doc, cleanView = false, includeAppendix = true) {
4831
4901
  const appendix = build_structural_appendix(doc, base_text);
4832
4902
  if (appendix) base_text += appendix;
4833
4903
  }
4904
+ if (return_paragraph_offsets) {
4905
+ return { text: base_text, paragraph_offsets };
4906
+ }
4834
4907
  return base_text;
4835
4908
  }
4836
- function _extract_blocks(container, comments_map, cleanView, cursor) {
4909
+ function _extract_blocks(container, comments_map, cleanView, cursor, paragraph_offsets) {
4837
4910
  const part = container.part || container;
4838
4911
  const [style_cache, default_pstyle] = _get_style_cache(part);
4839
4912
  const blocks = [];
@@ -4852,7 +4925,7 @@ ${header}`;
4852
4925
  if (!is_first_block) local_cursor += 2;
4853
4926
  const block_start = local_cursor;
4854
4927
  if (item.constructor.name === "FootnoteItem") {
4855
- const fn_text = _extract_blocks(item, comments_map, cleanView, block_start);
4928
+ const fn_text = _extract_blocks(item, comments_map, cleanView, block_start, paragraph_offsets);
4856
4929
  if (fn_text) {
4857
4930
  blocks.push(fn_text);
4858
4931
  local_cursor = block_start + fn_text.length;
@@ -4868,11 +4941,14 @@ ${header}`;
4868
4941
  const p_text = build_paragraph_text(item, comments_map, cleanView, style_cache, default_pstyle);
4869
4942
  const full_block = prefix + p_text;
4870
4943
  blocks.push(full_block);
4944
+ if (paragraph_offsets) {
4945
+ paragraph_offsets.set(item._element, [block_start, full_block.length]);
4946
+ }
4871
4947
  local_cursor = block_start + full_block.length;
4872
4948
  is_first_para = false;
4873
4949
  is_first_block = false;
4874
4950
  } else if (item instanceof Table) {
4875
- const table_text = extract_table(item, comments_map, cleanView, block_start);
4951
+ const table_text = extract_table(item, comments_map, cleanView, block_start, paragraph_offsets);
4876
4952
  if (table_text) {
4877
4953
  blocks.push(table_text);
4878
4954
  local_cursor = block_start + table_text.length;
@@ -4885,7 +4961,7 @@ ${header}`;
4885
4961
  }
4886
4962
  return blocks.join("\n\n");
4887
4963
  }
4888
- function extract_table(table, comments_map, cleanView, cursor) {
4964
+ function extract_table(table, comments_map, cleanView, cursor, paragraph_offsets) {
4889
4965
  const rows_text = [];
4890
4966
  let rows_processed = 0;
4891
4967
  let local_cursor = cursor;
@@ -4904,7 +4980,7 @@ function extract_table(table, comments_map, cleanView, cursor) {
4904
4980
  if (seen_cells.has(cell)) continue;
4905
4981
  seen_cells.add(cell);
4906
4982
  if (!first_cell) cell_cursor += 3;
4907
- const cell_content = _extract_blocks(cell, comments_map, cleanView, cell_cursor);
4983
+ const cell_content = _extract_blocks(cell, comments_map, cleanView, cell_cursor, paragraph_offsets);
4908
4984
  cell_texts.push(cell_content);
4909
4985
  cell_cursor += cell_content.length;
4910
4986
  first_cell = false;
@@ -5161,6 +5237,9 @@ function extract_outline(doc, projected_body, body_pages, body_page_offsets, par
5161
5237
  if (body_pages.length !== body_page_offsets.length) {
5162
5238
  throw new Error("body_pages and body_page_offsets length mismatch");
5163
5239
  }
5240
+ if (paragraph_offsets) {
5241
+ return _extract_outline_fast(doc, projected_body, body_page_offsets, paragraph_offsets);
5242
+ }
5164
5243
  const comments_map = extract_comments_data(doc.pkg);
5165
5244
  const block_records = _walk_doc_body(doc, comments_map);
5166
5245
  const heading_indices = [];
@@ -5436,6 +5515,7 @@ function _determine_heading_style(paragraph) {
5436
5515
  if (pStyle) style_id = pStyle.getAttribute("w:val") || default_pstyle;
5437
5516
  }
5438
5517
  let outline_level = null;
5518
+ let outline_level_from_style = false;
5439
5519
  if (pPr) {
5440
5520
  const oLvl = findChild(pPr, "w:outlineLvl");
5441
5521
  if (oLvl && /^\d+$/.test(oLvl.getAttribute("w:val") || "")) {
@@ -5444,6 +5524,7 @@ function _determine_heading_style(paragraph) {
5444
5524
  }
5445
5525
  if (outline_level === null && style_id && style_cache && style_cache[style_id]) {
5446
5526
  outline_level = style_cache[style_id].outline_level;
5527
+ outline_level_from_style = true;
5447
5528
  }
5448
5529
  const style_name = style_id && style_cache && style_cache[style_id] ? style_cache[style_id].name : style_id;
5449
5530
  let normalized_style_name = style_name;
@@ -5454,6 +5535,12 @@ function _determine_heading_style(paragraph) {
5454
5535
  normalized_style_name = "Title";
5455
5536
  }
5456
5537
  }
5538
+ if (outline_level_from_style && outline_level !== null) {
5539
+ const is_heading_or_title = normalized_style_name && (normalized_style_name.startsWith("Heading") || normalized_style_name === "Title");
5540
+ if (!is_heading_or_title) {
5541
+ outline_level = null;
5542
+ }
5543
+ }
5457
5544
  if (outline_level !== null && outline_level >= 0 && outline_level <= 8) {
5458
5545
  if (normalized_style_name && (normalized_style_name.startsWith("Heading") || normalized_style_name === "Title")) {
5459
5546
  return normalized_style_name;
@@ -5502,6 +5589,146 @@ function _offset_to_page(offset, body_page_offsets) {
5502
5589
  }
5503
5590
  return page;
5504
5591
  }
5592
+ function _extract_outline_fast(doc, projected_body, body_page_offsets, paragraph_offsets) {
5593
+ const paragraphs_and_tables = [];
5594
+ const seen_cells = /* @__PURE__ */ new Set();
5595
+ function walk(container) {
5596
+ for (const item of iter_block_items(container)) {
5597
+ const i_type = item.constructor.name;
5598
+ if (i_type === "FootnoteItem") {
5599
+ walk(item);
5600
+ } else if (item instanceof Paragraph) {
5601
+ paragraphs_and_tables.push(["p", item]);
5602
+ } else if (item instanceof Table) {
5603
+ paragraphs_and_tables.push(["t", item]);
5604
+ for (const row of item.rows) {
5605
+ for (const cell of row.cells) {
5606
+ if (seen_cells.has(cell._element)) {
5607
+ continue;
5608
+ }
5609
+ seen_cells.add(cell._element);
5610
+ walk(cell);
5611
+ }
5612
+ }
5613
+ }
5614
+ }
5615
+ }
5616
+ walk(doc);
5617
+ const heading_indices = [];
5618
+ for (let idx = 0; idx < paragraphs_and_tables.length; idx++) {
5619
+ const [kind, item] = paragraphs_and_tables[idx];
5620
+ if (kind !== "p") continue;
5621
+ let hasOffset = false;
5622
+ if (paragraph_offsets instanceof Map) {
5623
+ hasOffset = paragraph_offsets.has(item._element);
5624
+ } else {
5625
+ hasOffset = item._element in paragraph_offsets;
5626
+ }
5627
+ if (!hasOffset) {
5628
+ continue;
5629
+ }
5630
+ if (!_is_heading(item)) continue;
5631
+ if (!_heading_passes_quality_filter_fast(item, projected_body, paragraph_offsets)) continue;
5632
+ heading_indices.push(idx);
5633
+ }
5634
+ if (heading_indices.length === 0) return [];
5635
+ const nodes = [];
5636
+ for (let h_pos = 0; h_pos < heading_indices.length; h_pos++) {
5637
+ const item_idx = heading_indices[h_pos];
5638
+ const paragraph = paragraphs_and_tables[item_idx][1];
5639
+ const level = _heading_level(paragraph);
5640
+ const text = _heading_text_fast(paragraph, projected_body, paragraph_offsets);
5641
+ const style = _determine_heading_style(paragraph);
5642
+ let owned_end = item_idx;
5643
+ for (let next_h_pos = h_pos + 1; next_h_pos < heading_indices.length; next_h_pos++) {
5644
+ const next_idx = heading_indices[next_h_pos];
5645
+ const next_paragraph = paragraphs_and_tables[next_idx][1];
5646
+ if (_heading_level(next_paragraph) <= level) {
5647
+ owned_end = next_idx;
5648
+ break;
5649
+ }
5650
+ }
5651
+ if (owned_end === item_idx) {
5652
+ owned_end = paragraphs_and_tables.length;
5653
+ }
5654
+ const owned = paragraphs_and_tables.slice(item_idx + 1, owned_end);
5655
+ let has_table = false;
5656
+ for (const [kind2, item2] of owned) {
5657
+ if (kind2 === "p" && _is_heading(item2)) {
5658
+ break;
5659
+ }
5660
+ if (kind2 === "t") {
5661
+ has_table = true;
5662
+ break;
5663
+ }
5664
+ }
5665
+ const footnote_ids = _collect_footnote_ids_fast(owned);
5666
+ let para_offset;
5667
+ if (paragraph_offsets instanceof Map) {
5668
+ para_offset = paragraph_offsets.get(paragraph._element);
5669
+ } else {
5670
+ para_offset = paragraph_offsets[paragraph._element];
5671
+ }
5672
+ let page_num = 1;
5673
+ if (para_offset !== void 0) {
5674
+ const [start_offset] = para_offset;
5675
+ page_num = _offset_to_page(start_offset, body_page_offsets);
5676
+ }
5677
+ nodes.push({
5678
+ level,
5679
+ text,
5680
+ page: page_num,
5681
+ style,
5682
+ has_table,
5683
+ footnote_ids
5684
+ });
5685
+ }
5686
+ return nodes;
5687
+ }
5688
+ function _heading_passes_quality_filter_fast(paragraph, projected_body, paragraph_offsets) {
5689
+ const style = _determine_heading_style(paragraph);
5690
+ if (style !== "(heuristic)") return true;
5691
+ const text = _heading_text_fast(paragraph, projected_body, paragraph_offsets);
5692
+ if (!text) return false;
5693
+ const words = text.match(/\w+/g) || [];
5694
+ return words.length >= _HEURISTIC_MIN_WORDS;
5695
+ }
5696
+ function _heading_text_fast(paragraph, projected_body, paragraph_offsets) {
5697
+ let offset;
5698
+ if (paragraph_offsets instanceof Map) {
5699
+ offset = paragraph_offsets.get(paragraph._element);
5700
+ } else {
5701
+ offset = paragraph_offsets[paragraph._element];
5702
+ }
5703
+ if (offset === void 0) {
5704
+ return "";
5705
+ }
5706
+ const [start, length] = offset;
5707
+ const raw = projected_body.substring(start, start + length);
5708
+ let cleaned = _strip_critic_markup(raw);
5709
+ cleaned = _strip_inline_formatting(cleaned);
5710
+ cleaned = cleaned.replace(/^#+\s+/, "");
5711
+ return cleaned.trim();
5712
+ }
5713
+ function _collect_footnote_ids_fast(owned_items) {
5714
+ const seen = /* @__PURE__ */ new Set();
5715
+ const ordered = [];
5716
+ for (const [kind, item] of owned_items) {
5717
+ if (kind !== "p") continue;
5718
+ for (const event of iter_paragraph_content(item)) {
5719
+ if (!("type" in event)) continue;
5720
+ let fn_id = "";
5721
+ if (event.type === "footnote") fn_id = `fn-${event.id}`;
5722
+ else if (event.type === "endnote") fn_id = `en-${event.id}`;
5723
+ else continue;
5724
+ if (!seen.has(fn_id)) {
5725
+ seen.add(fn_id);
5726
+ ordered.push(fn_id);
5727
+ }
5728
+ }
5729
+ }
5730
+ return ordered;
5731
+ }
5505
5732
 
5506
5733
  // src/sanitize/report.ts
5507
5734
  var SanitizeReport = class {
@@ -6188,6 +6415,7 @@ export {
6188
6415
  DocumentMapper,
6189
6416
  DocumentObject,
6190
6417
  RedlineEngine,
6418
+ _extractTextFromDoc,
6191
6419
  apply_edits_to_markdown,
6192
6420
  create_unified_diff,
6193
6421
  create_word_patch_diff,