@adeu/core 1.10.1 → 1.11.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -68,12 +68,13 @@ interface TextSpan {
68
68
  declare class DocumentMapper {
69
69
  doc: DocumentObject;
70
70
  clean_view: boolean;
71
+ original_view: boolean;
71
72
  comments_map: Record<string, any>;
72
73
  full_text: string;
73
74
  spans: TextSpan[];
74
75
  appendix_start_index: number;
75
76
  private _text_chunks;
76
- constructor(doc: DocumentObject, clean_view?: boolean);
77
+ constructor(doc: DocumentObject, clean_view?: boolean, original_view?: boolean);
77
78
  private _build_map;
78
79
  private _map_blocks;
79
80
  private _map_table;
@@ -178,6 +179,7 @@ declare class RedlineEngine {
178
179
  mapper: DocumentMapper;
179
180
  comments_manager: CommentsManager;
180
181
  clean_mapper: DocumentMapper | null;
182
+ original_mapper: DocumentMapper | null;
181
183
  skipped_details: string[];
182
184
  constructor(doc: DocumentObject, author?: string);
183
185
  private _check_punctuation_warning;
@@ -321,9 +323,13 @@ interface OutlineNode {
321
323
  has_table: boolean;
322
324
  footnote_ids: string[];
323
325
  }
324
- declare function extract_outline(doc: DocumentObject, projected_body: string, body_pages: string[], body_page_offsets: number[], paragraph_offsets?: Record<string, [number, number]> | null): OutlineNode[];
326
+ declare function extract_outline(doc: DocumentObject, projected_body: string, body_pages: string[], body_page_offsets: number[], paragraph_offsets?: Record<string, [number, number]> | Map<any, [number, number]> | null): OutlineNode[];
325
327
 
326
328
  declare function extractTextFromBuffer(buffer: Buffer, cleanView?: boolean): Promise<string>;
329
+ declare function _extractTextFromDoc(doc: DocumentObject, cleanView?: boolean, includeAppendix?: boolean, return_paragraph_offsets?: boolean): string | {
330
+ text: string;
331
+ paragraph_offsets: Map<any, [number, number]>;
332
+ };
327
333
 
328
334
  interface FinalizeOptions {
329
335
  filename: string;
@@ -342,4 +348,4 @@ declare function finalize_document(doc: DocumentObject, options: FinalizeOptions
342
348
 
343
349
  declare function identifyEngine(): string;
344
350
 
345
- export { BatchValidationError, DocumentMapper, DocumentObject, type FinalizeOptions, type FinalizeResult, type OutlineNode, type PageInfo, type PaginationResult, RedlineEngine, type TextSpan, apply_edits_to_markdown, create_unified_diff, create_word_patch_diff, extractTextFromBuffer, extract_outline, finalize_document, generate_edits_from_text, identifyEngine, paginate, split_structural_appendix, trim_common_context };
351
+ export { BatchValidationError, DocumentMapper, DocumentObject, type FinalizeOptions, type FinalizeResult, type OutlineNode, type PageInfo, type PaginationResult, RedlineEngine, type TextSpan, _extractTextFromDoc, apply_edits_to_markdown, create_unified_diff, create_word_patch_diff, extractTextFromBuffer, extract_outline, finalize_document, generate_edits_from_text, identifyEngine, paginate, split_structural_appendix, trim_common_context };
package/dist/index.d.ts CHANGED
@@ -68,12 +68,13 @@ interface TextSpan {
68
68
  declare class DocumentMapper {
69
69
  doc: DocumentObject;
70
70
  clean_view: boolean;
71
+ original_view: boolean;
71
72
  comments_map: Record<string, any>;
72
73
  full_text: string;
73
74
  spans: TextSpan[];
74
75
  appendix_start_index: number;
75
76
  private _text_chunks;
76
- constructor(doc: DocumentObject, clean_view?: boolean);
77
+ constructor(doc: DocumentObject, clean_view?: boolean, original_view?: boolean);
77
78
  private _build_map;
78
79
  private _map_blocks;
79
80
  private _map_table;
@@ -178,6 +179,7 @@ declare class RedlineEngine {
178
179
  mapper: DocumentMapper;
179
180
  comments_manager: CommentsManager;
180
181
  clean_mapper: DocumentMapper | null;
182
+ original_mapper: DocumentMapper | null;
181
183
  skipped_details: string[];
182
184
  constructor(doc: DocumentObject, author?: string);
183
185
  private _check_punctuation_warning;
@@ -321,9 +323,13 @@ interface OutlineNode {
321
323
  has_table: boolean;
322
324
  footnote_ids: string[];
323
325
  }
324
- declare function extract_outline(doc: DocumentObject, projected_body: string, body_pages: string[], body_page_offsets: number[], paragraph_offsets?: Record<string, [number, number]> | null): OutlineNode[];
326
+ declare function extract_outline(doc: DocumentObject, projected_body: string, body_pages: string[], body_page_offsets: number[], paragraph_offsets?: Record<string, [number, number]> | Map<any, [number, number]> | null): OutlineNode[];
325
327
 
326
328
  declare function extractTextFromBuffer(buffer: Buffer, cleanView?: boolean): Promise<string>;
329
+ declare function _extractTextFromDoc(doc: DocumentObject, cleanView?: boolean, includeAppendix?: boolean, return_paragraph_offsets?: boolean): string | {
330
+ text: string;
331
+ paragraph_offsets: Map<any, [number, number]>;
332
+ };
327
333
 
328
334
  interface FinalizeOptions {
329
335
  filename: string;
@@ -342,4 +348,4 @@ declare function finalize_document(doc: DocumentObject, options: FinalizeOptions
342
348
 
343
349
  declare function identifyEngine(): string;
344
350
 
345
- export { BatchValidationError, DocumentMapper, DocumentObject, type FinalizeOptions, type FinalizeResult, type OutlineNode, type PageInfo, type PaginationResult, RedlineEngine, type TextSpan, apply_edits_to_markdown, create_unified_diff, create_word_patch_diff, extractTextFromBuffer, extract_outline, finalize_document, generate_edits_from_text, identifyEngine, paginate, split_structural_appendix, trim_common_context };
351
+ export { BatchValidationError, DocumentMapper, DocumentObject, type FinalizeOptions, type FinalizeResult, type OutlineNode, type PageInfo, type PaginationResult, RedlineEngine, type TextSpan, _extractTextFromDoc, apply_edits_to_markdown, create_unified_diff, create_word_patch_diff, extractTextFromBuffer, extract_outline, finalize_document, generate_edits_from_text, identifyEngine, paginate, split_structural_appendix, trim_common_context };
package/dist/index.js CHANGED
@@ -1216,14 +1216,16 @@ function* iter_paragraph_content(paragraph) {
1216
1216
  var DocumentMapper = class {
1217
1217
  doc;
1218
1218
  clean_view;
1219
+ original_view;
1219
1220
  comments_map;
1220
1221
  full_text = "";
1221
1222
  spans = [];
1222
1223
  appendix_start_index = -1;
1223
1224
  _text_chunks = [];
1224
- constructor(doc, clean_view = false) {
1225
+ constructor(doc, clean_view = false, original_view = false) {
1225
1226
  this.doc = doc;
1226
1227
  this.clean_view = clean_view;
1228
+ this.original_view = original_view;
1227
1229
  this.comments_map = extract_comments_data(doc.pkg);
1228
1230
  this._build_map();
1229
1231
  }
@@ -1305,14 +1307,15 @@ ${header}`;
1305
1307
  const ins = trPr ? findChild(trPr, "w:ins") : null;
1306
1308
  const del_node = trPr ? findChild(trPr, "w:del") : null;
1307
1309
  if (this.clean_view && del_node) continue;
1310
+ if (this.original_view && ins) continue;
1308
1311
  if (rows_processed > 0) {
1309
1312
  this._add_virtual_text("\n", current, null);
1310
1313
  current += 1;
1311
1314
  }
1312
- if (ins && !this.clean_view) {
1315
+ if (ins && !this.clean_view && !this.original_view) {
1313
1316
  this._add_virtual_text("{++ ", current, null);
1314
1317
  current += 4;
1315
- } else if (del_node && !this.clean_view) {
1318
+ } else if (del_node && !this.clean_view && !this.original_view) {
1316
1319
  this._add_virtual_text("{-- ", current, null);
1317
1320
  current += 4;
1318
1321
  }
@@ -1328,11 +1331,11 @@ ${header}`;
1328
1331
  current = this._map_blocks(cell, current);
1329
1332
  cells_processed += 1;
1330
1333
  }
1331
- if (ins && !this.clean_view) {
1334
+ if (ins && !this.clean_view && !this.original_view) {
1332
1335
  const suffix = ` |Chg:${ins.getAttribute("w:id")}++}`;
1333
1336
  this._add_virtual_text(suffix, current, null);
1334
1337
  current += suffix.length;
1335
- } else if (del_node && !this.clean_view) {
1338
+ } else if (del_node && !this.clean_view && !this.original_view) {
1336
1339
  const suffix = ` |Chg:${del_node.getAttribute("w:id")}--}`;
1337
1340
  this._add_virtual_text(suffix, current, null);
1338
1341
  current += suffix.length;
@@ -1426,11 +1429,13 @@ ${header}`;
1426
1429
  }
1427
1430
  if (this.clean_view && Object.keys(active_del).length > 0) {
1428
1431
  }
1432
+ if (this.original_view && Object.keys(active_ins).length > 0) {
1433
+ }
1429
1434
  const full_seg_text = run_parts.map((x) => x[1]).join("");
1430
1435
  const curr_ins_id = Object.keys(active_ins).pop() || null;
1431
1436
  const curr_del_id = Object.keys(active_del).pop() || null;
1432
- if (full_seg_text && !(this.clean_view && curr_del_id)) {
1433
- const new_wrappers = this.clean_view ? ["", ""] : this._get_wrappers(curr_ins_id, curr_del_id, active_ids, active_fmt);
1437
+ if (full_seg_text && !(this.clean_view && curr_del_id) && !(this.original_view && curr_ins_id)) {
1438
+ const new_wrappers = this.clean_view || this.original_view ? ["", ""] : this._get_wrappers(curr_ins_id, curr_del_id, active_ids, active_fmt);
1434
1439
  const new_style = [prefix, suffix];
1435
1440
  if (pending_runs.length > 0 && new_wrappers[0] === current_wrappers[0] && new_wrappers[1] === current_wrappers[1]) {
1436
1441
  let skip_leading_prefix = false;
@@ -1455,7 +1460,7 @@ ${header}`;
1455
1460
  }
1456
1461
  }
1457
1462
  }
1458
- if (!this.clean_view) {
1463
+ if (!this.clean_view && !this.original_view) {
1459
1464
  const has_meta = Object.keys(active_ins).length > 0 || Object.keys(active_del).length > 0 || active_ids.size > 0 || Object.keys(active_fmt).length > 0;
1460
1465
  if (has_meta) {
1461
1466
  deferred_meta_states.push([{ ...active_ins }, { ...active_del }, new Set(active_ids), { ...active_fmt }]);
@@ -2631,6 +2636,7 @@ var RedlineEngine = class {
2631
2636
  mapper;
2632
2637
  comments_manager;
2633
2638
  clean_mapper = null;
2639
+ original_mapper = null;
2634
2640
  skipped_details = [];
2635
2641
  constructor(doc, author = "Adeu AI (TS)") {
2636
2642
  this.doc = doc;
@@ -3392,13 +3398,17 @@ var RedlineEngine = class {
3392
3398
  if (!edit.target_text) continue;
3393
3399
  let matches = this.mapper.find_all_match_indices(edit.target_text);
3394
3400
  let activeText = this.mapper.full_text;
3401
+ let target_mapper = this.mapper;
3395
3402
  if (matches.length === 0) {
3396
3403
  if (!this.clean_mapper)
3397
3404
  this.clean_mapper = new DocumentMapper(this.doc, true);
3398
3405
  matches = this.clean_mapper.find_all_match_indices(edit.target_text);
3399
- if (matches.length > 0) activeText = this.clean_mapper.full_text;
3406
+ if (matches.length > 0) {
3407
+ activeText = this.clean_mapper.full_text;
3408
+ target_mapper = this.clean_mapper;
3409
+ }
3400
3410
  }
3401
- if (activeText === this.mapper.full_text && matches.length > 1) {
3411
+ if (activeText === this.mapper.full_text && matches.length > 0) {
3402
3412
  const liveMatches = matches.filter(([start, length]) => {
3403
3413
  const realSpans = this.mapper.spans.filter(
3404
3414
  (s) => s.run !== null && s.end > start && s.start < start + length
@@ -3406,13 +3416,51 @@ var RedlineEngine = class {
3406
3416
  if (realSpans.length === 0) return true;
3407
3417
  return realSpans.some((s) => !s.del_id);
3408
3418
  });
3409
- if (liveMatches.length > 0) matches = liveMatches;
3419
+ matches = liveMatches;
3410
3420
  }
3421
+ let is_deleted_text = false;
3422
+ const deleted_authors = /* @__PURE__ */ new Set();
3411
3423
  if (matches.length === 0) {
3412
- errors.push(
3413
- `- Edit ${i + 1} Failed: Target text not found in document:
3424
+ if (!this.original_mapper) {
3425
+ this.original_mapper = new DocumentMapper(this.doc, false, true);
3426
+ }
3427
+ const orig_matches = this.original_mapper.find_all_match_indices(edit.target_text);
3428
+ if (orig_matches.length > 0) {
3429
+ is_deleted_text = true;
3430
+ for (const [start, length] of orig_matches) {
3431
+ const spans = this.original_mapper.spans.filter(
3432
+ (s) => s.end > start && s.start < start + length
3433
+ );
3434
+ for (const s of spans) {
3435
+ if (s.run !== null) {
3436
+ let parent = s.run._element;
3437
+ while (parent) {
3438
+ if (parent.nodeType === 1 && parent.tagName === "w:del") {
3439
+ const auth = parent.getAttribute("w:author");
3440
+ if (auth) {
3441
+ deleted_authors.add(auth);
3442
+ }
3443
+ break;
3444
+ }
3445
+ parent = parent.parentNode;
3446
+ }
3447
+ }
3448
+ }
3449
+ }
3450
+ }
3451
+ }
3452
+ if (matches.length === 0) {
3453
+ if (is_deleted_text) {
3454
+ const author_phrase = deleted_authors.size > 0 ? `by ${Array.from(deleted_authors).sort().join(", ")}` : "by an existing revision";
3455
+ errors.push(
3456
+ `- Edit ${i + 1} Failed: Target text matches text inside a tracked deletion ${author_phrase}. Reject/accept that change first or target the active replacement text instead.`
3457
+ );
3458
+ } else {
3459
+ errors.push(
3460
+ `- Edit ${i + 1} Failed: Target text not found in document:
3414
3461
  "${edit.target_text}"`
3415
- );
3462
+ );
3463
+ }
3416
3464
  } else if (matches.length > 1) {
3417
3465
  const positions = matches.map(([start, length]) => [
3418
3466
  start,
@@ -4834,13 +4882,14 @@ async function extractTextFromBuffer(buffer, cleanView = false) {
4834
4882
  const doc = await DocumentObject.load(buffer);
4835
4883
  return _extractTextFromDoc(doc, cleanView);
4836
4884
  }
4837
- function _extractTextFromDoc(doc, cleanView = false, includeAppendix = true) {
4885
+ function _extractTextFromDoc(doc, cleanView = false, includeAppendix = true, return_paragraph_offsets = false) {
4838
4886
  const comments_map = extract_comments_data(doc.pkg);
4839
4887
  const full_text = [];
4888
+ const paragraph_offsets = /* @__PURE__ */ new Map();
4840
4889
  let cursor = 0;
4841
4890
  for (const part of iter_document_parts(doc)) {
4842
4891
  const part_cursor = full_text.length > 0 ? cursor + 2 : cursor;
4843
- const part_text = _extract_blocks(part, comments_map, cleanView, part_cursor);
4892
+ const part_text = _extract_blocks(part, comments_map, cleanView, part_cursor, return_paragraph_offsets ? paragraph_offsets : void 0);
4844
4893
  if (part_text) {
4845
4894
  if (full_text.length > 0) cursor += 2;
4846
4895
  full_text.push(part_text);
@@ -4852,9 +4901,12 @@ function _extractTextFromDoc(doc, cleanView = false, includeAppendix = true) {
4852
4901
  const appendix = build_structural_appendix(doc, base_text);
4853
4902
  if (appendix) base_text += appendix;
4854
4903
  }
4904
+ if (return_paragraph_offsets) {
4905
+ return { text: base_text, paragraph_offsets };
4906
+ }
4855
4907
  return base_text;
4856
4908
  }
4857
- function _extract_blocks(container, comments_map, cleanView, cursor) {
4909
+ function _extract_blocks(container, comments_map, cleanView, cursor, paragraph_offsets) {
4858
4910
  const part = container.part || container;
4859
4911
  const [style_cache, default_pstyle] = _get_style_cache(part);
4860
4912
  const blocks = [];
@@ -4873,7 +4925,7 @@ ${header}`;
4873
4925
  if (!is_first_block) local_cursor += 2;
4874
4926
  const block_start = local_cursor;
4875
4927
  if (item.constructor.name === "FootnoteItem") {
4876
- const fn_text = _extract_blocks(item, comments_map, cleanView, block_start);
4928
+ const fn_text = _extract_blocks(item, comments_map, cleanView, block_start, paragraph_offsets);
4877
4929
  if (fn_text) {
4878
4930
  blocks.push(fn_text);
4879
4931
  local_cursor = block_start + fn_text.length;
@@ -4889,11 +4941,14 @@ ${header}`;
4889
4941
  const p_text = build_paragraph_text(item, comments_map, cleanView, style_cache, default_pstyle);
4890
4942
  const full_block = prefix + p_text;
4891
4943
  blocks.push(full_block);
4944
+ if (paragraph_offsets) {
4945
+ paragraph_offsets.set(item._element, [block_start, full_block.length]);
4946
+ }
4892
4947
  local_cursor = block_start + full_block.length;
4893
4948
  is_first_para = false;
4894
4949
  is_first_block = false;
4895
4950
  } else if (item instanceof Table) {
4896
- const table_text = extract_table(item, comments_map, cleanView, block_start);
4951
+ const table_text = extract_table(item, comments_map, cleanView, block_start, paragraph_offsets);
4897
4952
  if (table_text) {
4898
4953
  blocks.push(table_text);
4899
4954
  local_cursor = block_start + table_text.length;
@@ -4906,7 +4961,7 @@ ${header}`;
4906
4961
  }
4907
4962
  return blocks.join("\n\n");
4908
4963
  }
4909
- function extract_table(table, comments_map, cleanView, cursor) {
4964
+ function extract_table(table, comments_map, cleanView, cursor, paragraph_offsets) {
4910
4965
  const rows_text = [];
4911
4966
  let rows_processed = 0;
4912
4967
  let local_cursor = cursor;
@@ -4925,7 +4980,7 @@ function extract_table(table, comments_map, cleanView, cursor) {
4925
4980
  if (seen_cells.has(cell)) continue;
4926
4981
  seen_cells.add(cell);
4927
4982
  if (!first_cell) cell_cursor += 3;
4928
- const cell_content = _extract_blocks(cell, comments_map, cleanView, cell_cursor);
4983
+ const cell_content = _extract_blocks(cell, comments_map, cleanView, cell_cursor, paragraph_offsets);
4929
4984
  cell_texts.push(cell_content);
4930
4985
  cell_cursor += cell_content.length;
4931
4986
  first_cell = false;
@@ -5182,6 +5237,9 @@ function extract_outline(doc, projected_body, body_pages, body_page_offsets, par
5182
5237
  if (body_pages.length !== body_page_offsets.length) {
5183
5238
  throw new Error("body_pages and body_page_offsets length mismatch");
5184
5239
  }
5240
+ if (paragraph_offsets) {
5241
+ return _extract_outline_fast(doc, projected_body, body_page_offsets, paragraph_offsets);
5242
+ }
5185
5243
  const comments_map = extract_comments_data(doc.pkg);
5186
5244
  const block_records = _walk_doc_body(doc, comments_map);
5187
5245
  const heading_indices = [];
@@ -5457,6 +5515,7 @@ function _determine_heading_style(paragraph) {
5457
5515
  if (pStyle) style_id = pStyle.getAttribute("w:val") || default_pstyle;
5458
5516
  }
5459
5517
  let outline_level = null;
5518
+ let outline_level_from_style = false;
5460
5519
  if (pPr) {
5461
5520
  const oLvl = findChild(pPr, "w:outlineLvl");
5462
5521
  if (oLvl && /^\d+$/.test(oLvl.getAttribute("w:val") || "")) {
@@ -5465,6 +5524,7 @@ function _determine_heading_style(paragraph) {
5465
5524
  }
5466
5525
  if (outline_level === null && style_id && style_cache && style_cache[style_id]) {
5467
5526
  outline_level = style_cache[style_id].outline_level;
5527
+ outline_level_from_style = true;
5468
5528
  }
5469
5529
  const style_name = style_id && style_cache && style_cache[style_id] ? style_cache[style_id].name : style_id;
5470
5530
  let normalized_style_name = style_name;
@@ -5475,6 +5535,12 @@ function _determine_heading_style(paragraph) {
5475
5535
  normalized_style_name = "Title";
5476
5536
  }
5477
5537
  }
5538
+ if (outline_level_from_style && outline_level !== null) {
5539
+ const is_heading_or_title = normalized_style_name && (normalized_style_name.startsWith("Heading") || normalized_style_name === "Title");
5540
+ if (!is_heading_or_title) {
5541
+ outline_level = null;
5542
+ }
5543
+ }
5478
5544
  if (outline_level !== null && outline_level >= 0 && outline_level <= 8) {
5479
5545
  if (normalized_style_name && (normalized_style_name.startsWith("Heading") || normalized_style_name === "Title")) {
5480
5546
  return normalized_style_name;
@@ -5523,6 +5589,146 @@ function _offset_to_page(offset, body_page_offsets) {
5523
5589
  }
5524
5590
  return page;
5525
5591
  }
5592
+ function _extract_outline_fast(doc, projected_body, body_page_offsets, paragraph_offsets) {
5593
+ const paragraphs_and_tables = [];
5594
+ const seen_cells = /* @__PURE__ */ new Set();
5595
+ function walk(container) {
5596
+ for (const item of iter_block_items(container)) {
5597
+ const i_type = item.constructor.name;
5598
+ if (i_type === "FootnoteItem") {
5599
+ walk(item);
5600
+ } else if (item instanceof Paragraph) {
5601
+ paragraphs_and_tables.push(["p", item]);
5602
+ } else if (item instanceof Table) {
5603
+ paragraphs_and_tables.push(["t", item]);
5604
+ for (const row of item.rows) {
5605
+ for (const cell of row.cells) {
5606
+ if (seen_cells.has(cell._element)) {
5607
+ continue;
5608
+ }
5609
+ seen_cells.add(cell._element);
5610
+ walk(cell);
5611
+ }
5612
+ }
5613
+ }
5614
+ }
5615
+ }
5616
+ walk(doc);
5617
+ const heading_indices = [];
5618
+ for (let idx = 0; idx < paragraphs_and_tables.length; idx++) {
5619
+ const [kind, item] = paragraphs_and_tables[idx];
5620
+ if (kind !== "p") continue;
5621
+ let hasOffset = false;
5622
+ if (paragraph_offsets instanceof Map) {
5623
+ hasOffset = paragraph_offsets.has(item._element);
5624
+ } else {
5625
+ hasOffset = item._element in paragraph_offsets;
5626
+ }
5627
+ if (!hasOffset) {
5628
+ continue;
5629
+ }
5630
+ if (!_is_heading(item)) continue;
5631
+ if (!_heading_passes_quality_filter_fast(item, projected_body, paragraph_offsets)) continue;
5632
+ heading_indices.push(idx);
5633
+ }
5634
+ if (heading_indices.length === 0) return [];
5635
+ const nodes = [];
5636
+ for (let h_pos = 0; h_pos < heading_indices.length; h_pos++) {
5637
+ const item_idx = heading_indices[h_pos];
5638
+ const paragraph = paragraphs_and_tables[item_idx][1];
5639
+ const level = _heading_level(paragraph);
5640
+ const text = _heading_text_fast(paragraph, projected_body, paragraph_offsets);
5641
+ const style = _determine_heading_style(paragraph);
5642
+ let owned_end = item_idx;
5643
+ for (let next_h_pos = h_pos + 1; next_h_pos < heading_indices.length; next_h_pos++) {
5644
+ const next_idx = heading_indices[next_h_pos];
5645
+ const next_paragraph = paragraphs_and_tables[next_idx][1];
5646
+ if (_heading_level(next_paragraph) <= level) {
5647
+ owned_end = next_idx;
5648
+ break;
5649
+ }
5650
+ }
5651
+ if (owned_end === item_idx) {
5652
+ owned_end = paragraphs_and_tables.length;
5653
+ }
5654
+ const owned = paragraphs_and_tables.slice(item_idx + 1, owned_end);
5655
+ let has_table = false;
5656
+ for (const [kind2, item2] of owned) {
5657
+ if (kind2 === "p" && _is_heading(item2)) {
5658
+ break;
5659
+ }
5660
+ if (kind2 === "t") {
5661
+ has_table = true;
5662
+ break;
5663
+ }
5664
+ }
5665
+ const footnote_ids = _collect_footnote_ids_fast(owned);
5666
+ let para_offset;
5667
+ if (paragraph_offsets instanceof Map) {
5668
+ para_offset = paragraph_offsets.get(paragraph._element);
5669
+ } else {
5670
+ para_offset = paragraph_offsets[paragraph._element];
5671
+ }
5672
+ let page_num = 1;
5673
+ if (para_offset !== void 0) {
5674
+ const [start_offset] = para_offset;
5675
+ page_num = _offset_to_page(start_offset, body_page_offsets);
5676
+ }
5677
+ nodes.push({
5678
+ level,
5679
+ text,
5680
+ page: page_num,
5681
+ style,
5682
+ has_table,
5683
+ footnote_ids
5684
+ });
5685
+ }
5686
+ return nodes;
5687
+ }
5688
+ function _heading_passes_quality_filter_fast(paragraph, projected_body, paragraph_offsets) {
5689
+ const style = _determine_heading_style(paragraph);
5690
+ if (style !== "(heuristic)") return true;
5691
+ const text = _heading_text_fast(paragraph, projected_body, paragraph_offsets);
5692
+ if (!text) return false;
5693
+ const words = text.match(/\w+/g) || [];
5694
+ return words.length >= _HEURISTIC_MIN_WORDS;
5695
+ }
5696
+ function _heading_text_fast(paragraph, projected_body, paragraph_offsets) {
5697
+ let offset;
5698
+ if (paragraph_offsets instanceof Map) {
5699
+ offset = paragraph_offsets.get(paragraph._element);
5700
+ } else {
5701
+ offset = paragraph_offsets[paragraph._element];
5702
+ }
5703
+ if (offset === void 0) {
5704
+ return "";
5705
+ }
5706
+ const [start, length] = offset;
5707
+ const raw = projected_body.substring(start, start + length);
5708
+ let cleaned = _strip_critic_markup(raw);
5709
+ cleaned = _strip_inline_formatting(cleaned);
5710
+ cleaned = cleaned.replace(/^#+\s+/, "");
5711
+ return cleaned.trim();
5712
+ }
5713
+ function _collect_footnote_ids_fast(owned_items) {
5714
+ const seen = /* @__PURE__ */ new Set();
5715
+ const ordered = [];
5716
+ for (const [kind, item] of owned_items) {
5717
+ if (kind !== "p") continue;
5718
+ for (const event of iter_paragraph_content(item)) {
5719
+ if (!("type" in event)) continue;
5720
+ let fn_id = "";
5721
+ if (event.type === "footnote") fn_id = `fn-${event.id}`;
5722
+ else if (event.type === "endnote") fn_id = `en-${event.id}`;
5723
+ else continue;
5724
+ if (!seen.has(fn_id)) {
5725
+ seen.add(fn_id);
5726
+ ordered.push(fn_id);
5727
+ }
5728
+ }
5729
+ }
5730
+ return ordered;
5731
+ }
5526
5732
 
5527
5733
  // src/sanitize/report.ts
5528
5734
  var SanitizeReport = class {
@@ -6209,6 +6415,7 @@ export {
6209
6415
  DocumentMapper,
6210
6416
  DocumentObject,
6211
6417
  RedlineEngine,
6418
+ _extractTextFromDoc,
6212
6419
  apply_edits_to_markdown,
6213
6420
  create_unified_diff,
6214
6421
  create_word_patch_diff,