paddlex 3.0.1__py3-none-any.whl → 3.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. paddlex/.version +1 -1
  2. paddlex/inference/models/base/predictor/base_predictor.py +2 -0
  3. paddlex/inference/models/common/static_infer.py +20 -14
  4. paddlex/inference/models/common/ts/funcs.py +19 -8
  5. paddlex/inference/models/formula_recognition/predictor.py +1 -1
  6. paddlex/inference/models/formula_recognition/processors.py +2 -2
  7. paddlex/inference/models/text_recognition/result.py +1 -1
  8. paddlex/inference/pipelines/layout_parsing/layout_objects.py +859 -0
  9. paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +144 -205
  10. paddlex/inference/pipelines/layout_parsing/result_v2.py +13 -272
  11. paddlex/inference/pipelines/layout_parsing/setting.py +1 -0
  12. paddlex/inference/pipelines/layout_parsing/utils.py +108 -312
  13. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/utils.py +302 -247
  14. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/xycuts.py +156 -104
  15. paddlex/inference/pipelines/ocr/result.py +2 -2
  16. paddlex/inference/pipelines/pp_chatocr/pipeline_v4.py +1 -1
  17. paddlex/inference/serving/basic_serving/_app.py +47 -13
  18. paddlex/inference/serving/infra/utils.py +22 -17
  19. paddlex/inference/utils/hpi.py +60 -25
  20. paddlex/inference/utils/hpi_model_info_collection.json +627 -204
  21. paddlex/inference/utils/misc.py +20 -0
  22. paddlex/inference/utils/mkldnn_blocklist.py +36 -2
  23. paddlex/inference/utils/official_models.py +126 -5
  24. paddlex/inference/utils/pp_option.py +81 -21
  25. paddlex/modules/semantic_segmentation/dataset_checker/__init__.py +12 -2
  26. paddlex/ops/__init__.py +6 -3
  27. paddlex/utils/deps.py +2 -2
  28. paddlex/utils/device.py +4 -19
  29. paddlex/utils/download.py +10 -7
  30. paddlex/utils/flags.py +9 -0
  31. paddlex/utils/subclass_register.py +2 -2
  32. {paddlex-3.0.1.dist-info → paddlex-3.0.3.dist-info}/METADATA +307 -162
  33. {paddlex-3.0.1.dist-info → paddlex-3.0.3.dist-info}/RECORD +37 -35
  34. {paddlex-3.0.1.dist-info → paddlex-3.0.3.dist-info}/WHEEL +1 -1
  35. {paddlex-3.0.1.dist-info → paddlex-3.0.3.dist-info}/entry_points.txt +1 -0
  36. {paddlex-3.0.1.dist-info/licenses → paddlex-3.0.3.dist-info}/LICENSE +0 -0
  37. {paddlex-3.0.1.dist-info → paddlex-3.0.3.dist-info}/top_level.txt +0 -0
@@ -16,9 +16,13 @@ from typing import List, Tuple
16
16
 
17
17
  import numpy as np
18
18
 
19
- from ..result_v2 import LayoutParsingBlock, LayoutParsingRegion
19
+ from ..layout_objects import LayoutBlock, LayoutRegion
20
20
  from ..setting import BLOCK_LABEL_MAP, XYCUT_SETTINGS
21
- from ..utils import calculate_projection_overlap_ratio
21
+ from ..utils import (
22
+ calculate_overlap_ratio,
23
+ calculate_projection_overlap_ratio,
24
+ get_seg_flag,
25
+ )
22
26
 
23
27
 
24
28
  def get_nearest_edge_distance(
@@ -264,8 +268,8 @@ def recursive_xy_cut(
264
268
 
265
269
 
266
270
  def reference_insert(
267
- block: LayoutParsingBlock,
268
- sorted_blocks: List[LayoutParsingBlock],
271
+ block: LayoutBlock,
272
+ sorted_blocks: List[LayoutBlock],
269
273
  **kwargs,
270
274
  ):
271
275
  """
@@ -294,8 +298,8 @@ def reference_insert(
294
298
 
295
299
 
296
300
  def manhattan_insert(
297
- block: LayoutParsingBlock,
298
- sorted_blocks: List[LayoutParsingBlock],
301
+ block: LayoutBlock,
302
+ sorted_blocks: List[LayoutBlock],
299
303
  **kwargs,
300
304
  ):
301
305
  """
@@ -322,10 +326,38 @@ def manhattan_insert(
322
326
  return sorted_blocks
323
327
 
324
328
 
329
+ def euclidean_insert(
330
+ block: LayoutRegion,
331
+ sorted_blocks: List[LayoutRegion],
332
+ **kwargs,
333
+ ):
334
+ """
335
+ Insert a block into a sorted list of blocks based on the Euclidean distance between the block and the nearest sorted block.
336
+
337
+ Args:
338
+ block: The block to insert into the sorted blocks.
339
+ sorted_blocks: The sorted blocks where the new block will be inserted.
340
+ config: Configuration dictionary containing parameters related to the layout parsing.
341
+ median_width: Median width of the document. Defaults to 0.0.
342
+
343
+ Returns:
344
+ sorted_blocks: The updated sorted blocks after insertion.
345
+ """
346
+ nearest_sorted_block_index = len(sorted_blocks)
347
+ block_euclidean_distance = block.euclidean_distance
348
+ for sorted_block_idx, sorted_block in enumerate(sorted_blocks):
349
+ distance = sorted_block.euclidean_distance
350
+ if distance > block_euclidean_distance:
351
+ nearest_sorted_block_index = sorted_block_idx
352
+ break
353
+ sorted_blocks.insert(nearest_sorted_block_index, block)
354
+ return sorted_blocks
355
+
356
+
325
357
  def weighted_distance_insert(
326
- block: LayoutParsingBlock,
327
- sorted_blocks: List[LayoutParsingBlock],
328
- region: LayoutParsingRegion,
358
+ block: LayoutBlock,
359
+ sorted_blocks: List[LayoutBlock],
360
+ region: LayoutRegion,
329
361
  ):
330
362
  """
331
363
  Insert a block into a sorted list of blocks based on the weighted distance between the block and the nearest sorted block.
@@ -398,18 +430,75 @@ def weighted_distance_insert(
398
430
  if weighted_distance < min_weighted_distance:
399
431
  nearest_sorted_block_index = sorted_block_idx
400
432
  min_weighted_distance = weighted_distance
401
- if y1 > y1_prime or (y1 == y1_prime and x1 > x1_prime):
433
+ if abs(y1 // 2 - y1_prime // 2) > 0:
434
+ sorted_distance = y1_prime
435
+ block_distance = y1
436
+ else:
437
+ if region.direction == "horizontal":
438
+ if abs(x1 // 2 - x2 // 2) > 0:
439
+ sorted_distance = x1_prime
440
+ block_distance = x1
441
+ else:
442
+ # distance with (0,0)
443
+ sorted_block_center_x, sorted_block_center_y = (
444
+ sorted_block.get_centroid()
445
+ )
446
+ block_center_x, block_center_y = block.get_centroid()
447
+ sorted_distance = (
448
+ sorted_block_center_x**2 + sorted_block_center_y**2
449
+ )
450
+ block_distance = block_center_x**2 + block_center_y**2
451
+ else:
452
+ if abs(x1 - x2) > 0:
453
+ sorted_distance = -x2_prime
454
+ block_distance = -x2
455
+ else:
456
+ # distance with (max,0)
457
+ sorted_block_center_x, sorted_block_center_y = (
458
+ sorted_block.get_centroid()
459
+ )
460
+ block_center_x, block_center_y = block.get_centroid()
461
+ sorted_distance = (
462
+ sorted_block_center_x**2 + sorted_block_center_y**2
463
+ )
464
+ block_distance = block_center_x**2 + block_center_y**2
465
+ if block_distance > sorted_distance:
402
466
  nearest_sorted_block_index = sorted_block_idx + 1
467
+ if (
468
+ sorted_block_idx < len(sorted_blocks) - 1
469
+ and block.label
470
+ in BLOCK_LABEL_MAP["vision_labels"]
471
+ + BLOCK_LABEL_MAP["vision_title_labels"]
472
+ ):
473
+ seg_start_flag, _ = get_seg_flag(
474
+ sorted_blocks[sorted_block_idx + 1],
475
+ sorted_blocks[sorted_block_idx],
476
+ )
477
+ if not seg_start_flag:
478
+ nearest_sorted_block_index += 1
479
+ else:
480
+ if (
481
+ sorted_block_idx > 0
482
+ and block.label
483
+ in BLOCK_LABEL_MAP["vision_labels"]
484
+ + BLOCK_LABEL_MAP["vision_title_labels"]
485
+ ):
486
+ seg_start_flag, _ = get_seg_flag(
487
+ sorted_blocks[sorted_block_idx],
488
+ sorted_blocks[sorted_block_idx - 1],
489
+ )
490
+ if not seg_start_flag:
491
+ nearest_sorted_block_index = sorted_block_idx - 1
403
492
 
404
493
  sorted_blocks.insert(nearest_sorted_block_index, block)
405
494
  return sorted_blocks
406
495
 
407
496
 
408
497
  def insert_child_blocks(
409
- block: LayoutParsingBlock,
498
+ block: LayoutBlock,
410
499
  block_idx: int,
411
- sorted_blocks: List[LayoutParsingBlock],
412
- ) -> List[LayoutParsingBlock]:
500
+ sorted_blocks: List[LayoutBlock],
501
+ ) -> List[LayoutBlock]:
413
502
  """
414
503
  Insert child blocks of a block into the sorted blocks list.
415
504
 
@@ -432,34 +521,37 @@ def insert_child_blocks(
432
521
  return sorted_blocks
433
522
 
434
523
 
435
- def sort_child_blocks(blocks, direction="horizontal") -> List[LayoutParsingBlock]:
524
+ def sort_child_blocks(
525
+ blocks: List[LayoutRegion], direction="horizontal"
526
+ ) -> List[LayoutBlock]:
436
527
  """
437
528
  Sort child blocks based on their bounding box coordinates.
438
529
 
439
530
  Args:
440
- blocks: A list of LayoutParsingBlock objects representing the child blocks.
531
+ blocks: A list of LayoutBlock objects representing the child blocks.
441
532
  direction: direction of the blocks ('horizontal' or 'vertical'). Default is 'horizontal'.
442
533
  Returns:
443
- sorted_blocks: A sorted list of LayoutParsingBlock objects.
534
+ sorted_blocks: A sorted list of LayoutBlock objects.
444
535
  """
445
- if direction == "horizontal":
446
- # from top to bottom
447
- blocks.sort(
448
- key=lambda x: (
449
- x.bbox[1], # y_min
450
- x.bbox[0], # x_min
451
- x.bbox[1] ** 2 + x.bbox[0] ** 2, # distance with (0,0)
452
- ),
453
- )
536
+ if blocks[0].label != "region":
537
+ if direction == "horizontal":
538
+ blocks.sort(
539
+ key=lambda x: (
540
+ x.bbox[1],
541
+ x.bbox[0],
542
+ x.get_centroid()[0] ** 2 + x.get_centroid()[1] ** 2,
543
+ ), # distance with (0,0)
544
+ )
545
+ else:
546
+ blocks.sort(
547
+ key=lambda x: (
548
+ -x.bbox[2],
549
+ x.bbox[1],
550
+ -x.get_centroid()[0] ** 2 + x.get_centroid()[1] ** 2,
551
+ ), # distance with (max,0)
552
+ )
454
553
  else:
455
- # from right to left
456
- blocks.sort(
457
- key=lambda x: (
458
- -x.bbox[0], # x_min
459
- x.bbox[1], # y_min
460
- x.bbox[1] ** 2 - x.bbox[0] ** 2, # distance with (max,0)
461
- ),
462
- )
554
+ blocks.sort(key=lambda x: x.euclidean_distance)
463
555
  return blocks
464
556
 
465
557
 
@@ -504,41 +596,34 @@ def _manhattan_distance(
504
596
  return weight_x * abs(point1[0] - point2[0]) + weight_y * abs(point1[1] - point2[1])
505
597
 
506
598
 
507
- def sort_normal_blocks(blocks, text_line_height, text_line_width, region_direction):
508
- if region_direction == "horizontal":
509
- blocks.sort(
510
- key=lambda x: (
511
- x.bbox[1] // text_line_height,
512
- x.bbox[0] // text_line_width,
513
- x.bbox[1] ** 2 + x.bbox[0] ** 2,
514
- ),
515
- )
516
- else:
517
- blocks.sort(
518
- key=lambda x: (
519
- -x.bbox[0] // text_line_width,
520
- x.bbox[1] // text_line_height,
521
- x.bbox[1] ** 2 - x.bbox[2] ** 2, # distance with (max,0)
522
- ),
523
- )
524
- return blocks
599
+ def sort_normal_blocks(
600
+ blocks, text_line_height, text_line_width, region_direction
601
+ ) -> List[LayoutBlock]:
602
+ """Sort blocks by their position within the page
525
603
 
604
+ Args:
605
+ blocks (List[LayoutBlock]): List of blocks to be sorted.
606
+ text_line_height (int): Height of each line of text.
607
+ text_line_width (int): Width of each line of text.
608
+ region_direction (str): Direction of the region, either "horizontal" or "vertical".
526
609
 
527
- def sort_normal_blocks(blocks, text_line_height, text_line_width, region_direction):
610
+ Returns:
611
+ List[LayoutBlock]: Sorted list of blocks.
612
+ """
528
613
  if region_direction == "horizontal":
529
614
  blocks.sort(
530
615
  key=lambda x: (
531
616
  x.bbox[1] // text_line_height,
532
617
  x.bbox[0] // text_line_width,
533
- x.bbox[1] ** 2 + x.bbox[0] ** 2,
618
+ x.get_centroid()[0] ** 2 + x.get_centroid()[1] ** 2,
534
619
  ),
535
620
  )
536
621
  else:
537
622
  blocks.sort(
538
623
  key=lambda x: (
539
- -x.bbox[0] // text_line_width,
624
+ -x.bbox[2] // text_line_width,
540
625
  x.bbox[1] // text_line_height,
541
- -(x.bbox[2] ** 2 + x.bbox[1] ** 2),
626
+ -x.get_centroid()[0] ** 2 + x.get_centroid()[1] ** 2,
542
627
  ),
543
628
  )
544
629
  return blocks
@@ -585,45 +670,54 @@ def get_cut_blocks(blocks, cut_direction, cut_coordinates, mask_labels=[]):
585
670
  return cuted_list
586
671
 
587
672
 
588
- def add_split_block(
589
- blocks: List[LayoutParsingBlock], region_bbox: List[int]
590
- ) -> List[LayoutParsingBlock]:
591
- block_bboxes = np.array([block.bbox for block in blocks])
592
- discontinuous = calculate_discontinuous_projection(
593
- block_bboxes, direction="vertical"
594
- )
595
- current_interval = discontinuous[0]
596
- for interval in discontinuous[1:]:
597
- gap_len = interval[0] - current_interval[1]
598
- if gap_len > 40:
599
- x1, _, x2, __ = region_bbox
600
- y1 = current_interval[1] + 5
601
- y2 = interval[0] - 5
602
- bbox = [x1, y1, x2, y2]
603
- split_block = LayoutParsingBlock(label="split", bbox=bbox)
604
- blocks.append(split_block)
605
- current_interval = interval
673
+ def get_blocks_by_direction_interval(
674
+ blocks: List[LayoutBlock],
675
+ start_index: int,
676
+ end_index: int,
677
+ direction: str = "horizontal",
678
+ ) -> List[LayoutBlock]:
679
+ """
680
+ Get blocks within a specified direction interval.
681
+
682
+ Args:
683
+ blocks (List[LayoutBlock]): A list of blocks.
684
+ start_index (int): The starting index of the direction.
685
+ end_index (int): The ending index of the direction.
686
+ direction (str, optional): The direction to consider. Defaults to "horizontal".
687
+
688
+ Returns:
689
+ List[LayoutBlock]: A list of blocks within the specified direction interval.
690
+ """
691
+ interval_blocks = []
692
+ aixis = 0 if direction == "horizontal" else 1
693
+ blocks.sort(key=lambda x: x.bbox[aixis + 2])
694
+
695
+ for block in blocks:
696
+ if block.bbox[aixis] >= start_index and block.bbox[aixis + 2] <= end_index:
697
+ interval_blocks.append(block)
698
+
699
+ return interval_blocks
606
700
 
607
701
 
608
702
  def get_nearest_blocks(
609
- block: LayoutParsingBlock,
610
- ref_blocks: List[LayoutParsingBlock],
703
+ block: LayoutBlock,
704
+ ref_blocks: List[LayoutBlock],
611
705
  overlap_threshold,
612
706
  direction="horizontal",
613
707
  ) -> List:
614
708
  """
615
709
  Get the adjacent blocks with the same direction as the current block.
616
710
  Args:
617
- block (LayoutParsingBlock): The current block.
618
- blocks (List[LayoutParsingBlock]): A list of all blocks.
711
+ block (LayoutBlock): The current block.
712
+ blocks (List[LayoutBlock]): A list of all blocks.
619
713
  ref_block_idxes (List[int]): A list of indices of reference blocks.
620
714
  iou_threshold (float): The IOU threshold to determine if two blocks are considered adjacent.
621
715
  Returns:
622
716
  Int: The index of the previous block with same direction.
623
717
  Int: The index of the following block with same direction.
624
718
  """
625
- prev_blocks: List[LayoutParsingBlock] = []
626
- post_blocks: List[LayoutParsingBlock] = []
719
+ prev_blocks: List[LayoutBlock] = []
720
+ post_blocks: List[LayoutBlock] = []
627
721
  sort_index = 1 if direction == "horizontal" else 0
628
722
  for ref_block in ref_blocks:
629
723
  if ref_block.index == block.index:
@@ -645,110 +739,9 @@ def get_nearest_blocks(
645
739
  return prev_blocks, post_blocks
646
740
 
647
741
 
648
- def get_adjacent_blocks_by_direction(
649
- blocks: List[LayoutParsingBlock],
650
- block_idx: int,
651
- ref_block_idxes: List[int],
652
- iou_threshold,
653
- ) -> List:
654
- """
655
- Get the adjacent blocks with the same direction as the current block.
656
- Args:
657
- block (LayoutParsingBlock): The current block.
658
- blocks (List[LayoutParsingBlock]): A list of all blocks.
659
- ref_block_idxes (List[int]): A list of indices of reference blocks.
660
- iou_threshold (float): The IOU threshold to determine if two blocks are considered adjacent.
661
- Returns:
662
- Int: The index of the previous block with same direction.
663
- Int: The index of the following block with same direction.
664
- """
665
- min_prev_block_distance = float("inf")
666
- prev_block_index = None
667
- min_post_block_distance = float("inf")
668
- post_block_index = None
669
- block = blocks[block_idx]
670
- child_labels = [
671
- "vision_footnote",
672
- "sub_paragraph_title",
673
- "doc_title_text",
674
- "vision_title",
675
- ]
676
-
677
- # find the nearest text block with same direction to the current block
678
- for ref_block_idx in ref_block_idxes:
679
- ref_block = blocks[ref_block_idx]
680
- ref_block_direction = ref_block.direction
681
- if ref_block.order_label in child_labels:
682
- continue
683
- match_block_iou = calculate_projection_overlap_ratio(
684
- block.bbox,
685
- ref_block.bbox,
686
- ref_block_direction,
687
- )
688
-
689
- child_match_distance_tolerance_len = block.short_side_length / 10
690
-
691
- if block.order_label == "vision":
692
- if ref_block.num_of_lines == 1:
693
- gap_tolerance_len = ref_block.short_side_length * 2
694
- else:
695
- gap_tolerance_len = block.short_side_length / 10
696
- else:
697
- gap_tolerance_len = block.short_side_length * 2
698
-
699
- if match_block_iou >= iou_threshold:
700
- prev_distance = (
701
- block.secondary_direction_start_coordinate
702
- - ref_block.secondary_direction_end_coordinate
703
- + child_match_distance_tolerance_len
704
- ) // 5 + ref_block.start_coordinate / 5000
705
- next_distance = (
706
- ref_block.secondary_direction_start_coordinate
707
- - block.secondary_direction_end_coordinate
708
- + child_match_distance_tolerance_len
709
- ) // 5 + ref_block.start_coordinate / 5000
710
- if (
711
- ref_block.secondary_direction_end_coordinate
712
- <= block.secondary_direction_start_coordinate
713
- + child_match_distance_tolerance_len
714
- and prev_distance < min_prev_block_distance
715
- ):
716
- min_prev_block_distance = prev_distance
717
- if (
718
- block.secondary_direction_start_coordinate
719
- - ref_block.secondary_direction_end_coordinate
720
- < gap_tolerance_len
721
- ):
722
- prev_block_index = ref_block_idx
723
- elif (
724
- ref_block.secondary_direction_start_coordinate
725
- > block.secondary_direction_end_coordinate
726
- - child_match_distance_tolerance_len
727
- and next_distance < min_post_block_distance
728
- ):
729
- min_post_block_distance = next_distance
730
- if (
731
- ref_block.secondary_direction_start_coordinate
732
- - block.secondary_direction_end_coordinate
733
- < gap_tolerance_len
734
- ):
735
- post_block_index = ref_block_idx
736
-
737
- diff_dist = abs(min_prev_block_distance - min_post_block_distance)
738
-
739
- # if the difference in distance is too large, only consider the nearest one
740
- if diff_dist * 5 > block.short_side_length:
741
- if min_prev_block_distance < min_post_block_distance:
742
- post_block_index = None
743
- else:
744
- prev_block_index = None
745
-
746
- return prev_block_index, post_block_index
747
-
748
-
749
742
  def update_doc_title_child_blocks(
750
- block: LayoutParsingBlock,
751
- region: LayoutParsingRegion,
743
+ block: LayoutBlock,
744
+ region: LayoutRegion,
752
745
  ) -> None:
753
746
  """
754
747
  Update the child blocks of a document title block.
@@ -762,8 +755,8 @@ def update_doc_title_child_blocks(
762
755
  6. The nearest edge distance should be less than 2 times of the text line height.
763
756
 
764
757
  Args:
765
- blocks (List[LayoutParsingBlock]): overall blocks.
766
- block (LayoutParsingBlock): document title block.
758
+ blocks (List[LayoutBlock]): overall blocks.
759
+ block (LayoutBlock): document title block.
767
760
  prev_idx (int): previous block index, None if not exist.
768
761
  post_idx (int): post block index, None if not exist.
769
762
  config (dict): configurations.
@@ -813,10 +806,24 @@ def update_doc_title_child_blocks(
813
806
  block.append_child_block(ref_block)
814
807
  region.normal_text_block_idxes.remove(ref_block.index)
815
808
 
809
+ for ref_block in ref_blocks:
810
+ if ref_block.order_label == "doc_title_text":
811
+ continue
812
+ with_seem_direction = ref_block.direction == block.direction
813
+
814
+ overlap_ratio = calculate_overlap_ratio(
815
+ block.bbox, ref_block.bbox, mode="small"
816
+ )
817
+
818
+ if overlap_ratio > 0.9 and with_seem_direction:
819
+ ref_block.order_label = "doc_title_text"
820
+ block.append_child_block(ref_block)
821
+ region.normal_text_block_idxes.remove(ref_block.index)
822
+
816
823
 
817
824
  def update_paragraph_title_child_blocks(
818
- block: LayoutParsingBlock,
819
- region: LayoutParsingRegion,
825
+ block: LayoutBlock,
826
+ region: LayoutRegion,
820
827
  ) -> None:
821
828
  """
822
829
  Update the child blocks of a paragraph title block.
@@ -827,8 +834,8 @@ def update_paragraph_title_child_blocks(
827
834
  3. The child block must be paragraph title block.
828
835
 
829
836
  Args:
830
- blocks (List[LayoutParsingBlock]): overall blocks.
831
- block (LayoutParsingBlock): document title block.
837
+ blocks (List[LayoutBlock]): overall blocks.
838
+ block (LayoutBlock): document title block.
832
839
  prev_idx (int): previous block index, None if not exist.
833
840
  post_idx (int): post block index, None if not exist.
834
841
  config (dict): configurations.
@@ -858,8 +865,13 @@ def update_paragraph_title_child_blocks(
858
865
  block.bbox, ref_block.bbox
859
866
  )
860
867
  with_seem_direction = ref_block.direction == block.direction
868
+ with_seem_start = (
869
+ abs(ref_block.start_coordinate - block.start_coordinate)
870
+ < min_text_line_height * 2
871
+ )
861
872
  if (
862
873
  with_seem_direction
874
+ and with_seem_start
863
875
  and nearest_edge_distance <= min_text_line_height * 1.5
864
876
  ):
865
877
  ref_block.order_label = "sub_paragraph_title"
@@ -868,8 +880,8 @@ def update_paragraph_title_child_blocks(
868
880
 
869
881
 
870
882
  def update_vision_child_blocks(
871
- block: LayoutParsingBlock,
872
- region: LayoutParsingRegion,
883
+ block: LayoutBlock,
884
+ region: LayoutRegion,
873
885
  ) -> None:
874
886
  """
875
887
  Update the child blocks of a paragraph title block.
@@ -887,8 +899,8 @@ def update_vision_child_blocks(
887
899
  4. The difference between their centers is very small.
888
900
 
889
901
  Args:
890
- blocks (List[LayoutParsingBlock]): overall blocks.
891
- block (LayoutParsingBlock): document title block.
902
+ blocks (List[LayoutBlock]): overall blocks.
903
+ block (LayoutBlock): document title block.
892
904
  ref_block_idxes (List[int]): A list of indices of reference blocks.
893
905
  prev_idx (int): previous block index, None if not exist.
894
906
  post_idx (int): post block index, None if not exist.
@@ -934,11 +946,11 @@ def update_vision_child_blocks(
934
946
  not has_vision_footnote
935
947
  and ref_block.direction == block.direction
936
948
  and ref_block.long_side_length < block.long_side_length
949
+ and nearest_edge_distance <= ref_block.text_line_height * 2
937
950
  ):
938
951
  if (
939
952
  (
940
- nearest_edge_distance <= block.text_line_height * 2
941
- and ref_block.short_side_length < block.short_side_length
953
+ ref_block.short_side_length < block.short_side_length
942
954
  and ref_block.long_side_length
943
955
  < 0.5 * block.long_side_length
944
956
  and abs(block_center[0] - ref_block_center[0]) < 10
@@ -979,12 +991,17 @@ def update_vision_child_blocks(
979
991
  if ref_block.label in BLOCK_LABEL_MAP["text_labels"]:
980
992
  if (
981
993
  not has_vision_footnote
982
- and nearest_edge_distance <= block.text_line_height * 2
983
- and ref_block.short_side_length < block.short_side_length
984
- and ref_block.long_side_length < 0.5 * block.long_side_length
985
994
  and ref_block.direction == block.direction
986
- and (
987
- abs(block_center[0] - ref_block_center[0]) < 10
995
+ and ref_block.long_side_length < block.long_side_length
996
+ and nearest_edge_distance <= ref_block.text_line_height * 2
997
+ ):
998
+ if (
999
+ (
1000
+ ref_block.short_side_length < block.short_side_length
1001
+ and ref_block.long_side_length
1002
+ < 0.5 * block.long_side_length
1003
+ and abs(block_center[0] - ref_block_center[0]) < 10
1004
+ )
988
1005
  or (
989
1006
  block.bbox[0] - ref_block.bbox[0] < 10
990
1007
  and ref_block.num_of_lines == 1
@@ -993,16 +1010,56 @@ def update_vision_child_blocks(
993
1010
  block.bbox[2] - ref_block.bbox[2] < 10
994
1011
  and ref_block.num_of_lines == 1
995
1012
  )
996
- )
997
- ):
998
- has_vision_footnote = True
999
- ref_block.order_label = "vision_footnote"
1000
- block.append_child_block(ref_block)
1001
- region.normal_text_block_idxes.remove(ref_block.index)
1013
+ ):
1014
+ has_vision_footnote = True
1015
+ ref_block.label = "vision_footnote"
1016
+ ref_block.order_label = "vision_footnote"
1017
+ block.append_child_block(ref_block)
1018
+ region.normal_text_block_idxes.remove(ref_block.index)
1002
1019
  break
1003
1020
  if has_vision_title:
1004
1021
  break
1005
1022
 
1023
+ for ref_block in ref_blocks:
1024
+ if ref_block.index not in region.normal_text_block_idxes:
1025
+ continue
1026
+
1027
+ overlap_ratio = calculate_overlap_ratio(
1028
+ block.bbox, ref_block.bbox, mode="small"
1029
+ )
1030
+
1031
+ if overlap_ratio > 0.9:
1032
+ ref_block.label = "vision_footnote"
1033
+ ref_block.order_label = "vision_footnote"
1034
+ block.append_child_block(ref_block)
1035
+ region.normal_text_block_idxes.remove(ref_block.index)
1036
+
1037
+
1038
+ def update_region_child_blocks(
1039
+ block: LayoutBlock,
1040
+ region: LayoutRegion,
1041
+ ) -> None:
1042
+ """Update child blocks of a region.
1043
+
1044
+ Args:
1045
+ block (LayoutBlock): document title block.
1046
+ region (LayoutRegion): layout region.
1047
+
1048
+ Returns:
1049
+ None
1050
+ """
1051
+ for ref_block in region.block_map.values():
1052
+ if block.index != ref_block.index:
1053
+ bbox_iou = calculate_overlap_ratio(block.bbox, ref_block.bbox)
1054
+ if (
1055
+ bbox_iou > 0
1056
+ and block.area > ref_block.area
1057
+ and ref_block.order_label != "sub_region"
1058
+ ):
1059
+ ref_block.order_label = "sub_region"
1060
+ block.append_child_block(ref_block)
1061
+ region.normal_text_block_idxes.remove(ref_block.index)
1062
+
1006
1063
 
1007
1064
  def calculate_discontinuous_projection(
1008
1065
  boxes, direction="horizontal", return_num=False
@@ -1049,44 +1106,6 @@ def calculate_discontinuous_projection(
1049
1106
  return merged_intervals
1050
1107
 
1051
1108
 
1052
- def is_projection_consistent(blocks, intervals, direction="horizontal"):
1053
-
1054
- for interval in intervals:
1055
- if direction == "horizontal":
1056
- start_index, stop_index = 0, 2
1057
- interval_box = [interval[0], 0, interval[1], 1]
1058
- else:
1059
- start_index, stop_index = 1, 3
1060
- interval_box = [0, interval[0], 1, interval[1]]
1061
- same_interval_bboxes = []
1062
- for block in blocks:
1063
- overlap_ratio = calculate_projection_overlap_ratio(
1064
- interval_box, block.bbox, direction=direction
1065
- )
1066
- if overlap_ratio > 0 and block.label in BLOCK_LABEL_MAP["text_labels"]:
1067
- same_interval_bboxes.append(block.bbox)
1068
- start_coordinates = [bbox[start_index] for bbox in same_interval_bboxes]
1069
- if start_coordinates:
1070
- min_start_coordinate = min(start_coordinates)
1071
- max_start_coordinate = max(start_coordinates)
1072
- is_start_consistent = (
1073
- False
1074
- if max_start_coordinate - min_start_coordinate
1075
- >= abs(interval[0] - interval[1]) * 0.05
1076
- else True
1077
- )
1078
- stop_coordinates = [bbox[stop_index] for bbox in same_interval_bboxes]
1079
- min_stop_coordinate = min(stop_coordinates)
1080
- max_stop_coordinate = max(stop_coordinates)
1081
- if (
1082
- max_stop_coordinate - min_stop_coordinate
1083
- >= abs(interval[0] - interval[1]) * 0.05
1084
- and is_start_consistent
1085
- ):
1086
- return False
1087
- return True
1088
-
1089
-
1090
1109
  def shrink_overlapping_boxes(
1091
1110
  boxes, direction="horizontal", min_threshold=0, max_threshold=0.1
1092
1111
  ) -> List:
@@ -1125,8 +1144,12 @@ def shrink_overlapping_boxes(
1125
1144
  split_y = int((overlap_y_min + overlap_y_max) / 2)
1126
1145
  overlap_y_min = split_y - 1
1127
1146
  overlap_y_max = split_y + 1
1128
- current_block.bbox = [x1, y1, x2, overlap_y_min]
1129
- block.bbox = [x1_prime, overlap_y_max, x2_prime, y2_prime]
1147
+ if y1 < y1_prime:
1148
+ current_block.bbox = [x1, y1, x2, overlap_y_min]
1149
+ block.bbox = [x1_prime, overlap_y_max, x2_prime, y2_prime]
1150
+ else:
1151
+ current_block.bbox = [x1, overlap_y_min, x2, y2]
1152
+ block.bbox = [x1_prime, y1_prime, x2_prime, overlap_y_max]
1130
1153
  else:
1131
1154
  if (
1132
1155
  (match_iou > 0 and cut_iou > min_threshold and cut_iou < max_threshold)
@@ -1138,7 +1161,39 @@ def shrink_overlapping_boxes(
1138
1161
  split_x = int((overlap_x_min + overlap_x_max) / 2)
1139
1162
  overlap_x_min = split_x - 1
1140
1163
  overlap_x_max = split_x + 1
1141
- current_block.bbox = [x1, y1, overlap_x_min, y2]
1142
- block.bbox = [overlap_x_max, y1_prime, x2_prime, y2_prime]
1164
+ if x1 < x1_prime:
1165
+ current_block.bbox = [x1, y1, overlap_x_min, y2]
1166
+ block.bbox = [overlap_x_max, y1_prime, x2_prime, y2_prime]
1167
+ else:
1168
+ current_block.bbox = [overlap_x_min, y1, x2, y2]
1169
+ block.bbox = [x1_prime, y1_prime, overlap_x_max, y2_prime]
1143
1170
  current_block = block
1144
1171
  return boxes
1172
+
1173
+
1174
+ def find_local_minima_flat_regions(arr) -> List:
1175
+ """
1176
+ Find all local minima regions in a flat array.
1177
+
1178
+ Args:
1179
+ arr (list): The input array.
1180
+
1181
+ Returns:
1182
+ list: A list of tuples containing the indices of the local minima regions.
1183
+ """
1184
+ n = len(arr)
1185
+ if n == 0:
1186
+ return []
1187
+
1188
+ flat_minima_regions = []
1189
+ start = 0
1190
+
1191
+ for i in range(1, n):
1192
+ if arr[i] != arr[i - 1]:
1193
+ if (start == 0 or arr[start - 1] > arr[start]) and (
1194
+ i == n or arr[i] > arr[start]
1195
+ ):
1196
+ flat_minima_regions.append((start, i - 1))
1197
+ start = i
1198
+
1199
+ return flat_minima_regions[1:] if len(flat_minima_regions) > 1 else None