paddlex 3.0.1__py3-none-any.whl → 3.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- paddlex/.version +1 -1
- paddlex/inference/models/base/predictor/base_predictor.py +2 -0
- paddlex/inference/models/common/static_infer.py +20 -14
- paddlex/inference/models/common/ts/funcs.py +19 -8
- paddlex/inference/models/formula_recognition/predictor.py +1 -1
- paddlex/inference/models/formula_recognition/processors.py +2 -2
- paddlex/inference/models/text_recognition/result.py +1 -1
- paddlex/inference/pipelines/layout_parsing/layout_objects.py +859 -0
- paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +144 -205
- paddlex/inference/pipelines/layout_parsing/result_v2.py +13 -272
- paddlex/inference/pipelines/layout_parsing/setting.py +1 -0
- paddlex/inference/pipelines/layout_parsing/utils.py +108 -312
- paddlex/inference/pipelines/layout_parsing/xycut_enhanced/utils.py +302 -247
- paddlex/inference/pipelines/layout_parsing/xycut_enhanced/xycuts.py +156 -104
- paddlex/inference/pipelines/ocr/result.py +2 -2
- paddlex/inference/pipelines/pp_chatocr/pipeline_v4.py +1 -1
- paddlex/inference/serving/basic_serving/_app.py +47 -13
- paddlex/inference/serving/infra/utils.py +22 -17
- paddlex/inference/utils/hpi.py +60 -25
- paddlex/inference/utils/hpi_model_info_collection.json +627 -204
- paddlex/inference/utils/misc.py +20 -0
- paddlex/inference/utils/mkldnn_blocklist.py +36 -2
- paddlex/inference/utils/official_models.py +126 -5
- paddlex/inference/utils/pp_option.py +81 -21
- paddlex/modules/semantic_segmentation/dataset_checker/__init__.py +12 -2
- paddlex/ops/__init__.py +6 -3
- paddlex/utils/deps.py +2 -2
- paddlex/utils/device.py +4 -19
- paddlex/utils/download.py +10 -7
- paddlex/utils/flags.py +9 -0
- paddlex/utils/subclass_register.py +2 -2
- {paddlex-3.0.1.dist-info → paddlex-3.0.3.dist-info}/METADATA +307 -162
- {paddlex-3.0.1.dist-info → paddlex-3.0.3.dist-info}/RECORD +37 -35
- {paddlex-3.0.1.dist-info → paddlex-3.0.3.dist-info}/WHEEL +1 -1
- {paddlex-3.0.1.dist-info → paddlex-3.0.3.dist-info}/entry_points.txt +1 -0
- {paddlex-3.0.1.dist-info/licenses → paddlex-3.0.3.dist-info}/LICENSE +0 -0
- {paddlex-3.0.1.dist-info → paddlex-3.0.3.dist-info}/top_level.txt +0 -0
@@ -16,9 +16,13 @@ from typing import List, Tuple
|
|
16
16
|
|
17
17
|
import numpy as np
|
18
18
|
|
19
|
-
from ..
|
19
|
+
from ..layout_objects import LayoutBlock, LayoutRegion
|
20
20
|
from ..setting import BLOCK_LABEL_MAP, XYCUT_SETTINGS
|
21
|
-
from ..utils import
|
21
|
+
from ..utils import (
|
22
|
+
calculate_overlap_ratio,
|
23
|
+
calculate_projection_overlap_ratio,
|
24
|
+
get_seg_flag,
|
25
|
+
)
|
22
26
|
|
23
27
|
|
24
28
|
def get_nearest_edge_distance(
|
@@ -264,8 +268,8 @@ def recursive_xy_cut(
|
|
264
268
|
|
265
269
|
|
266
270
|
def reference_insert(
|
267
|
-
block:
|
268
|
-
sorted_blocks: List[
|
271
|
+
block: LayoutBlock,
|
272
|
+
sorted_blocks: List[LayoutBlock],
|
269
273
|
**kwargs,
|
270
274
|
):
|
271
275
|
"""
|
@@ -294,8 +298,8 @@ def reference_insert(
|
|
294
298
|
|
295
299
|
|
296
300
|
def manhattan_insert(
|
297
|
-
block:
|
298
|
-
sorted_blocks: List[
|
301
|
+
block: LayoutBlock,
|
302
|
+
sorted_blocks: List[LayoutBlock],
|
299
303
|
**kwargs,
|
300
304
|
):
|
301
305
|
"""
|
@@ -322,10 +326,38 @@ def manhattan_insert(
|
|
322
326
|
return sorted_blocks
|
323
327
|
|
324
328
|
|
329
|
+
def euclidean_insert(
|
330
|
+
block: LayoutRegion,
|
331
|
+
sorted_blocks: List[LayoutRegion],
|
332
|
+
**kwargs,
|
333
|
+
):
|
334
|
+
"""
|
335
|
+
Insert a block into a sorted list of blocks based on the Euclidean distance between the block and the nearest sorted block.
|
336
|
+
|
337
|
+
Args:
|
338
|
+
block: The block to insert into the sorted blocks.
|
339
|
+
sorted_blocks: The sorted blocks where the new block will be inserted.
|
340
|
+
config: Configuration dictionary containing parameters related to the layout parsing.
|
341
|
+
median_width: Median width of the document. Defaults to 0.0.
|
342
|
+
|
343
|
+
Returns:
|
344
|
+
sorted_blocks: The updated sorted blocks after insertion.
|
345
|
+
"""
|
346
|
+
nearest_sorted_block_index = len(sorted_blocks)
|
347
|
+
block_euclidean_distance = block.euclidean_distance
|
348
|
+
for sorted_block_idx, sorted_block in enumerate(sorted_blocks):
|
349
|
+
distance = sorted_block.euclidean_distance
|
350
|
+
if distance > block_euclidean_distance:
|
351
|
+
nearest_sorted_block_index = sorted_block_idx
|
352
|
+
break
|
353
|
+
sorted_blocks.insert(nearest_sorted_block_index, block)
|
354
|
+
return sorted_blocks
|
355
|
+
|
356
|
+
|
325
357
|
def weighted_distance_insert(
|
326
|
-
block:
|
327
|
-
sorted_blocks: List[
|
328
|
-
region:
|
358
|
+
block: LayoutBlock,
|
359
|
+
sorted_blocks: List[LayoutBlock],
|
360
|
+
region: LayoutRegion,
|
329
361
|
):
|
330
362
|
"""
|
331
363
|
Insert a block into a sorted list of blocks based on the weighted distance between the block and the nearest sorted block.
|
@@ -398,18 +430,75 @@ def weighted_distance_insert(
|
|
398
430
|
if weighted_distance < min_weighted_distance:
|
399
431
|
nearest_sorted_block_index = sorted_block_idx
|
400
432
|
min_weighted_distance = weighted_distance
|
401
|
-
if y1
|
433
|
+
if abs(y1 // 2 - y1_prime // 2) > 0:
|
434
|
+
sorted_distance = y1_prime
|
435
|
+
block_distance = y1
|
436
|
+
else:
|
437
|
+
if region.direction == "horizontal":
|
438
|
+
if abs(x1 // 2 - x2 // 2) > 0:
|
439
|
+
sorted_distance = x1_prime
|
440
|
+
block_distance = x1
|
441
|
+
else:
|
442
|
+
# distance with (0,0)
|
443
|
+
sorted_block_center_x, sorted_block_center_y = (
|
444
|
+
sorted_block.get_centroid()
|
445
|
+
)
|
446
|
+
block_center_x, block_center_y = block.get_centroid()
|
447
|
+
sorted_distance = (
|
448
|
+
sorted_block_center_x**2 + sorted_block_center_y**2
|
449
|
+
)
|
450
|
+
block_distance = block_center_x**2 + block_center_y**2
|
451
|
+
else:
|
452
|
+
if abs(x1 - x2) > 0:
|
453
|
+
sorted_distance = -x2_prime
|
454
|
+
block_distance = -x2
|
455
|
+
else:
|
456
|
+
# distance with (max,0)
|
457
|
+
sorted_block_center_x, sorted_block_center_y = (
|
458
|
+
sorted_block.get_centroid()
|
459
|
+
)
|
460
|
+
block_center_x, block_center_y = block.get_centroid()
|
461
|
+
sorted_distance = (
|
462
|
+
sorted_block_center_x**2 + sorted_block_center_y**2
|
463
|
+
)
|
464
|
+
block_distance = block_center_x**2 + block_center_y**2
|
465
|
+
if block_distance > sorted_distance:
|
402
466
|
nearest_sorted_block_index = sorted_block_idx + 1
|
467
|
+
if (
|
468
|
+
sorted_block_idx < len(sorted_blocks) - 1
|
469
|
+
and block.label
|
470
|
+
in BLOCK_LABEL_MAP["vision_labels"]
|
471
|
+
+ BLOCK_LABEL_MAP["vision_title_labels"]
|
472
|
+
):
|
473
|
+
seg_start_flag, _ = get_seg_flag(
|
474
|
+
sorted_blocks[sorted_block_idx + 1],
|
475
|
+
sorted_blocks[sorted_block_idx],
|
476
|
+
)
|
477
|
+
if not seg_start_flag:
|
478
|
+
nearest_sorted_block_index += 1
|
479
|
+
else:
|
480
|
+
if (
|
481
|
+
sorted_block_idx > 0
|
482
|
+
and block.label
|
483
|
+
in BLOCK_LABEL_MAP["vision_labels"]
|
484
|
+
+ BLOCK_LABEL_MAP["vision_title_labels"]
|
485
|
+
):
|
486
|
+
seg_start_flag, _ = get_seg_flag(
|
487
|
+
sorted_blocks[sorted_block_idx],
|
488
|
+
sorted_blocks[sorted_block_idx - 1],
|
489
|
+
)
|
490
|
+
if not seg_start_flag:
|
491
|
+
nearest_sorted_block_index = sorted_block_idx - 1
|
403
492
|
|
404
493
|
sorted_blocks.insert(nearest_sorted_block_index, block)
|
405
494
|
return sorted_blocks
|
406
495
|
|
407
496
|
|
408
497
|
def insert_child_blocks(
|
409
|
-
block:
|
498
|
+
block: LayoutBlock,
|
410
499
|
block_idx: int,
|
411
|
-
sorted_blocks: List[
|
412
|
-
) -> List[
|
500
|
+
sorted_blocks: List[LayoutBlock],
|
501
|
+
) -> List[LayoutBlock]:
|
413
502
|
"""
|
414
503
|
Insert child blocks of a block into the sorted blocks list.
|
415
504
|
|
@@ -432,34 +521,37 @@ def insert_child_blocks(
|
|
432
521
|
return sorted_blocks
|
433
522
|
|
434
523
|
|
435
|
-
def sort_child_blocks(
|
524
|
+
def sort_child_blocks(
|
525
|
+
blocks: List[LayoutRegion], direction="horizontal"
|
526
|
+
) -> List[LayoutBlock]:
|
436
527
|
"""
|
437
528
|
Sort child blocks based on their bounding box coordinates.
|
438
529
|
|
439
530
|
Args:
|
440
|
-
blocks: A list of
|
531
|
+
blocks: A list of LayoutBlock objects representing the child blocks.
|
441
532
|
direction: direction of the blocks ('horizontal' or 'vertical'). Default is 'horizontal'.
|
442
533
|
Returns:
|
443
|
-
sorted_blocks: A sorted list of
|
534
|
+
sorted_blocks: A sorted list of LayoutBlock objects.
|
444
535
|
"""
|
445
|
-
if
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
536
|
+
if blocks[0].label != "region":
|
537
|
+
if direction == "horizontal":
|
538
|
+
blocks.sort(
|
539
|
+
key=lambda x: (
|
540
|
+
x.bbox[1],
|
541
|
+
x.bbox[0],
|
542
|
+
x.get_centroid()[0] ** 2 + x.get_centroid()[1] ** 2,
|
543
|
+
), # distance with (0,0)
|
544
|
+
)
|
545
|
+
else:
|
546
|
+
blocks.sort(
|
547
|
+
key=lambda x: (
|
548
|
+
-x.bbox[2],
|
549
|
+
x.bbox[1],
|
550
|
+
-x.get_centroid()[0] ** 2 + x.get_centroid()[1] ** 2,
|
551
|
+
), # distance with (max,0)
|
552
|
+
)
|
454
553
|
else:
|
455
|
-
|
456
|
-
blocks.sort(
|
457
|
-
key=lambda x: (
|
458
|
-
-x.bbox[0], # x_min
|
459
|
-
x.bbox[1], # y_min
|
460
|
-
x.bbox[1] ** 2 - x.bbox[0] ** 2, # distance with (max,0)
|
461
|
-
),
|
462
|
-
)
|
554
|
+
blocks.sort(key=lambda x: x.euclidean_distance)
|
463
555
|
return blocks
|
464
556
|
|
465
557
|
|
@@ -504,41 +596,34 @@ def _manhattan_distance(
|
|
504
596
|
return weight_x * abs(point1[0] - point2[0]) + weight_y * abs(point1[1] - point2[1])
|
505
597
|
|
506
598
|
|
507
|
-
def sort_normal_blocks(
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
x.bbox[1] // text_line_height,
|
512
|
-
x.bbox[0] // text_line_width,
|
513
|
-
x.bbox[1] ** 2 + x.bbox[0] ** 2,
|
514
|
-
),
|
515
|
-
)
|
516
|
-
else:
|
517
|
-
blocks.sort(
|
518
|
-
key=lambda x: (
|
519
|
-
-x.bbox[0] // text_line_width,
|
520
|
-
x.bbox[1] // text_line_height,
|
521
|
-
x.bbox[1] ** 2 - x.bbox[2] ** 2, # distance with (max,0)
|
522
|
-
),
|
523
|
-
)
|
524
|
-
return blocks
|
599
|
+
def sort_normal_blocks(
|
600
|
+
blocks, text_line_height, text_line_width, region_direction
|
601
|
+
) -> List[LayoutBlock]:
|
602
|
+
"""Sort blocks by their position within the page
|
525
603
|
|
604
|
+
Args:
|
605
|
+
blocks (List[LayoutBlock]): List of blocks to be sorted.
|
606
|
+
text_line_height (int): Height of each line of text.
|
607
|
+
text_line_width (int): Width of each line of text.
|
608
|
+
region_direction (str): Direction of the region, either "horizontal" or "vertical".
|
526
609
|
|
527
|
-
|
610
|
+
Returns:
|
611
|
+
List[LayoutBlock]: Sorted list of blocks.
|
612
|
+
"""
|
528
613
|
if region_direction == "horizontal":
|
529
614
|
blocks.sort(
|
530
615
|
key=lambda x: (
|
531
616
|
x.bbox[1] // text_line_height,
|
532
617
|
x.bbox[0] // text_line_width,
|
533
|
-
x.
|
618
|
+
x.get_centroid()[0] ** 2 + x.get_centroid()[1] ** 2,
|
534
619
|
),
|
535
620
|
)
|
536
621
|
else:
|
537
622
|
blocks.sort(
|
538
623
|
key=lambda x: (
|
539
|
-
-x.bbox[
|
624
|
+
-x.bbox[2] // text_line_width,
|
540
625
|
x.bbox[1] // text_line_height,
|
541
|
-
-
|
626
|
+
-x.get_centroid()[0] ** 2 + x.get_centroid()[1] ** 2,
|
542
627
|
),
|
543
628
|
)
|
544
629
|
return blocks
|
@@ -585,45 +670,54 @@ def get_cut_blocks(blocks, cut_direction, cut_coordinates, mask_labels=[]):
|
|
585
670
|
return cuted_list
|
586
671
|
|
587
672
|
|
588
|
-
def
|
589
|
-
blocks: List[
|
590
|
-
|
591
|
-
|
592
|
-
|
593
|
-
|
594
|
-
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
-
|
605
|
-
|
673
|
+
def get_blocks_by_direction_interval(
|
674
|
+
blocks: List[LayoutBlock],
|
675
|
+
start_index: int,
|
676
|
+
end_index: int,
|
677
|
+
direction: str = "horizontal",
|
678
|
+
) -> List[LayoutBlock]:
|
679
|
+
"""
|
680
|
+
Get blocks within a specified direction interval.
|
681
|
+
|
682
|
+
Args:
|
683
|
+
blocks (List[LayoutBlock]): A list of blocks.
|
684
|
+
start_index (int): The starting index of the direction.
|
685
|
+
end_index (int): The ending index of the direction.
|
686
|
+
direction (str, optional): The direction to consider. Defaults to "horizontal".
|
687
|
+
|
688
|
+
Returns:
|
689
|
+
List[LayoutBlock]: A list of blocks within the specified direction interval.
|
690
|
+
"""
|
691
|
+
interval_blocks = []
|
692
|
+
aixis = 0 if direction == "horizontal" else 1
|
693
|
+
blocks.sort(key=lambda x: x.bbox[aixis + 2])
|
694
|
+
|
695
|
+
for block in blocks:
|
696
|
+
if block.bbox[aixis] >= start_index and block.bbox[aixis + 2] <= end_index:
|
697
|
+
interval_blocks.append(block)
|
698
|
+
|
699
|
+
return interval_blocks
|
606
700
|
|
607
701
|
|
608
702
|
def get_nearest_blocks(
|
609
|
-
block:
|
610
|
-
ref_blocks: List[
|
703
|
+
block: LayoutBlock,
|
704
|
+
ref_blocks: List[LayoutBlock],
|
611
705
|
overlap_threshold,
|
612
706
|
direction="horizontal",
|
613
707
|
) -> List:
|
614
708
|
"""
|
615
709
|
Get the adjacent blocks with the same direction as the current block.
|
616
710
|
Args:
|
617
|
-
block (
|
618
|
-
blocks (List[
|
711
|
+
block (LayoutBlock): The current block.
|
712
|
+
blocks (List[LayoutBlock]): A list of all blocks.
|
619
713
|
ref_block_idxes (List[int]): A list of indices of reference blocks.
|
620
714
|
iou_threshold (float): The IOU threshold to determine if two blocks are considered adjacent.
|
621
715
|
Returns:
|
622
716
|
Int: The index of the previous block with same direction.
|
623
717
|
Int: The index of the following block with same direction.
|
624
718
|
"""
|
625
|
-
prev_blocks: List[
|
626
|
-
post_blocks: List[
|
719
|
+
prev_blocks: List[LayoutBlock] = []
|
720
|
+
post_blocks: List[LayoutBlock] = []
|
627
721
|
sort_index = 1 if direction == "horizontal" else 0
|
628
722
|
for ref_block in ref_blocks:
|
629
723
|
if ref_block.index == block.index:
|
@@ -645,110 +739,9 @@ def get_nearest_blocks(
|
|
645
739
|
return prev_blocks, post_blocks
|
646
740
|
|
647
741
|
|
648
|
-
def get_adjacent_blocks_by_direction(
|
649
|
-
blocks: List[LayoutParsingBlock],
|
650
|
-
block_idx: int,
|
651
|
-
ref_block_idxes: List[int],
|
652
|
-
iou_threshold,
|
653
|
-
) -> List:
|
654
|
-
"""
|
655
|
-
Get the adjacent blocks with the same direction as the current block.
|
656
|
-
Args:
|
657
|
-
block (LayoutParsingBlock): The current block.
|
658
|
-
blocks (List[LayoutParsingBlock]): A list of all blocks.
|
659
|
-
ref_block_idxes (List[int]): A list of indices of reference blocks.
|
660
|
-
iou_threshold (float): The IOU threshold to determine if two blocks are considered adjacent.
|
661
|
-
Returns:
|
662
|
-
Int: The index of the previous block with same direction.
|
663
|
-
Int: The index of the following block with same direction.
|
664
|
-
"""
|
665
|
-
min_prev_block_distance = float("inf")
|
666
|
-
prev_block_index = None
|
667
|
-
min_post_block_distance = float("inf")
|
668
|
-
post_block_index = None
|
669
|
-
block = blocks[block_idx]
|
670
|
-
child_labels = [
|
671
|
-
"vision_footnote",
|
672
|
-
"sub_paragraph_title",
|
673
|
-
"doc_title_text",
|
674
|
-
"vision_title",
|
675
|
-
]
|
676
|
-
|
677
|
-
# find the nearest text block with same direction to the current block
|
678
|
-
for ref_block_idx in ref_block_idxes:
|
679
|
-
ref_block = blocks[ref_block_idx]
|
680
|
-
ref_block_direction = ref_block.direction
|
681
|
-
if ref_block.order_label in child_labels:
|
682
|
-
continue
|
683
|
-
match_block_iou = calculate_projection_overlap_ratio(
|
684
|
-
block.bbox,
|
685
|
-
ref_block.bbox,
|
686
|
-
ref_block_direction,
|
687
|
-
)
|
688
|
-
|
689
|
-
child_match_distance_tolerance_len = block.short_side_length / 10
|
690
|
-
|
691
|
-
if block.order_label == "vision":
|
692
|
-
if ref_block.num_of_lines == 1:
|
693
|
-
gap_tolerance_len = ref_block.short_side_length * 2
|
694
|
-
else:
|
695
|
-
gap_tolerance_len = block.short_side_length / 10
|
696
|
-
else:
|
697
|
-
gap_tolerance_len = block.short_side_length * 2
|
698
|
-
|
699
|
-
if match_block_iou >= iou_threshold:
|
700
|
-
prev_distance = (
|
701
|
-
block.secondary_direction_start_coordinate
|
702
|
-
- ref_block.secondary_direction_end_coordinate
|
703
|
-
+ child_match_distance_tolerance_len
|
704
|
-
) // 5 + ref_block.start_coordinate / 5000
|
705
|
-
next_distance = (
|
706
|
-
ref_block.secondary_direction_start_coordinate
|
707
|
-
- block.secondary_direction_end_coordinate
|
708
|
-
+ child_match_distance_tolerance_len
|
709
|
-
) // 5 + ref_block.start_coordinate / 5000
|
710
|
-
if (
|
711
|
-
ref_block.secondary_direction_end_coordinate
|
712
|
-
<= block.secondary_direction_start_coordinate
|
713
|
-
+ child_match_distance_tolerance_len
|
714
|
-
and prev_distance < min_prev_block_distance
|
715
|
-
):
|
716
|
-
min_prev_block_distance = prev_distance
|
717
|
-
if (
|
718
|
-
block.secondary_direction_start_coordinate
|
719
|
-
- ref_block.secondary_direction_end_coordinate
|
720
|
-
< gap_tolerance_len
|
721
|
-
):
|
722
|
-
prev_block_index = ref_block_idx
|
723
|
-
elif (
|
724
|
-
ref_block.secondary_direction_start_coordinate
|
725
|
-
> block.secondary_direction_end_coordinate
|
726
|
-
- child_match_distance_tolerance_len
|
727
|
-
and next_distance < min_post_block_distance
|
728
|
-
):
|
729
|
-
min_post_block_distance = next_distance
|
730
|
-
if (
|
731
|
-
ref_block.secondary_direction_start_coordinate
|
732
|
-
- block.secondary_direction_end_coordinate
|
733
|
-
< gap_tolerance_len
|
734
|
-
):
|
735
|
-
post_block_index = ref_block_idx
|
736
|
-
|
737
|
-
diff_dist = abs(min_prev_block_distance - min_post_block_distance)
|
738
|
-
|
739
|
-
# if the difference in distance is too large, only consider the nearest one
|
740
|
-
if diff_dist * 5 > block.short_side_length:
|
741
|
-
if min_prev_block_distance < min_post_block_distance:
|
742
|
-
post_block_index = None
|
743
|
-
else:
|
744
|
-
prev_block_index = None
|
745
|
-
|
746
|
-
return prev_block_index, post_block_index
|
747
|
-
|
748
|
-
|
749
742
|
def update_doc_title_child_blocks(
|
750
|
-
block:
|
751
|
-
region:
|
743
|
+
block: LayoutBlock,
|
744
|
+
region: LayoutRegion,
|
752
745
|
) -> None:
|
753
746
|
"""
|
754
747
|
Update the child blocks of a document title block.
|
@@ -762,8 +755,8 @@ def update_doc_title_child_blocks(
|
|
762
755
|
6. The nearest edge distance should be less than 2 times of the text line height.
|
763
756
|
|
764
757
|
Args:
|
765
|
-
blocks (List[
|
766
|
-
block (
|
758
|
+
blocks (List[LayoutBlock]): overall blocks.
|
759
|
+
block (LayoutBlock): document title block.
|
767
760
|
prev_idx (int): previous block index, None if not exist.
|
768
761
|
post_idx (int): post block index, None if not exist.
|
769
762
|
config (dict): configurations.
|
@@ -813,10 +806,24 @@ def update_doc_title_child_blocks(
|
|
813
806
|
block.append_child_block(ref_block)
|
814
807
|
region.normal_text_block_idxes.remove(ref_block.index)
|
815
808
|
|
809
|
+
for ref_block in ref_blocks:
|
810
|
+
if ref_block.order_label == "doc_title_text":
|
811
|
+
continue
|
812
|
+
with_seem_direction = ref_block.direction == block.direction
|
813
|
+
|
814
|
+
overlap_ratio = calculate_overlap_ratio(
|
815
|
+
block.bbox, ref_block.bbox, mode="small"
|
816
|
+
)
|
817
|
+
|
818
|
+
if overlap_ratio > 0.9 and with_seem_direction:
|
819
|
+
ref_block.order_label = "doc_title_text"
|
820
|
+
block.append_child_block(ref_block)
|
821
|
+
region.normal_text_block_idxes.remove(ref_block.index)
|
822
|
+
|
816
823
|
|
817
824
|
def update_paragraph_title_child_blocks(
|
818
|
-
block:
|
819
|
-
region:
|
825
|
+
block: LayoutBlock,
|
826
|
+
region: LayoutRegion,
|
820
827
|
) -> None:
|
821
828
|
"""
|
822
829
|
Update the child blocks of a paragraph title block.
|
@@ -827,8 +834,8 @@ def update_paragraph_title_child_blocks(
|
|
827
834
|
3. The child block must be paragraph title block.
|
828
835
|
|
829
836
|
Args:
|
830
|
-
blocks (List[
|
831
|
-
block (
|
837
|
+
blocks (List[LayoutBlock]): overall blocks.
|
838
|
+
block (LayoutBlock): document title block.
|
832
839
|
prev_idx (int): previous block index, None if not exist.
|
833
840
|
post_idx (int): post block index, None if not exist.
|
834
841
|
config (dict): configurations.
|
@@ -858,8 +865,13 @@ def update_paragraph_title_child_blocks(
|
|
858
865
|
block.bbox, ref_block.bbox
|
859
866
|
)
|
860
867
|
with_seem_direction = ref_block.direction == block.direction
|
868
|
+
with_seem_start = (
|
869
|
+
abs(ref_block.start_coordinate - block.start_coordinate)
|
870
|
+
< min_text_line_height * 2
|
871
|
+
)
|
861
872
|
if (
|
862
873
|
with_seem_direction
|
874
|
+
and with_seem_start
|
863
875
|
and nearest_edge_distance <= min_text_line_height * 1.5
|
864
876
|
):
|
865
877
|
ref_block.order_label = "sub_paragraph_title"
|
@@ -868,8 +880,8 @@ def update_paragraph_title_child_blocks(
|
|
868
880
|
|
869
881
|
|
870
882
|
def update_vision_child_blocks(
|
871
|
-
block:
|
872
|
-
region:
|
883
|
+
block: LayoutBlock,
|
884
|
+
region: LayoutRegion,
|
873
885
|
) -> None:
|
874
886
|
"""
|
875
887
|
Update the child blocks of a paragraph title block.
|
@@ -887,8 +899,8 @@ def update_vision_child_blocks(
|
|
887
899
|
4. The difference between their centers is very small.
|
888
900
|
|
889
901
|
Args:
|
890
|
-
blocks (List[
|
891
|
-
block (
|
902
|
+
blocks (List[LayoutBlock]): overall blocks.
|
903
|
+
block (LayoutBlock): document title block.
|
892
904
|
ref_block_idxes (List[int]): A list of indices of reference blocks.
|
893
905
|
prev_idx (int): previous block index, None if not exist.
|
894
906
|
post_idx (int): post block index, None if not exist.
|
@@ -934,11 +946,11 @@ def update_vision_child_blocks(
|
|
934
946
|
not has_vision_footnote
|
935
947
|
and ref_block.direction == block.direction
|
936
948
|
and ref_block.long_side_length < block.long_side_length
|
949
|
+
and nearest_edge_distance <= ref_block.text_line_height * 2
|
937
950
|
):
|
938
951
|
if (
|
939
952
|
(
|
940
|
-
|
941
|
-
and ref_block.short_side_length < block.short_side_length
|
953
|
+
ref_block.short_side_length < block.short_side_length
|
942
954
|
and ref_block.long_side_length
|
943
955
|
< 0.5 * block.long_side_length
|
944
956
|
and abs(block_center[0] - ref_block_center[0]) < 10
|
@@ -979,12 +991,17 @@ def update_vision_child_blocks(
|
|
979
991
|
if ref_block.label in BLOCK_LABEL_MAP["text_labels"]:
|
980
992
|
if (
|
981
993
|
not has_vision_footnote
|
982
|
-
and nearest_edge_distance <= block.text_line_height * 2
|
983
|
-
and ref_block.short_side_length < block.short_side_length
|
984
|
-
and ref_block.long_side_length < 0.5 * block.long_side_length
|
985
994
|
and ref_block.direction == block.direction
|
986
|
-
and
|
987
|
-
|
995
|
+
and ref_block.long_side_length < block.long_side_length
|
996
|
+
and nearest_edge_distance <= ref_block.text_line_height * 2
|
997
|
+
):
|
998
|
+
if (
|
999
|
+
(
|
1000
|
+
ref_block.short_side_length < block.short_side_length
|
1001
|
+
and ref_block.long_side_length
|
1002
|
+
< 0.5 * block.long_side_length
|
1003
|
+
and abs(block_center[0] - ref_block_center[0]) < 10
|
1004
|
+
)
|
988
1005
|
or (
|
989
1006
|
block.bbox[0] - ref_block.bbox[0] < 10
|
990
1007
|
and ref_block.num_of_lines == 1
|
@@ -993,16 +1010,56 @@ def update_vision_child_blocks(
|
|
993
1010
|
block.bbox[2] - ref_block.bbox[2] < 10
|
994
1011
|
and ref_block.num_of_lines == 1
|
995
1012
|
)
|
996
|
-
)
|
997
|
-
|
998
|
-
|
999
|
-
|
1000
|
-
|
1001
|
-
|
1013
|
+
):
|
1014
|
+
has_vision_footnote = True
|
1015
|
+
ref_block.label = "vision_footnote"
|
1016
|
+
ref_block.order_label = "vision_footnote"
|
1017
|
+
block.append_child_block(ref_block)
|
1018
|
+
region.normal_text_block_idxes.remove(ref_block.index)
|
1002
1019
|
break
|
1003
1020
|
if has_vision_title:
|
1004
1021
|
break
|
1005
1022
|
|
1023
|
+
for ref_block in ref_blocks:
|
1024
|
+
if ref_block.index not in region.normal_text_block_idxes:
|
1025
|
+
continue
|
1026
|
+
|
1027
|
+
overlap_ratio = calculate_overlap_ratio(
|
1028
|
+
block.bbox, ref_block.bbox, mode="small"
|
1029
|
+
)
|
1030
|
+
|
1031
|
+
if overlap_ratio > 0.9:
|
1032
|
+
ref_block.label = "vision_footnote"
|
1033
|
+
ref_block.order_label = "vision_footnote"
|
1034
|
+
block.append_child_block(ref_block)
|
1035
|
+
region.normal_text_block_idxes.remove(ref_block.index)
|
1036
|
+
|
1037
|
+
|
1038
|
+
def update_region_child_blocks(
|
1039
|
+
block: LayoutBlock,
|
1040
|
+
region: LayoutRegion,
|
1041
|
+
) -> None:
|
1042
|
+
"""Update child blocks of a region.
|
1043
|
+
|
1044
|
+
Args:
|
1045
|
+
block (LayoutBlock): document title block.
|
1046
|
+
region (LayoutRegion): layout region.
|
1047
|
+
|
1048
|
+
Returns:
|
1049
|
+
None
|
1050
|
+
"""
|
1051
|
+
for ref_block in region.block_map.values():
|
1052
|
+
if block.index != ref_block.index:
|
1053
|
+
bbox_iou = calculate_overlap_ratio(block.bbox, ref_block.bbox)
|
1054
|
+
if (
|
1055
|
+
bbox_iou > 0
|
1056
|
+
and block.area > ref_block.area
|
1057
|
+
and ref_block.order_label != "sub_region"
|
1058
|
+
):
|
1059
|
+
ref_block.order_label = "sub_region"
|
1060
|
+
block.append_child_block(ref_block)
|
1061
|
+
region.normal_text_block_idxes.remove(ref_block.index)
|
1062
|
+
|
1006
1063
|
|
1007
1064
|
def calculate_discontinuous_projection(
|
1008
1065
|
boxes, direction="horizontal", return_num=False
|
@@ -1049,44 +1106,6 @@ def calculate_discontinuous_projection(
|
|
1049
1106
|
return merged_intervals
|
1050
1107
|
|
1051
1108
|
|
1052
|
-
def is_projection_consistent(blocks, intervals, direction="horizontal"):
|
1053
|
-
|
1054
|
-
for interval in intervals:
|
1055
|
-
if direction == "horizontal":
|
1056
|
-
start_index, stop_index = 0, 2
|
1057
|
-
interval_box = [interval[0], 0, interval[1], 1]
|
1058
|
-
else:
|
1059
|
-
start_index, stop_index = 1, 3
|
1060
|
-
interval_box = [0, interval[0], 1, interval[1]]
|
1061
|
-
same_interval_bboxes = []
|
1062
|
-
for block in blocks:
|
1063
|
-
overlap_ratio = calculate_projection_overlap_ratio(
|
1064
|
-
interval_box, block.bbox, direction=direction
|
1065
|
-
)
|
1066
|
-
if overlap_ratio > 0 and block.label in BLOCK_LABEL_MAP["text_labels"]:
|
1067
|
-
same_interval_bboxes.append(block.bbox)
|
1068
|
-
start_coordinates = [bbox[start_index] for bbox in same_interval_bboxes]
|
1069
|
-
if start_coordinates:
|
1070
|
-
min_start_coordinate = min(start_coordinates)
|
1071
|
-
max_start_coordinate = max(start_coordinates)
|
1072
|
-
is_start_consistent = (
|
1073
|
-
False
|
1074
|
-
if max_start_coordinate - min_start_coordinate
|
1075
|
-
>= abs(interval[0] - interval[1]) * 0.05
|
1076
|
-
else True
|
1077
|
-
)
|
1078
|
-
stop_coordinates = [bbox[stop_index] for bbox in same_interval_bboxes]
|
1079
|
-
min_stop_coordinate = min(stop_coordinates)
|
1080
|
-
max_stop_coordinate = max(stop_coordinates)
|
1081
|
-
if (
|
1082
|
-
max_stop_coordinate - min_stop_coordinate
|
1083
|
-
>= abs(interval[0] - interval[1]) * 0.05
|
1084
|
-
and is_start_consistent
|
1085
|
-
):
|
1086
|
-
return False
|
1087
|
-
return True
|
1088
|
-
|
1089
|
-
|
1090
1109
|
def shrink_overlapping_boxes(
|
1091
1110
|
boxes, direction="horizontal", min_threshold=0, max_threshold=0.1
|
1092
1111
|
) -> List:
|
@@ -1125,8 +1144,12 @@ def shrink_overlapping_boxes(
|
|
1125
1144
|
split_y = int((overlap_y_min + overlap_y_max) / 2)
|
1126
1145
|
overlap_y_min = split_y - 1
|
1127
1146
|
overlap_y_max = split_y + 1
|
1128
|
-
|
1129
|
-
|
1147
|
+
if y1 < y1_prime:
|
1148
|
+
current_block.bbox = [x1, y1, x2, overlap_y_min]
|
1149
|
+
block.bbox = [x1_prime, overlap_y_max, x2_prime, y2_prime]
|
1150
|
+
else:
|
1151
|
+
current_block.bbox = [x1, overlap_y_min, x2, y2]
|
1152
|
+
block.bbox = [x1_prime, y1_prime, x2_prime, overlap_y_max]
|
1130
1153
|
else:
|
1131
1154
|
if (
|
1132
1155
|
(match_iou > 0 and cut_iou > min_threshold and cut_iou < max_threshold)
|
@@ -1138,7 +1161,39 @@ def shrink_overlapping_boxes(
|
|
1138
1161
|
split_x = int((overlap_x_min + overlap_x_max) / 2)
|
1139
1162
|
overlap_x_min = split_x - 1
|
1140
1163
|
overlap_x_max = split_x + 1
|
1141
|
-
|
1142
|
-
|
1164
|
+
if x1 < x1_prime:
|
1165
|
+
current_block.bbox = [x1, y1, overlap_x_min, y2]
|
1166
|
+
block.bbox = [overlap_x_max, y1_prime, x2_prime, y2_prime]
|
1167
|
+
else:
|
1168
|
+
current_block.bbox = [overlap_x_min, y1, x2, y2]
|
1169
|
+
block.bbox = [x1_prime, y1_prime, overlap_x_max, y2_prime]
|
1143
1170
|
current_block = block
|
1144
1171
|
return boxes
|
1172
|
+
|
1173
|
+
|
1174
|
+
def find_local_minima_flat_regions(arr) -> List:
|
1175
|
+
"""
|
1176
|
+
Find all local minima regions in a flat array.
|
1177
|
+
|
1178
|
+
Args:
|
1179
|
+
arr (list): The input array.
|
1180
|
+
|
1181
|
+
Returns:
|
1182
|
+
list: A list of tuples containing the indices of the local minima regions.
|
1183
|
+
"""
|
1184
|
+
n = len(arr)
|
1185
|
+
if n == 0:
|
1186
|
+
return []
|
1187
|
+
|
1188
|
+
flat_minima_regions = []
|
1189
|
+
start = 0
|
1190
|
+
|
1191
|
+
for i in range(1, n):
|
1192
|
+
if arr[i] != arr[i - 1]:
|
1193
|
+
if (start == 0 or arr[start - 1] > arr[start]) and (
|
1194
|
+
i == n or arr[i] > arr[start]
|
1195
|
+
):
|
1196
|
+
flat_minima_regions.append((start, i - 1))
|
1197
|
+
start = i
|
1198
|
+
|
1199
|
+
return flat_minima_regions[1:] if len(flat_minima_regions) > 1 else None
|