paddlex 3.0.1__py3-none-any.whl → 3.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- paddlex/.version +1 -1
- paddlex/inference/models/base/predictor/base_predictor.py +2 -0
- paddlex/inference/models/common/static_infer.py +20 -14
- paddlex/inference/models/common/ts/funcs.py +19 -8
- paddlex/inference/models/formula_recognition/predictor.py +1 -1
- paddlex/inference/models/formula_recognition/processors.py +2 -2
- paddlex/inference/models/text_recognition/result.py +1 -1
- paddlex/inference/pipelines/layout_parsing/layout_objects.py +859 -0
- paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +144 -205
- paddlex/inference/pipelines/layout_parsing/result_v2.py +13 -272
- paddlex/inference/pipelines/layout_parsing/setting.py +1 -0
- paddlex/inference/pipelines/layout_parsing/utils.py +108 -312
- paddlex/inference/pipelines/layout_parsing/xycut_enhanced/utils.py +302 -247
- paddlex/inference/pipelines/layout_parsing/xycut_enhanced/xycuts.py +156 -104
- paddlex/inference/pipelines/ocr/result.py +2 -2
- paddlex/inference/pipelines/pp_chatocr/pipeline_v4.py +1 -1
- paddlex/inference/serving/basic_serving/_app.py +47 -13
- paddlex/inference/serving/infra/utils.py +22 -17
- paddlex/inference/utils/hpi.py +60 -25
- paddlex/inference/utils/hpi_model_info_collection.json +627 -204
- paddlex/inference/utils/misc.py +20 -0
- paddlex/inference/utils/mkldnn_blocklist.py +36 -2
- paddlex/inference/utils/official_models.py +126 -5
- paddlex/inference/utils/pp_option.py +81 -21
- paddlex/modules/semantic_segmentation/dataset_checker/__init__.py +12 -2
- paddlex/ops/__init__.py +6 -3
- paddlex/utils/deps.py +2 -2
- paddlex/utils/device.py +4 -19
- paddlex/utils/download.py +10 -7
- paddlex/utils/flags.py +9 -0
- paddlex/utils/subclass_register.py +2 -2
- {paddlex-3.0.1.dist-info → paddlex-3.0.3.dist-info}/METADATA +307 -162
- {paddlex-3.0.1.dist-info → paddlex-3.0.3.dist-info}/RECORD +37 -35
- {paddlex-3.0.1.dist-info → paddlex-3.0.3.dist-info}/WHEEL +1 -1
- {paddlex-3.0.1.dist-info → paddlex-3.0.3.dist-info}/entry_points.txt +1 -0
- {paddlex-3.0.1.dist-info/licenses → paddlex-3.0.3.dist-info}/LICENSE +0 -0
- {paddlex-3.0.1.dist-info → paddlex-3.0.3.dist-info}/top_level.txt +0 -0
@@ -14,7 +14,6 @@
|
|
14
14
|
from __future__ import annotations
|
15
15
|
|
16
16
|
import copy
|
17
|
-
import math
|
18
17
|
import re
|
19
18
|
from functools import partial
|
20
19
|
from typing import List
|
@@ -30,7 +29,8 @@ from ...common.result import (
|
|
30
29
|
MarkdownMixin,
|
31
30
|
XlsxMixin,
|
32
31
|
)
|
33
|
-
from .
|
32
|
+
from .layout_objects import LayoutBlock
|
33
|
+
from .utils import get_seg_flag
|
34
34
|
|
35
35
|
|
36
36
|
def compile_title_pattern():
|
@@ -140,58 +140,6 @@ def format_first_line_func(block, templates, format_func, spliter):
|
|
140
140
|
return spliter.join(lines)
|
141
141
|
|
142
142
|
|
143
|
-
def get_seg_flag(block: LayoutParsingBlock, prev_block: LayoutParsingBlock):
|
144
|
-
|
145
|
-
seg_start_flag = True
|
146
|
-
seg_end_flag = True
|
147
|
-
|
148
|
-
block_box = block.bbox
|
149
|
-
context_left_coordinate = block_box[0]
|
150
|
-
context_right_coordinate = block_box[2]
|
151
|
-
seg_start_coordinate = block.seg_start_coordinate
|
152
|
-
seg_end_coordinate = block.seg_end_coordinate
|
153
|
-
|
154
|
-
if prev_block is not None:
|
155
|
-
prev_block_bbox = prev_block.bbox
|
156
|
-
num_of_prev_lines = prev_block.num_of_lines
|
157
|
-
pre_block_seg_end_coordinate = prev_block.seg_end_coordinate
|
158
|
-
prev_end_space_small = (
|
159
|
-
abs(prev_block_bbox[2] - pre_block_seg_end_coordinate) < 10
|
160
|
-
)
|
161
|
-
prev_lines_more_than_one = num_of_prev_lines > 1
|
162
|
-
|
163
|
-
overlap_blocks = context_left_coordinate < prev_block_bbox[2]
|
164
|
-
|
165
|
-
# update context_left_coordinate and context_right_coordinate
|
166
|
-
if overlap_blocks:
|
167
|
-
context_left_coordinate = min(prev_block_bbox[0], context_left_coordinate)
|
168
|
-
context_right_coordinate = max(prev_block_bbox[2], context_right_coordinate)
|
169
|
-
prev_end_space_small = (
|
170
|
-
abs(context_right_coordinate - pre_block_seg_end_coordinate) < 10
|
171
|
-
)
|
172
|
-
edge_distance = 0
|
173
|
-
else:
|
174
|
-
edge_distance = abs(block_box[0] - prev_block_bbox[2])
|
175
|
-
|
176
|
-
current_start_space_small = seg_start_coordinate - context_left_coordinate < 10
|
177
|
-
|
178
|
-
if (
|
179
|
-
prev_end_space_small
|
180
|
-
and current_start_space_small
|
181
|
-
and prev_lines_more_than_one
|
182
|
-
and edge_distance < max(prev_block.width, block.width)
|
183
|
-
):
|
184
|
-
seg_start_flag = False
|
185
|
-
else:
|
186
|
-
if seg_start_coordinate - context_left_coordinate < 10:
|
187
|
-
seg_start_flag = False
|
188
|
-
|
189
|
-
if context_right_coordinate - seg_end_coordinate < 10:
|
190
|
-
seg_end_flag = False
|
191
|
-
|
192
|
-
return seg_start_flag, seg_end_flag
|
193
|
-
|
194
|
-
|
195
143
|
class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
|
196
144
|
"""Layout Parsing Result V2"""
|
197
145
|
|
@@ -247,7 +195,7 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
|
|
247
195
|
draw = ImageDraw.Draw(image, "RGBA")
|
248
196
|
font_size = int(0.018 * int(image.width)) + 2
|
249
197
|
font = ImageFont.truetype(PINGFANG_FONT_FILE_PATH, font_size, encoding="utf-8")
|
250
|
-
parsing_result: List[LayoutParsingBlock] = self["parsing_res_list"]
|
198
|
+
parsing_result: List[LayoutBlock] = self["parsing_res_list"]
|
251
199
|
for block in parsing_result:
|
252
200
|
bbox = block.bbox
|
253
201
|
index = block.order_index
|
@@ -456,6 +404,9 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
|
|
456
404
|
"table_title": format_text_func,
|
457
405
|
"figure_title": format_text_func,
|
458
406
|
"chart_title": format_text_func,
|
407
|
+
"vision_footnote": lambda block: block.content.replace(
|
408
|
+
"\n\n", "\n"
|
409
|
+
).replace("\n", "\n\n"),
|
459
410
|
"text": lambda block: block.content.replace("\n\n", "\n").replace(
|
460
411
|
"\n", "\n\n"
|
461
412
|
),
|
@@ -484,8 +435,8 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
|
|
484
435
|
|
485
436
|
markdown_content = ""
|
486
437
|
last_label = None
|
487
|
-
seg_start_flag =
|
488
|
-
seg_end_flag =
|
438
|
+
seg_start_flag = True
|
439
|
+
seg_end_flag = True
|
489
440
|
prev_block = None
|
490
441
|
page_first_element_seg_start_flag = None
|
491
442
|
page_last_element_seg_end_flag = None
|
@@ -517,6 +468,11 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
|
|
517
468
|
else handle_func(block)
|
518
469
|
)
|
519
470
|
last_label = label
|
471
|
+
page_first_element_seg_start_flag = (
|
472
|
+
True
|
473
|
+
if page_first_element_seg_start_flag is None
|
474
|
+
else page_first_element_seg_start_flag
|
475
|
+
)
|
520
476
|
page_last_element_seg_end_flag = seg_end_flag
|
521
477
|
|
522
478
|
markdown_info["markdown_texts"] = markdown_content
|
@@ -528,218 +484,3 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
|
|
528
484
|
markdown_info["markdown_images"][img["path"]] = img["img"]
|
529
485
|
|
530
486
|
return markdown_info
|
531
|
-
|
532
|
-
|
533
|
-
class LayoutParsingBlock:
|
534
|
-
|
535
|
-
def __init__(self, label, bbox, content="") -> None:
|
536
|
-
self.label = label
|
537
|
-
self.order_label = None
|
538
|
-
self.bbox = list(map(int, bbox))
|
539
|
-
self.content = content
|
540
|
-
self.seg_start_coordinate = float("inf")
|
541
|
-
self.seg_end_coordinate = float("-inf")
|
542
|
-
self.width = bbox[2] - bbox[0]
|
543
|
-
self.height = bbox[3] - bbox[1]
|
544
|
-
self.area = self.width * self.height
|
545
|
-
self.num_of_lines = 1
|
546
|
-
self.image = None
|
547
|
-
self.index = None
|
548
|
-
self.order_index = None
|
549
|
-
self.text_line_width = 1
|
550
|
-
self.text_line_height = 1
|
551
|
-
self.direction = self.get_bbox_direction()
|
552
|
-
self.child_blocks = []
|
553
|
-
self.update_direction_info()
|
554
|
-
|
555
|
-
def __str__(self) -> str:
|
556
|
-
return f"{self.__dict__}"
|
557
|
-
|
558
|
-
def __repr__(self) -> str:
|
559
|
-
_str = f"\n\n#################\nindex:\t{self.index}\nlabel:\t{self.label}\nregion_label:\t{self.order_label}\nbbox:\t{self.bbox}\ncontent:\t{self.content}\n#################"
|
560
|
-
return _str
|
561
|
-
|
562
|
-
def to_dict(self) -> dict:
|
563
|
-
return self.__dict__
|
564
|
-
|
565
|
-
def update_direction_info(self) -> None:
|
566
|
-
if self.direction == "horizontal":
|
567
|
-
self.secondary_direction = "vertical"
|
568
|
-
self.short_side_length = self.height
|
569
|
-
self.long_side_length = self.width
|
570
|
-
self.start_coordinate = self.bbox[0]
|
571
|
-
self.end_coordinate = self.bbox[2]
|
572
|
-
self.secondary_direction_start_coordinate = self.bbox[1]
|
573
|
-
self.secondary_direction_end_coordinate = self.bbox[3]
|
574
|
-
else:
|
575
|
-
self.secondary_direction = "horizontal"
|
576
|
-
self.short_side_length = self.width
|
577
|
-
self.long_side_length = self.height
|
578
|
-
self.start_coordinate = self.bbox[1]
|
579
|
-
self.end_coordinate = self.bbox[3]
|
580
|
-
self.secondary_direction_start_coordinate = self.bbox[0]
|
581
|
-
self.secondary_direction_end_coordinate = self.bbox[2]
|
582
|
-
|
583
|
-
def append_child_block(self, child_block: LayoutParsingBlock) -> None:
|
584
|
-
if not self.child_blocks:
|
585
|
-
self.ori_bbox = self.bbox.copy()
|
586
|
-
x1, y1, x2, y2 = self.bbox
|
587
|
-
x1_child, y1_child, x2_child, y2_child = child_block.bbox
|
588
|
-
union_bbox = (
|
589
|
-
min(x1, x1_child),
|
590
|
-
min(y1, y1_child),
|
591
|
-
max(x2, x2_child),
|
592
|
-
max(y2, y2_child),
|
593
|
-
)
|
594
|
-
self.bbox = union_bbox
|
595
|
-
self.update_direction_info()
|
596
|
-
child_blocks = [child_block]
|
597
|
-
if child_block.child_blocks:
|
598
|
-
child_blocks.extend(child_block.get_child_blocks())
|
599
|
-
self.child_blocks.extend(child_blocks)
|
600
|
-
|
601
|
-
def get_child_blocks(self) -> list:
|
602
|
-
self.bbox = self.ori_bbox
|
603
|
-
child_blocks = self.child_blocks.copy()
|
604
|
-
self.child_blocks = []
|
605
|
-
return child_blocks
|
606
|
-
|
607
|
-
def get_centroid(self) -> tuple:
|
608
|
-
x1, y1, x2, y2 = self.bbox
|
609
|
-
centroid = ((x1 + x2) / 2, (y1 + y2) / 2)
|
610
|
-
return centroid
|
611
|
-
|
612
|
-
def get_bbox_direction(self, direction_ratio: float = 1.0) -> bool:
|
613
|
-
"""
|
614
|
-
Determine if a bounding box is horizontal or vertical.
|
615
|
-
|
616
|
-
Args:
|
617
|
-
bbox (List[float]): Bounding box [x_min, y_min, x_max, y_max].
|
618
|
-
direction_ratio (float): Ratio for determining direction. Default is 1.0.
|
619
|
-
|
620
|
-
Returns:
|
621
|
-
str: "horizontal" or "vertical".
|
622
|
-
"""
|
623
|
-
return (
|
624
|
-
"horizontal" if self.width * direction_ratio >= self.height else "vertical"
|
625
|
-
)
|
626
|
-
|
627
|
-
|
628
|
-
class LayoutParsingRegion:
|
629
|
-
|
630
|
-
def __init__(
|
631
|
-
self, bbox, blocks: List[LayoutParsingBlock] = [], image_shape=None
|
632
|
-
) -> None:
|
633
|
-
self.bbox = bbox
|
634
|
-
self.block_map = {}
|
635
|
-
self.direction = "horizontal"
|
636
|
-
self.calculate_bbox_metrics(image_shape)
|
637
|
-
self.doc_title_block_idxes = []
|
638
|
-
self.paragraph_title_block_idxes = []
|
639
|
-
self.vision_block_idxes = []
|
640
|
-
self.unordered_block_idxes = []
|
641
|
-
self.vision_title_block_idxes = []
|
642
|
-
self.normal_text_block_idxes = []
|
643
|
-
self.header_block_idxes = []
|
644
|
-
self.footer_block_idxes = []
|
645
|
-
self.text_line_width = 20
|
646
|
-
self.text_line_height = 10
|
647
|
-
self.init_region_info_from_layout(blocks)
|
648
|
-
self.init_direction_info()
|
649
|
-
|
650
|
-
def init_region_info_from_layout(self, blocks: List[LayoutParsingBlock]):
|
651
|
-
horizontal_normal_text_block_num = 0
|
652
|
-
text_line_height_list = []
|
653
|
-
text_line_width_list = []
|
654
|
-
for idx, block in enumerate(blocks):
|
655
|
-
self.block_map[idx] = block
|
656
|
-
block.index = idx
|
657
|
-
if block.label in BLOCK_LABEL_MAP["header_labels"]:
|
658
|
-
self.header_block_idxes.append(idx)
|
659
|
-
elif block.label in BLOCK_LABEL_MAP["doc_title_labels"]:
|
660
|
-
self.doc_title_block_idxes.append(idx)
|
661
|
-
elif block.label in BLOCK_LABEL_MAP["paragraph_title_labels"]:
|
662
|
-
self.paragraph_title_block_idxes.append(idx)
|
663
|
-
elif block.label in BLOCK_LABEL_MAP["vision_labels"]:
|
664
|
-
self.vision_block_idxes.append(idx)
|
665
|
-
elif block.label in BLOCK_LABEL_MAP["vision_title_labels"]:
|
666
|
-
self.vision_title_block_idxes.append(idx)
|
667
|
-
elif block.label in BLOCK_LABEL_MAP["footer_labels"]:
|
668
|
-
self.footer_block_idxes.append(idx)
|
669
|
-
elif block.label in BLOCK_LABEL_MAP["unordered_labels"]:
|
670
|
-
self.unordered_block_idxes.append(idx)
|
671
|
-
else:
|
672
|
-
self.normal_text_block_idxes.append(idx)
|
673
|
-
text_line_height_list.append(block.text_line_height)
|
674
|
-
text_line_width_list.append(block.text_line_width)
|
675
|
-
if block.direction == "horizontal":
|
676
|
-
horizontal_normal_text_block_num += 1
|
677
|
-
self.direction = (
|
678
|
-
"horizontal"
|
679
|
-
if horizontal_normal_text_block_num
|
680
|
-
>= len(self.normal_text_block_idxes) * 0.5
|
681
|
-
else "vertical"
|
682
|
-
)
|
683
|
-
self.text_line_width = (
|
684
|
-
np.mean(text_line_width_list) if text_line_width_list else 20
|
685
|
-
)
|
686
|
-
self.text_line_height = (
|
687
|
-
np.mean(text_line_height_list) if text_line_height_list else 10
|
688
|
-
)
|
689
|
-
|
690
|
-
def init_direction_info(self):
|
691
|
-
if self.direction == "horizontal":
|
692
|
-
self.direction_start_index = 0
|
693
|
-
self.direction_end_index = 2
|
694
|
-
self.secondary_direction_start_index = 1
|
695
|
-
self.secondary_direction_end_index = 3
|
696
|
-
self.secondary_direction = "vertical"
|
697
|
-
else:
|
698
|
-
self.direction_start_index = 1
|
699
|
-
self.direction_end_index = 3
|
700
|
-
self.secondary_direction_start_index = 0
|
701
|
-
self.secondary_direction_end_index = 2
|
702
|
-
self.secondary_direction = "horizontal"
|
703
|
-
|
704
|
-
self.direction_center_coordinate = (
|
705
|
-
self.bbox[self.direction_start_index] + self.bbox[self.direction_end_index]
|
706
|
-
) / 2
|
707
|
-
self.secondary_direction_center_coordinate = (
|
708
|
-
self.bbox[self.secondary_direction_start_index]
|
709
|
-
+ self.bbox[self.secondary_direction_end_index]
|
710
|
-
) / 2
|
711
|
-
|
712
|
-
def calculate_bbox_metrics(self, image_shape):
|
713
|
-
x1, y1, x2, y2 = self.bbox
|
714
|
-
image_height, image_width = image_shape
|
715
|
-
width = x2 - x1
|
716
|
-
x_center, y_center = (x1 + x2) / 2, (y1 + y2) / 2
|
717
|
-
self.euclidean_distance = math.sqrt(((x1) ** 2 + (y1) ** 2))
|
718
|
-
self.center_euclidean_distance = math.sqrt(((x_center) ** 2 + (y_center) ** 2))
|
719
|
-
self.angle_rad = math.atan2(y_center, x_center)
|
720
|
-
self.weighted_distance = (
|
721
|
-
y2 + width + (x1 // (image_width // 10)) * (image_width // 10) * 1.5
|
722
|
-
)
|
723
|
-
|
724
|
-
def sort_normal_blocks(self, blocks):
|
725
|
-
if self.direction == "horizontal":
|
726
|
-
blocks.sort(
|
727
|
-
key=lambda x: (
|
728
|
-
x.bbox[1] // self.text_line_height,
|
729
|
-
x.bbox[0] // self.text_line_width,
|
730
|
-
x.bbox[1] ** 2 + x.bbox[0] ** 2,
|
731
|
-
),
|
732
|
-
)
|
733
|
-
else:
|
734
|
-
blocks.sort(
|
735
|
-
key=lambda x: (
|
736
|
-
-x.bbox[0] // self.text_line_width,
|
737
|
-
x.bbox[1] // self.text_line_height,
|
738
|
-
-(x.bbox[2] ** 2 + x.bbox[1] ** 2),
|
739
|
-
),
|
740
|
-
)
|
741
|
-
|
742
|
-
def sort(self):
|
743
|
-
from .xycut_enhanced import xycut_enhanced
|
744
|
-
|
745
|
-
return xycut_enhanced(self)
|