paddlex 3.0.1__py3-none-any.whl → 3.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. paddlex/.version +1 -1
  2. paddlex/inference/models/base/predictor/base_predictor.py +2 -0
  3. paddlex/inference/models/common/static_infer.py +20 -14
  4. paddlex/inference/models/common/ts/funcs.py +19 -8
  5. paddlex/inference/models/formula_recognition/predictor.py +1 -1
  6. paddlex/inference/models/formula_recognition/processors.py +2 -2
  7. paddlex/inference/models/text_recognition/result.py +1 -1
  8. paddlex/inference/pipelines/layout_parsing/layout_objects.py +859 -0
  9. paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +144 -205
  10. paddlex/inference/pipelines/layout_parsing/result_v2.py +13 -272
  11. paddlex/inference/pipelines/layout_parsing/setting.py +1 -0
  12. paddlex/inference/pipelines/layout_parsing/utils.py +108 -312
  13. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/utils.py +302 -247
  14. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/xycuts.py +156 -104
  15. paddlex/inference/pipelines/ocr/result.py +2 -2
  16. paddlex/inference/pipelines/pp_chatocr/pipeline_v4.py +1 -1
  17. paddlex/inference/serving/basic_serving/_app.py +47 -13
  18. paddlex/inference/serving/infra/utils.py +22 -17
  19. paddlex/inference/utils/hpi.py +60 -25
  20. paddlex/inference/utils/hpi_model_info_collection.json +627 -204
  21. paddlex/inference/utils/misc.py +20 -0
  22. paddlex/inference/utils/mkldnn_blocklist.py +36 -2
  23. paddlex/inference/utils/official_models.py +126 -5
  24. paddlex/inference/utils/pp_option.py +81 -21
  25. paddlex/modules/semantic_segmentation/dataset_checker/__init__.py +12 -2
  26. paddlex/ops/__init__.py +6 -3
  27. paddlex/utils/deps.py +2 -2
  28. paddlex/utils/device.py +4 -19
  29. paddlex/utils/download.py +10 -7
  30. paddlex/utils/flags.py +9 -0
  31. paddlex/utils/subclass_register.py +2 -2
  32. {paddlex-3.0.1.dist-info → paddlex-3.0.3.dist-info}/METADATA +307 -162
  33. {paddlex-3.0.1.dist-info → paddlex-3.0.3.dist-info}/RECORD +37 -35
  34. {paddlex-3.0.1.dist-info → paddlex-3.0.3.dist-info}/WHEEL +1 -1
  35. {paddlex-3.0.1.dist-info → paddlex-3.0.3.dist-info}/entry_points.txt +1 -0
  36. {paddlex-3.0.1.dist-info/licenses → paddlex-3.0.3.dist-info}/LICENSE +0 -0
  37. {paddlex-3.0.1.dist-info → paddlex-3.0.3.dist-info}/top_level.txt +0 -0
@@ -14,7 +14,6 @@
14
14
  from __future__ import annotations
15
15
 
16
16
  import copy
17
- import math
18
17
  import re
19
18
  from functools import partial
20
19
  from typing import List
@@ -30,7 +29,8 @@ from ...common.result import (
30
29
  MarkdownMixin,
31
30
  XlsxMixin,
32
31
  )
33
- from .setting import BLOCK_LABEL_MAP
32
+ from .layout_objects import LayoutBlock
33
+ from .utils import get_seg_flag
34
34
 
35
35
 
36
36
  def compile_title_pattern():
@@ -140,58 +140,6 @@ def format_first_line_func(block, templates, format_func, spliter):
140
140
  return spliter.join(lines)
141
141
 
142
142
 
143
- def get_seg_flag(block: LayoutParsingBlock, prev_block: LayoutParsingBlock):
144
-
145
- seg_start_flag = True
146
- seg_end_flag = True
147
-
148
- block_box = block.bbox
149
- context_left_coordinate = block_box[0]
150
- context_right_coordinate = block_box[2]
151
- seg_start_coordinate = block.seg_start_coordinate
152
- seg_end_coordinate = block.seg_end_coordinate
153
-
154
- if prev_block is not None:
155
- prev_block_bbox = prev_block.bbox
156
- num_of_prev_lines = prev_block.num_of_lines
157
- pre_block_seg_end_coordinate = prev_block.seg_end_coordinate
158
- prev_end_space_small = (
159
- abs(prev_block_bbox[2] - pre_block_seg_end_coordinate) < 10
160
- )
161
- prev_lines_more_than_one = num_of_prev_lines > 1
162
-
163
- overlap_blocks = context_left_coordinate < prev_block_bbox[2]
164
-
165
- # update context_left_coordinate and context_right_coordinate
166
- if overlap_blocks:
167
- context_left_coordinate = min(prev_block_bbox[0], context_left_coordinate)
168
- context_right_coordinate = max(prev_block_bbox[2], context_right_coordinate)
169
- prev_end_space_small = (
170
- abs(context_right_coordinate - pre_block_seg_end_coordinate) < 10
171
- )
172
- edge_distance = 0
173
- else:
174
- edge_distance = abs(block_box[0] - prev_block_bbox[2])
175
-
176
- current_start_space_small = seg_start_coordinate - context_left_coordinate < 10
177
-
178
- if (
179
- prev_end_space_small
180
- and current_start_space_small
181
- and prev_lines_more_than_one
182
- and edge_distance < max(prev_block.width, block.width)
183
- ):
184
- seg_start_flag = False
185
- else:
186
- if seg_start_coordinate - context_left_coordinate < 10:
187
- seg_start_flag = False
188
-
189
- if context_right_coordinate - seg_end_coordinate < 10:
190
- seg_end_flag = False
191
-
192
- return seg_start_flag, seg_end_flag
193
-
194
-
195
143
  class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
196
144
  """Layout Parsing Result V2"""
197
145
 
@@ -247,7 +195,7 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
247
195
  draw = ImageDraw.Draw(image, "RGBA")
248
196
  font_size = int(0.018 * int(image.width)) + 2
249
197
  font = ImageFont.truetype(PINGFANG_FONT_FILE_PATH, font_size, encoding="utf-8")
250
- parsing_result: List[LayoutParsingBlock] = self["parsing_res_list"]
198
+ parsing_result: List[LayoutBlock] = self["parsing_res_list"]
251
199
  for block in parsing_result:
252
200
  bbox = block.bbox
253
201
  index = block.order_index
@@ -456,6 +404,9 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
456
404
  "table_title": format_text_func,
457
405
  "figure_title": format_text_func,
458
406
  "chart_title": format_text_func,
407
+ "vision_footnote": lambda block: block.content.replace(
408
+ "\n\n", "\n"
409
+ ).replace("\n", "\n\n"),
459
410
  "text": lambda block: block.content.replace("\n\n", "\n").replace(
460
411
  "\n", "\n\n"
461
412
  ),
@@ -484,8 +435,8 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
484
435
 
485
436
  markdown_content = ""
486
437
  last_label = None
487
- seg_start_flag = None
488
- seg_end_flag = None
438
+ seg_start_flag = True
439
+ seg_end_flag = True
489
440
  prev_block = None
490
441
  page_first_element_seg_start_flag = None
491
442
  page_last_element_seg_end_flag = None
@@ -517,6 +468,11 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
517
468
  else handle_func(block)
518
469
  )
519
470
  last_label = label
471
+ page_first_element_seg_start_flag = (
472
+ True
473
+ if page_first_element_seg_start_flag is None
474
+ else page_first_element_seg_start_flag
475
+ )
520
476
  page_last_element_seg_end_flag = seg_end_flag
521
477
 
522
478
  markdown_info["markdown_texts"] = markdown_content
@@ -528,218 +484,3 @@ class LayoutParsingResultV2(BaseCVResult, HtmlMixin, XlsxMixin, MarkdownMixin):
528
484
  markdown_info["markdown_images"][img["path"]] = img["img"]
529
485
 
530
486
  return markdown_info
531
-
532
-
533
- class LayoutParsingBlock:
534
-
535
- def __init__(self, label, bbox, content="") -> None:
536
- self.label = label
537
- self.order_label = None
538
- self.bbox = list(map(int, bbox))
539
- self.content = content
540
- self.seg_start_coordinate = float("inf")
541
- self.seg_end_coordinate = float("-inf")
542
- self.width = bbox[2] - bbox[0]
543
- self.height = bbox[3] - bbox[1]
544
- self.area = self.width * self.height
545
- self.num_of_lines = 1
546
- self.image = None
547
- self.index = None
548
- self.order_index = None
549
- self.text_line_width = 1
550
- self.text_line_height = 1
551
- self.direction = self.get_bbox_direction()
552
- self.child_blocks = []
553
- self.update_direction_info()
554
-
555
- def __str__(self) -> str:
556
- return f"{self.__dict__}"
557
-
558
- def __repr__(self) -> str:
559
- _str = f"\n\n#################\nindex:\t{self.index}\nlabel:\t{self.label}\nregion_label:\t{self.order_label}\nbbox:\t{self.bbox}\ncontent:\t{self.content}\n#################"
560
- return _str
561
-
562
- def to_dict(self) -> dict:
563
- return self.__dict__
564
-
565
- def update_direction_info(self) -> None:
566
- if self.direction == "horizontal":
567
- self.secondary_direction = "vertical"
568
- self.short_side_length = self.height
569
- self.long_side_length = self.width
570
- self.start_coordinate = self.bbox[0]
571
- self.end_coordinate = self.bbox[2]
572
- self.secondary_direction_start_coordinate = self.bbox[1]
573
- self.secondary_direction_end_coordinate = self.bbox[3]
574
- else:
575
- self.secondary_direction = "horizontal"
576
- self.short_side_length = self.width
577
- self.long_side_length = self.height
578
- self.start_coordinate = self.bbox[1]
579
- self.end_coordinate = self.bbox[3]
580
- self.secondary_direction_start_coordinate = self.bbox[0]
581
- self.secondary_direction_end_coordinate = self.bbox[2]
582
-
583
- def append_child_block(self, child_block: LayoutParsingBlock) -> None:
584
- if not self.child_blocks:
585
- self.ori_bbox = self.bbox.copy()
586
- x1, y1, x2, y2 = self.bbox
587
- x1_child, y1_child, x2_child, y2_child = child_block.bbox
588
- union_bbox = (
589
- min(x1, x1_child),
590
- min(y1, y1_child),
591
- max(x2, x2_child),
592
- max(y2, y2_child),
593
- )
594
- self.bbox = union_bbox
595
- self.update_direction_info()
596
- child_blocks = [child_block]
597
- if child_block.child_blocks:
598
- child_blocks.extend(child_block.get_child_blocks())
599
- self.child_blocks.extend(child_blocks)
600
-
601
- def get_child_blocks(self) -> list:
602
- self.bbox = self.ori_bbox
603
- child_blocks = self.child_blocks.copy()
604
- self.child_blocks = []
605
- return child_blocks
606
-
607
- def get_centroid(self) -> tuple:
608
- x1, y1, x2, y2 = self.bbox
609
- centroid = ((x1 + x2) / 2, (y1 + y2) / 2)
610
- return centroid
611
-
612
- def get_bbox_direction(self, direction_ratio: float = 1.0) -> bool:
613
- """
614
- Determine if a bounding box is horizontal or vertical.
615
-
616
- Args:
617
- bbox (List[float]): Bounding box [x_min, y_min, x_max, y_max].
618
- direction_ratio (float): Ratio for determining direction. Default is 1.0.
619
-
620
- Returns:
621
- str: "horizontal" or "vertical".
622
- """
623
- return (
624
- "horizontal" if self.width * direction_ratio >= self.height else "vertical"
625
- )
626
-
627
-
628
- class LayoutParsingRegion:
629
-
630
- def __init__(
631
- self, bbox, blocks: List[LayoutParsingBlock] = [], image_shape=None
632
- ) -> None:
633
- self.bbox = bbox
634
- self.block_map = {}
635
- self.direction = "horizontal"
636
- self.calculate_bbox_metrics(image_shape)
637
- self.doc_title_block_idxes = []
638
- self.paragraph_title_block_idxes = []
639
- self.vision_block_idxes = []
640
- self.unordered_block_idxes = []
641
- self.vision_title_block_idxes = []
642
- self.normal_text_block_idxes = []
643
- self.header_block_idxes = []
644
- self.footer_block_idxes = []
645
- self.text_line_width = 20
646
- self.text_line_height = 10
647
- self.init_region_info_from_layout(blocks)
648
- self.init_direction_info()
649
-
650
- def init_region_info_from_layout(self, blocks: List[LayoutParsingBlock]):
651
- horizontal_normal_text_block_num = 0
652
- text_line_height_list = []
653
- text_line_width_list = []
654
- for idx, block in enumerate(blocks):
655
- self.block_map[idx] = block
656
- block.index = idx
657
- if block.label in BLOCK_LABEL_MAP["header_labels"]:
658
- self.header_block_idxes.append(idx)
659
- elif block.label in BLOCK_LABEL_MAP["doc_title_labels"]:
660
- self.doc_title_block_idxes.append(idx)
661
- elif block.label in BLOCK_LABEL_MAP["paragraph_title_labels"]:
662
- self.paragraph_title_block_idxes.append(idx)
663
- elif block.label in BLOCK_LABEL_MAP["vision_labels"]:
664
- self.vision_block_idxes.append(idx)
665
- elif block.label in BLOCK_LABEL_MAP["vision_title_labels"]:
666
- self.vision_title_block_idxes.append(idx)
667
- elif block.label in BLOCK_LABEL_MAP["footer_labels"]:
668
- self.footer_block_idxes.append(idx)
669
- elif block.label in BLOCK_LABEL_MAP["unordered_labels"]:
670
- self.unordered_block_idxes.append(idx)
671
- else:
672
- self.normal_text_block_idxes.append(idx)
673
- text_line_height_list.append(block.text_line_height)
674
- text_line_width_list.append(block.text_line_width)
675
- if block.direction == "horizontal":
676
- horizontal_normal_text_block_num += 1
677
- self.direction = (
678
- "horizontal"
679
- if horizontal_normal_text_block_num
680
- >= len(self.normal_text_block_idxes) * 0.5
681
- else "vertical"
682
- )
683
- self.text_line_width = (
684
- np.mean(text_line_width_list) if text_line_width_list else 20
685
- )
686
- self.text_line_height = (
687
- np.mean(text_line_height_list) if text_line_height_list else 10
688
- )
689
-
690
- def init_direction_info(self):
691
- if self.direction == "horizontal":
692
- self.direction_start_index = 0
693
- self.direction_end_index = 2
694
- self.secondary_direction_start_index = 1
695
- self.secondary_direction_end_index = 3
696
- self.secondary_direction = "vertical"
697
- else:
698
- self.direction_start_index = 1
699
- self.direction_end_index = 3
700
- self.secondary_direction_start_index = 0
701
- self.secondary_direction_end_index = 2
702
- self.secondary_direction = "horizontal"
703
-
704
- self.direction_center_coordinate = (
705
- self.bbox[self.direction_start_index] + self.bbox[self.direction_end_index]
706
- ) / 2
707
- self.secondary_direction_center_coordinate = (
708
- self.bbox[self.secondary_direction_start_index]
709
- + self.bbox[self.secondary_direction_end_index]
710
- ) / 2
711
-
712
- def calculate_bbox_metrics(self, image_shape):
713
- x1, y1, x2, y2 = self.bbox
714
- image_height, image_width = image_shape
715
- width = x2 - x1
716
- x_center, y_center = (x1 + x2) / 2, (y1 + y2) / 2
717
- self.euclidean_distance = math.sqrt(((x1) ** 2 + (y1) ** 2))
718
- self.center_euclidean_distance = math.sqrt(((x_center) ** 2 + (y_center) ** 2))
719
- self.angle_rad = math.atan2(y_center, x_center)
720
- self.weighted_distance = (
721
- y2 + width + (x1 // (image_width // 10)) * (image_width // 10) * 1.5
722
- )
723
-
724
- def sort_normal_blocks(self, blocks):
725
- if self.direction == "horizontal":
726
- blocks.sort(
727
- key=lambda x: (
728
- x.bbox[1] // self.text_line_height,
729
- x.bbox[0] // self.text_line_width,
730
- x.bbox[1] ** 2 + x.bbox[0] ** 2,
731
- ),
732
- )
733
- else:
734
- blocks.sort(
735
- key=lambda x: (
736
- -x.bbox[0] // self.text_line_width,
737
- x.bbox[1] // self.text_line_height,
738
- -(x.bbox[2] ** 2 + x.bbox[1] ** 2),
739
- ),
740
- )
741
-
742
- def sort(self):
743
- from .xycut_enhanced import xycut_enhanced
744
-
745
- return xycut_enhanced(self)
@@ -21,6 +21,7 @@ XYCUT_SETTINGS = {
21
21
  "up_edge_weight": 1,
22
22
  "down_edge_weight": 0.0001,
23
23
  },
24
+ "cross_layout_ref_text_block_words_num_threshold": 10,
24
25
  }
25
26
 
26
27
  REGION_SETTINGS = {