paddlex-3.0.1-py3-none-any.whl → paddlex-3.0.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34):
  1. paddlex/.version +1 -1
  2. paddlex/inference/models/common/static_infer.py +18 -14
  3. paddlex/inference/models/common/ts/funcs.py +19 -8
  4. paddlex/inference/models/formula_recognition/predictor.py +1 -1
  5. paddlex/inference/models/formula_recognition/processors.py +2 -2
  6. paddlex/inference/models/text_recognition/result.py +1 -1
  7. paddlex/inference/pipelines/layout_parsing/layout_objects.py +859 -0
  8. paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +144 -205
  9. paddlex/inference/pipelines/layout_parsing/result_v2.py +6 -270
  10. paddlex/inference/pipelines/layout_parsing/setting.py +1 -0
  11. paddlex/inference/pipelines/layout_parsing/utils.py +108 -312
  12. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/utils.py +302 -247
  13. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/xycuts.py +156 -104
  14. paddlex/inference/pipelines/ocr/result.py +2 -2
  15. paddlex/inference/pipelines/pp_chatocr/pipeline_v4.py +1 -1
  16. paddlex/inference/serving/basic_serving/_app.py +46 -13
  17. paddlex/inference/utils/hpi.py +23 -16
  18. paddlex/inference/utils/hpi_model_info_collection.json +627 -202
  19. paddlex/inference/utils/misc.py +20 -0
  20. paddlex/inference/utils/mkldnn_blocklist.py +36 -2
  21. paddlex/inference/utils/official_models.py +126 -5
  22. paddlex/inference/utils/pp_option.py +48 -4
  23. paddlex/modules/semantic_segmentation/dataset_checker/__init__.py +12 -2
  24. paddlex/ops/__init__.py +6 -3
  25. paddlex/utils/deps.py +2 -2
  26. paddlex/utils/device.py +4 -19
  27. paddlex/utils/flags.py +9 -0
  28. paddlex/utils/subclass_register.py +2 -2
  29. {paddlex-3.0.1.dist-info → paddlex-3.0.2.dist-info}/METADATA +307 -162
  30. {paddlex-3.0.1.dist-info → paddlex-3.0.2.dist-info}/RECORD +34 -32
  31. {paddlex-3.0.1.dist-info → paddlex-3.0.2.dist-info}/WHEEL +1 -1
  32. {paddlex-3.0.1.dist-info → paddlex-3.0.2.dist-info}/entry_points.txt +1 -0
  33. {paddlex-3.0.1.dist-info/licenses → paddlex-3.0.2.dist-info}/LICENSE +0 -0
  34. {paddlex-3.0.1.dist-info → paddlex-3.0.2.dist-info}/top_level.txt +0 -0
@@ -30,23 +30,22 @@ from ...utils.pp_option import PaddlePredictorOption
30
30
  from .._parallel import AutoParallelImageSimpleInferencePipeline
31
31
  from ..base import BasePipeline
32
32
  from ..ocr.result import OCRResult
33
- from .result_v2 import LayoutParsingBlock, LayoutParsingRegion, LayoutParsingResultV2
34
- from .setting import BLOCK_LABEL_MAP, BLOCK_SETTINGS, LINE_SETTINGS, REGION_SETTINGS
33
+ from .layout_objects import LayoutBlock, LayoutRegion
34
+ from .result_v2 import LayoutParsingResultV2
35
+ from .setting import BLOCK_LABEL_MAP, BLOCK_SETTINGS, REGION_SETTINGS
35
36
  from .utils import (
36
37
  caculate_bbox_area,
37
38
  calculate_minimum_enclosing_bbox,
38
39
  calculate_overlap_ratio,
39
40
  convert_formula_res_to_ocr_format,
40
- format_line,
41
41
  gather_imgs,
42
42
  get_bbox_intersection,
43
43
  get_sub_regions_ocr_res,
44
- group_boxes_into_lines,
45
44
  remove_overlap_blocks,
46
45
  shrink_supplement_region_bbox,
47
- split_boxes_by_projection,
48
46
  update_region_box,
49
47
  )
48
+ from .xycut_enhanced import xycut_enhanced
50
49
 
51
50
 
52
51
  class _LayoutParsingPipelineV2(BasePipeline):
@@ -424,9 +423,12 @@ class _LayoutParsingPipelineV2(BasePipeline):
424
423
  else:
425
424
  # the other matched ocr be appended to the overall ocr result
426
425
  overall_ocr_res["dt_polys"].append(crop_img_dt_poly)
427
- overall_ocr_res["rec_boxes"] = np.vstack(
428
- (overall_ocr_res["rec_boxes"], crop_box)
429
- )
426
+ if len(overall_ocr_res["rec_boxes"]) == 0:
427
+ overall_ocr_res["rec_boxes"] = np.array([crop_box])
428
+ else:
429
+ overall_ocr_res["rec_boxes"] = np.vstack(
430
+ (overall_ocr_res["rec_boxes"], crop_box)
431
+ )
430
432
  overall_ocr_res["rec_polys"].append(crop_img_dt_poly)
431
433
  overall_ocr_res["rec_scores"].append(crop_img_rec_score)
432
434
  overall_ocr_res["rec_texts"].append(crop_img_rec_text)
@@ -461,9 +463,12 @@ class _LayoutParsingPipelineV2(BasePipeline):
461
463
  else (self.general_ocr_pipeline.text_rec_score_thresh)
462
464
  )
463
465
  if crop_img_rec_score >= text_rec_score_thresh:
464
- overall_ocr_res["rec_boxes"] = np.vstack(
465
- (overall_ocr_res["rec_boxes"], crop_box)
466
- )
466
+ if len(overall_ocr_res["rec_boxes"]) == 0:
467
+ overall_ocr_res["rec_boxes"] = np.array([crop_box])
468
+ else:
469
+ overall_ocr_res["rec_boxes"] = np.vstack(
470
+ (overall_ocr_res["rec_boxes"], crop_box)
471
+ )
467
472
  overall_ocr_res["rec_polys"].append(crop_img_dt_poly)
468
473
  overall_ocr_res["rec_scores"].append(crop_img_rec_score)
469
474
  overall_ocr_res["rec_texts"].append(crop_img_rec_text)
@@ -485,6 +490,11 @@ class _LayoutParsingPipelineV2(BasePipeline):
485
490
  )
486
491
  block_to_ocr_map[idx] = [idx]
487
492
 
493
+ mask_labels = (
494
+ BLOCK_LABEL_MAP.get("unordered_labels", [])
495
+ + BLOCK_LABEL_MAP.get("header_labels", [])
496
+ + BLOCK_LABEL_MAP.get("footer_labels", [])
497
+ )
488
498
  block_bboxes = [box["coordinate"] for box in layout_det_res["boxes"]]
489
499
  region_det_res["boxes"] = sorted(
490
500
  region_det_res["boxes"],
@@ -507,58 +517,117 @@ class _LayoutParsingPipelineV2(BasePipeline):
507
517
  region_to_block_map[region_idx] = []
508
518
  region_bbox = region_info["coordinate"]
509
519
  for block_idx in block_idxes_set:
520
+ if layout_det_res["boxes"][block_idx]["label"] in mask_labels:
521
+ continue
510
522
  overlap_ratio = calculate_overlap_ratio(
511
523
  region_bbox, block_bboxes[block_idx], mode="small"
512
524
  )
513
525
  if overlap_ratio > REGION_SETTINGS.get(
514
526
  "match_block_overlap_ratio_threshold", 0.8
515
527
  ):
516
- region_to_block_map[region_idx].append(block_idx)
517
528
  matched_idxes.append(block_idx)
529
+ old_region_bbox_matched_idxes = []
518
530
  if len(matched_idxes) > 0:
531
+ while len(old_region_bbox_matched_idxes) != len(matched_idxes):
532
+ old_region_bbox_matched_idxes = copy.deepcopy(matched_idxes)
533
+ matched_idxes = []
534
+ matched_bboxes = [
535
+ block_bboxes[idx] for idx in old_region_bbox_matched_idxes
536
+ ]
537
+ new_region_bbox = calculate_minimum_enclosing_bbox(
538
+ matched_bboxes
539
+ )
540
+ for block_idx in block_idxes_set:
541
+ if (
542
+ layout_det_res["boxes"][block_idx]["label"]
543
+ in mask_labels
544
+ ):
545
+ continue
546
+ overlap_ratio = calculate_overlap_ratio(
547
+ new_region_bbox, block_bboxes[block_idx], mode="small"
548
+ )
549
+ if overlap_ratio > REGION_SETTINGS.get(
550
+ "match_block_overlap_ratio_threshold", 0.8
551
+ ):
552
+ matched_idxes.append(block_idx)
519
553
  for block_idx in matched_idxes:
520
554
  block_idxes_set.remove(block_idx)
521
- matched_bboxes = [block_bboxes[idx] for idx in matched_idxes]
522
- new_region_bbox = calculate_minimum_enclosing_bbox(matched_bboxes)
555
+ region_to_block_map[region_idx] = matched_idxes
523
556
  region_det_res["boxes"][region_idx]["coordinate"] = new_region_bbox
524
557
  # Supplement region when there is no matched block
525
- if len(block_idxes_set) > 0:
526
- while len(block_idxes_set) > 0:
527
- matched_idxes = []
528
- unmatched_bboxes = [block_bboxes[idx] for idx in block_idxes_set]
529
- supplement_region_bbox = calculate_minimum_enclosing_bbox(
530
- unmatched_bboxes
558
+ while len(block_idxes_set) > 0:
559
+ unmatched_bboxes = [block_bboxes[idx] for idx in block_idxes_set]
560
+ if len(unmatched_bboxes) == 0:
561
+ break
562
+ supplement_region_bbox = calculate_minimum_enclosing_bbox(
563
+ unmatched_bboxes
564
+ )
565
+ matched_idxes = []
566
+ # check if the new region bbox is overlapped with other region bbox, if have, then shrink the new region bbox
567
+ for region_idx, region_info in enumerate(region_det_res["boxes"]):
568
+ if len(region_to_block_map[region_idx]) == 0:
569
+ continue
570
+ region_bbox = region_info["coordinate"]
571
+ overlap_ratio = calculate_overlap_ratio(
572
+ supplement_region_bbox, region_bbox
531
573
  )
532
- # check if the new region bbox is overlapped with other region bbox, if have, then shrink the new region bbox
533
- for region_info in region_det_res["boxes"]:
534
- region_bbox = region_info["coordinate"]
535
- overlap_ratio = calculate_overlap_ratio(
536
- supplement_region_bbox, region_bbox
537
- )
538
- if overlap_ratio > 0:
539
- supplement_region_bbox, matched_idxes = (
540
- shrink_supplement_region_bbox(
541
- supplement_region_bbox,
542
- region_bbox,
543
- image.shape[1],
544
- image.shape[0],
545
- block_idxes_set,
546
- block_bboxes,
547
- )
574
+ if overlap_ratio > 0:
575
+ supplement_region_bbox, matched_idxes = (
576
+ shrink_supplement_region_bbox(
577
+ supplement_region_bbox,
578
+ region_bbox,
579
+ image.shape[1],
580
+ image.shape[0],
581
+ block_idxes_set,
582
+ block_bboxes,
548
583
  )
584
+ )
585
+
586
+ matched_idxes = [
587
+ idx
588
+ for idx in matched_idxes
589
+ if layout_det_res["boxes"][idx]["label"] not in mask_labels
590
+ ]
591
+ if len(matched_idxes) == 0:
592
+ matched_idxes = [
593
+ idx
594
+ for idx in block_idxes_set
595
+ if layout_det_res["boxes"][idx]["label"] not in mask_labels
596
+ ]
549
597
  if len(matched_idxes) == 0:
550
- matched_idxes = list(block_idxes_set)
551
- region_idx = len(region_det_res["boxes"])
552
- region_to_block_map[region_idx] = list(matched_idxes)
553
- for block_idx in matched_idxes:
554
- block_idxes_set.remove(block_idx)
555
- region_det_res["boxes"].append(
556
- {
557
- "coordinate": supplement_region_bbox,
558
- "label": "SupplementaryRegion",
559
- "score": 1,
560
- }
561
- )
598
+ break
599
+ matched_bboxes = [block_bboxes[idx] for idx in matched_idxes]
600
+ supplement_region_bbox = calculate_minimum_enclosing_bbox(
601
+ matched_bboxes
602
+ )
603
+ region_idx = len(region_det_res["boxes"])
604
+ region_to_block_map[region_idx] = list(matched_idxes)
605
+ for block_idx in matched_idxes:
606
+ block_idxes_set.remove(block_idx)
607
+ region_det_res["boxes"].append(
608
+ {
609
+ "coordinate": supplement_region_bbox,
610
+ "label": "SupplementaryRegion",
611
+ "score": 1,
612
+ }
613
+ )
614
+
615
+ mask_idxes = [
616
+ idx
617
+ for idx in range(len(layout_det_res["boxes"]))
618
+ if layout_det_res["boxes"][idx]["label"] in mask_labels
619
+ ]
620
+ for idx in mask_idxes:
621
+ bbox = layout_det_res["boxes"][idx]["coordinate"]
622
+ region_idx = len(region_det_res["boxes"])
623
+ region_to_block_map[region_idx] = [idx]
624
+ region_det_res["boxes"].append(
625
+ {
626
+ "coordinate": bbox,
627
+ "label": "SupplementaryRegion",
628
+ "score": 1,
629
+ }
630
+ )
562
631
 
563
632
  region_block_ocr_idx_map = dict(
564
633
  region_to_block_map=region_to_block_map,
@@ -567,142 +636,7 @@ class _LayoutParsingPipelineV2(BasePipeline):
567
636
 
568
637
  return region_block_ocr_idx_map, region_det_res, layout_det_res
569
638
 
570
- def sort_line_by_projection(
571
- self,
572
- line: List[List[Union[List[int], str]]],
573
- input_img: np.ndarray,
574
- text_rec_model: Any,
575
- text_rec_score_thresh: Union[float, None] = None,
576
- direction: str = "vertical",
577
- ) -> None:
578
- """
579
- Sort a line of text spans based on their vertical position within the layout bounding box.
580
-
581
- Args:
582
- line (list): A list of spans, where each span is a list containing a bounding box and text.
583
- input_img (ndarray): The input image used for OCR.
584
- general_ocr_pipeline (Any): The general OCR pipeline used for text recognition.
585
-
586
- Returns:
587
- list: The sorted line of text spans.
588
- """
589
- sort_index = 0 if direction == "horizontal" else 1
590
- splited_boxes = split_boxes_by_projection(line, direction)
591
- splited_lines = []
592
- if len(line) != len(splited_boxes):
593
- splited_boxes.sort(key=lambda span: span[0][sort_index])
594
- for span in splited_boxes:
595
- bbox, text, label = span
596
- if label == "text":
597
- crop_img = input_img[
598
- int(bbox[1]) : int(bbox[3]),
599
- int(bbox[0]) : int(bbox[2]),
600
- ]
601
- crop_img_rec_res = list(text_rec_model([crop_img]))[0]
602
- crop_img_rec_score = crop_img_rec_res["rec_score"]
603
- crop_img_rec_text = crop_img_rec_res["rec_text"]
604
- text = (
605
- crop_img_rec_text
606
- if crop_img_rec_score >= text_rec_score_thresh
607
- else ""
608
- )
609
- span[1] = text
610
-
611
- splited_lines.append(span)
612
- else:
613
- splited_lines = line
614
-
615
- return splited_lines
616
-
617
- def get_block_rec_content(
618
- self,
619
- image: list,
620
- ocr_rec_res: dict,
621
- block: LayoutParsingBlock,
622
- text_rec_model: Any,
623
- text_rec_score_thresh: Union[float, None] = None,
624
- ) -> str:
625
-
626
- if len(ocr_rec_res["rec_texts"]) == 0:
627
- block.content = ""
628
- return block
629
-
630
- lines, text_direction, text_line_height = group_boxes_into_lines(
631
- ocr_rec_res,
632
- LINE_SETTINGS.get("line_height_iou_threshold", 0.8),
633
- )
634
-
635
- # format line
636
- text_lines = []
637
- need_new_line_num = 0
638
- # words start coordinate and stop coordinate in the line
639
- words_start_index = 0 if text_direction == "horizontal" else 1
640
- words_stop_index = words_start_index + 2
641
- lines_start_index = 1 if text_direction == "horizontal" else 3
642
- line_width_list = []
643
-
644
- if block.label == "reference":
645
- rec_boxes = ocr_rec_res["boxes"]
646
- block_start_coordinate = min([box[words_start_index] for box in rec_boxes])
647
- block_stop_coordinate = max([box[words_stop_index] for box in rec_boxes])
648
- else:
649
- block_start_coordinate = block.bbox[words_start_index]
650
- block_stop_coordinate = block.bbox[words_stop_index]
651
-
652
- for idx, line in enumerate(lines):
653
- line.sort(
654
- key=lambda span: (
655
- span[0][words_start_index] // 2,
656
- (
657
- span[0][lines_start_index]
658
- if text_direction == "horizontal"
659
- else -span[0][lines_start_index]
660
- ),
661
- )
662
- )
663
-
664
- line_width = line[-1][0][words_stop_index] - line[0][0][words_start_index]
665
- line_width_list.append(line_width)
666
- # merge formula and text
667
- ocr_labels = [span[2] for span in line]
668
- if "formula" in ocr_labels:
669
- line = self.sort_line_by_projection(
670
- line, image, text_rec_model, text_rec_score_thresh, text_direction
671
- )
672
-
673
- line_text, need_new_line = format_line(
674
- line,
675
- text_direction,
676
- np.max(line_width_list),
677
- block_start_coordinate,
678
- block_stop_coordinate,
679
- line_gap_limit=text_line_height * 1.5,
680
- block_label=block.label,
681
- )
682
- if need_new_line:
683
- need_new_line_num += 1
684
- if idx == 0:
685
- line_start_coordinate = line[0][0][0]
686
- block.seg_start_coordinate = line_start_coordinate
687
- elif idx == len(lines) - 1:
688
- line_end_coordinate = line[-1][0][2]
689
- block.seg_end_coordinate = line_end_coordinate
690
- text_lines.append(line_text)
691
-
692
- delim = LINE_SETTINGS["delimiter_map"].get(block.label, "")
693
- if need_new_line_num > len(text_lines) * 0.5 and delim == "":
694
- text_lines = [text.replace("\n", "") for text in text_lines]
695
- delim = "\n"
696
- content = delim.join(text_lines)
697
- block.content = content
698
- block.num_of_lines = len(text_lines)
699
- block.direction = text_direction
700
- block.text_line_height = text_line_height
701
- block.text_line_width = np.mean(line_width_list)
702
-
703
- return block
704
-
705
- def get_layout_parsing_blocks(
639
+ def get_layout_parsing_objects(
706
640
  self,
707
641
  image: list,
708
642
  region_block_ocr_idx_map: dict,
@@ -746,7 +680,7 @@ class _LayoutParsingPipelineV2(BasePipeline):
746
680
  table_index = 0
747
681
  seal_index = 0
748
682
  chart_index = 0
749
- layout_parsing_blocks: List[LayoutParsingBlock] = []
683
+ layout_parsing_blocks: List[LayoutBlock] = []
750
684
 
751
685
  for box_idx, box_info in enumerate(layout_det_res["boxes"]):
752
686
 
@@ -754,7 +688,7 @@ class _LayoutParsingPipelineV2(BasePipeline):
754
688
  block_bbox = box_info["coordinate"]
755
689
  rec_res = {"boxes": [], "rec_texts": [], "rec_labels": []}
756
690
 
757
- block = LayoutParsingBlock(label=label, bbox=block_bbox)
691
+ block = LayoutBlock(label=label, bbox=block_bbox)
758
692
 
759
693
  if label == "table" and len(table_res_list) > 0:
760
694
  block.content = table_res_list[table_index]["pred_html"]
@@ -783,9 +717,8 @@ class _LayoutParsingPipelineV2(BasePipeline):
783
717
  rec_res["rec_labels"].append(
784
718
  overall_ocr_res["rec_labels"][box_no],
785
719
  )
786
- block = self.get_block_rec_content(
720
+ block.update_text_content(
787
721
  image=image,
788
- block=block,
789
722
  ocr_rec_res=rec_res,
790
723
  text_rec_model=text_rec_model,
791
724
  text_rec_score_thresh=text_rec_score_thresh,
@@ -805,26 +738,35 @@ class _LayoutParsingPipelineV2(BasePipeline):
805
738
 
806
739
  layout_parsing_blocks.append(block)
807
740
 
808
- region_list: List[LayoutParsingRegion] = []
741
+ page_region_bbox = [65535, 65535, 0, 0]
742
+ layout_parsing_regions: List[LayoutRegion] = []
809
743
  for region_idx, region_info in enumerate(region_det_res["boxes"]):
810
- region_bbox = region_info["coordinate"]
744
+ region_bbox = np.array(region_info["coordinate"]).astype("int")
811
745
  region_blocks = [
812
746
  layout_parsing_blocks[idx]
813
747
  for idx in region_block_ocr_idx_map["region_to_block_map"][region_idx]
814
748
  ]
815
- region = LayoutParsingRegion(
816
- bbox=region_bbox,
817
- blocks=region_blocks,
818
- image_shape=image.shape[:2],
819
- )
820
- region_list.append(region)
749
+ if region_blocks:
750
+ page_region_bbox = update_region_box(region_bbox, page_region_bbox)
751
+ region = LayoutRegion(bbox=region_bbox, blocks=region_blocks)
752
+ layout_parsing_regions.append(region)
821
753
 
822
- region_list = sorted(
823
- region_list,
824
- key=lambda r: (r.weighted_distance),
754
+ layout_parsing_page = LayoutRegion(
755
+ bbox=np.array(page_region_bbox).astype("int"), blocks=layout_parsing_regions
825
756
  )
826
757
 
827
- return region_list
758
+ return layout_parsing_page
759
+
760
+ def sort_layout_parsing_blocks(
761
+ self, layout_parsing_page: LayoutRegion
762
+ ) -> List[LayoutBlock]:
763
+ layout_parsing_regions = xycut_enhanced(layout_parsing_page)
764
+ parsing_res_list = []
765
+ for region in layout_parsing_regions:
766
+ layout_parsing_blocks = xycut_enhanced(region)
767
+ parsing_res_list.extend(layout_parsing_blocks)
768
+
769
+ return parsing_res_list
828
770
 
829
771
  def get_layout_parsing_res(
830
772
  self,
@@ -866,7 +808,7 @@ class _LayoutParsingPipelineV2(BasePipeline):
866
808
  )
867
809
 
868
810
  # Format layout parsing block
869
- region_list = self.get_layout_parsing_blocks(
811
+ layout_parsing_page = self.get_layout_parsing_objects(
870
812
  image=image,
871
813
  region_block_ocr_idx_map=region_block_ocr_idx_map,
872
814
  region_det_res=region_det_res,
@@ -879,9 +821,7 @@ class _LayoutParsingPipelineV2(BasePipeline):
879
821
  text_rec_score_thresh=self.general_ocr_pipeline.text_rec_score_thresh,
880
822
  )
881
823
 
882
- parsing_res_list = []
883
- for region in region_list:
884
- parsing_res_list.extend(region.sort())
824
+ parsing_res_list = self.sort_layout_parsing_blocks(layout_parsing_page)
885
825
 
886
826
  index = 1
887
827
  for block in parsing_res_list:
@@ -1024,7 +964,6 @@ class _LayoutParsingPipelineV2(BasePipeline):
1024
964
  Returns:
1025
965
  LayoutParsingResultV2: The predicted layout parsing result.
1026
966
  """
1027
-
1028
967
  model_settings = self.get_model_settings(
1029
968
  use_doc_orientation_classify,
1030
969
  use_doc_unwarping,