paddlex 3.0.0rc1__py3-none-any.whl → 3.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233)
  1. paddlex/.version +1 -1
  2. paddlex/__init__.py +1 -1
  3. paddlex/configs/modules/chart_parsing/PP-Chart2Table.yaml +13 -0
  4. paddlex/configs/modules/doc_vlm/PP-DocBee2-3B.yaml +14 -0
  5. paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-L.yaml +40 -0
  6. paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-M.yaml +40 -0
  7. paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-S.yaml +40 -0
  8. paddlex/configs/modules/layout_detection/PP-DocBlockLayout.yaml +40 -0
  9. paddlex/configs/modules/layout_detection/PP-DocLayout-L.yaml +2 -2
  10. paddlex/configs/modules/layout_detection/PP-DocLayout-M.yaml +2 -2
  11. paddlex/configs/modules/layout_detection/PP-DocLayout-S.yaml +2 -2
  12. paddlex/configs/modules/layout_detection/PP-DocLayout_plus-L.yaml +40 -0
  13. paddlex/configs/modules/text_detection/PP-OCRv5_mobile_det.yaml +40 -0
  14. paddlex/configs/modules/text_detection/PP-OCRv5_server_det.yaml +40 -0
  15. paddlex/configs/modules/text_recognition/PP-OCRv5_mobile_rec.yaml +39 -0
  16. paddlex/configs/modules/text_recognition/PP-OCRv5_server_rec.yaml +39 -0
  17. paddlex/configs/modules/textline_orientation/PP-LCNet_x1_0_textline_ori.yaml +41 -0
  18. paddlex/configs/pipelines/OCR.yaml +7 -6
  19. paddlex/configs/pipelines/PP-ChatOCRv3-doc.yaml +3 -1
  20. paddlex/configs/pipelines/PP-ChatOCRv4-doc.yaml +91 -34
  21. paddlex/configs/pipelines/PP-StructureV3.yaml +72 -72
  22. paddlex/configs/pipelines/doc_understanding.yaml +1 -1
  23. paddlex/configs/pipelines/formula_recognition.yaml +2 -2
  24. paddlex/configs/pipelines/layout_parsing.yaml +3 -2
  25. paddlex/configs/pipelines/seal_recognition.yaml +1 -0
  26. paddlex/configs/pipelines/table_recognition.yaml +2 -1
  27. paddlex/configs/pipelines/table_recognition_v2.yaml +7 -1
  28. paddlex/hpip_links.html +20 -20
  29. paddlex/inference/common/batch_sampler/doc_vlm_batch_sampler.py +33 -10
  30. paddlex/inference/common/batch_sampler/image_batch_sampler.py +34 -25
  31. paddlex/inference/common/result/mixin.py +19 -12
  32. paddlex/inference/models/base/predictor/base_predictor.py +2 -8
  33. paddlex/inference/models/common/static_infer.py +11 -59
  34. paddlex/inference/models/common/tokenizer/__init__.py +2 -0
  35. paddlex/inference/models/common/tokenizer/clip_tokenizer.py +1 -1
  36. paddlex/inference/models/common/tokenizer/gpt_tokenizer.py +2 -2
  37. paddlex/inference/models/common/tokenizer/qwen2_5_tokenizer.py +112 -0
  38. paddlex/inference/models/common/tokenizer/qwen2_tokenizer.py +7 -1
  39. paddlex/inference/models/common/tokenizer/qwen_tokenizer.py +288 -0
  40. paddlex/inference/models/common/tokenizer/tokenizer_utils.py +13 -13
  41. paddlex/inference/models/common/tokenizer/tokenizer_utils_base.py +3 -3
  42. paddlex/inference/models/common/tokenizer/vocab.py +7 -7
  43. paddlex/inference/models/common/vlm/conversion_utils.py +99 -0
  44. paddlex/inference/models/common/vlm/fusion_ops.py +205 -0
  45. paddlex/inference/models/common/vlm/generation/configuration_utils.py +1 -1
  46. paddlex/inference/models/common/vlm/generation/logits_process.py +1 -1
  47. paddlex/inference/models/common/vlm/generation/utils.py +1 -1
  48. paddlex/inference/models/common/vlm/transformers/configuration_utils.py +3 -3
  49. paddlex/inference/models/common/vlm/transformers/conversion_utils.py +3 -3
  50. paddlex/inference/models/common/vlm/transformers/model_outputs.py +2 -2
  51. paddlex/inference/models/common/vlm/transformers/model_utils.py +7 -31
  52. paddlex/inference/models/doc_vlm/modeling/GOT_ocr_2_0.py +830 -0
  53. paddlex/inference/models/doc_vlm/modeling/__init__.py +2 -0
  54. paddlex/inference/models/doc_vlm/modeling/qwen2.py +1606 -0
  55. paddlex/inference/models/doc_vlm/modeling/qwen2_5_vl.py +3006 -0
  56. paddlex/inference/models/doc_vlm/modeling/qwen2_vl.py +0 -105
  57. paddlex/inference/models/doc_vlm/predictor.py +79 -24
  58. paddlex/inference/models/doc_vlm/processors/GOT_ocr_2_0.py +97 -0
  59. paddlex/inference/models/doc_vlm/processors/__init__.py +2 -0
  60. paddlex/inference/models/doc_vlm/processors/common.py +189 -0
  61. paddlex/inference/models/doc_vlm/processors/qwen2_5_vl.py +548 -0
  62. paddlex/inference/models/doc_vlm/processors/qwen2_vl.py +21 -176
  63. paddlex/inference/models/formula_recognition/predictor.py +7 -1
  64. paddlex/inference/models/formula_recognition/processors.py +92 -79
  65. paddlex/inference/models/formula_recognition/result.py +28 -27
  66. paddlex/inference/models/image_feature/processors.py +3 -4
  67. paddlex/inference/models/keypoint_detection/predictor.py +3 -0
  68. paddlex/inference/models/object_detection/predictor.py +2 -0
  69. paddlex/inference/models/object_detection/processors.py +28 -3
  70. paddlex/inference/models/object_detection/utils.py +2 -0
  71. paddlex/inference/models/table_structure_recognition/result.py +0 -10
  72. paddlex/inference/models/text_detection/predictor.py +8 -0
  73. paddlex/inference/models/text_detection/processors.py +44 -10
  74. paddlex/inference/models/text_detection/result.py +0 -10
  75. paddlex/inference/pipelines/__init__.py +9 -5
  76. paddlex/inference/pipelines/_parallel.py +172 -0
  77. paddlex/inference/pipelines/anomaly_detection/pipeline.py +16 -6
  78. paddlex/inference/pipelines/attribute_recognition/pipeline.py +11 -1
  79. paddlex/inference/pipelines/base.py +14 -4
  80. paddlex/inference/pipelines/components/faisser.py +1 -1
  81. paddlex/inference/pipelines/doc_preprocessor/pipeline.py +53 -27
  82. paddlex/inference/pipelines/formula_recognition/pipeline.py +120 -82
  83. paddlex/inference/pipelines/formula_recognition/result.py +1 -11
  84. paddlex/inference/pipelines/image_classification/pipeline.py +16 -6
  85. paddlex/inference/pipelines/image_multilabel_classification/pipeline.py +16 -6
  86. paddlex/inference/pipelines/instance_segmentation/pipeline.py +16 -6
  87. paddlex/inference/pipelines/keypoint_detection/pipeline.py +16 -6
  88. paddlex/inference/pipelines/layout_parsing/pipeline.py +34 -47
  89. paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +893 -260
  90. paddlex/inference/pipelines/layout_parsing/result.py +4 -17
  91. paddlex/inference/pipelines/layout_parsing/result_v2.py +523 -245
  92. paddlex/inference/pipelines/layout_parsing/setting.py +87 -0
  93. paddlex/inference/pipelines/layout_parsing/utils.py +565 -1998
  94. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/__init__.py +16 -0
  95. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/utils.py +1144 -0
  96. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/xycuts.py +563 -0
  97. paddlex/inference/pipelines/m_3d_bev_detection/pipeline.py +2 -2
  98. paddlex/inference/pipelines/multilingual_speech_recognition/pipeline.py +2 -2
  99. paddlex/inference/pipelines/object_detection/pipeline.py +16 -6
  100. paddlex/inference/pipelines/ocr/pipeline.py +127 -70
  101. paddlex/inference/pipelines/ocr/result.py +19 -16
  102. paddlex/inference/pipelines/open_vocabulary_detection/pipeline.py +2 -2
  103. paddlex/inference/pipelines/open_vocabulary_segmentation/pipeline.py +2 -2
  104. paddlex/inference/pipelines/pp_chatocr/pipeline_base.py +2 -2
  105. paddlex/inference/pipelines/pp_chatocr/pipeline_v3.py +2 -5
  106. paddlex/inference/pipelines/pp_chatocr/pipeline_v4.py +5 -5
  107. paddlex/inference/pipelines/rotated_object_detection/pipeline.py +16 -6
  108. paddlex/inference/pipelines/seal_recognition/pipeline.py +109 -53
  109. paddlex/inference/pipelines/semantic_segmentation/pipeline.py +16 -6
  110. paddlex/inference/pipelines/small_object_detection/pipeline.py +16 -6
  111. paddlex/inference/pipelines/table_recognition/pipeline.py +26 -18
  112. paddlex/inference/pipelines/table_recognition/pipeline_v2.py +624 -53
  113. paddlex/inference/pipelines/table_recognition/result.py +1 -1
  114. paddlex/inference/pipelines/table_recognition/table_recognition_post_processing_v2.py +9 -5
  115. paddlex/inference/pipelines/ts_anomaly_detection/pipeline.py +2 -2
  116. paddlex/inference/pipelines/ts_classification/pipeline.py +2 -2
  117. paddlex/inference/pipelines/ts_forecasting/pipeline.py +2 -2
  118. paddlex/inference/pipelines/video_classification/pipeline.py +2 -2
  119. paddlex/inference/pipelines/video_detection/pipeline.py +2 -2
  120. paddlex/inference/serving/basic_serving/_pipeline_apps/_common/common.py +5 -1
  121. paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py +0 -1
  122. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py +0 -1
  123. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py +1 -1
  124. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py +6 -2
  125. paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py +1 -5
  126. paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py +4 -5
  127. paddlex/inference/serving/infra/utils.py +20 -22
  128. paddlex/inference/serving/schemas/formula_recognition.py +1 -1
  129. paddlex/inference/serving/schemas/layout_parsing.py +1 -2
  130. paddlex/inference/serving/schemas/pp_chatocrv3_doc.py +1 -2
  131. paddlex/inference/serving/schemas/pp_chatocrv4_doc.py +2 -2
  132. paddlex/inference/serving/schemas/pp_structurev3.py +10 -6
  133. paddlex/inference/serving/schemas/seal_recognition.py +1 -1
  134. paddlex/inference/serving/schemas/table_recognition.py +2 -6
  135. paddlex/inference/serving/schemas/table_recognition_v2.py +5 -6
  136. paddlex/inference/utils/hpi.py +8 -1
  137. paddlex/inference/utils/hpi_model_info_collection.json +81 -2
  138. paddlex/inference/utils/io/readers.py +12 -12
  139. paddlex/inference/utils/mkldnn_blocklist.py +25 -0
  140. paddlex/inference/utils/official_models.py +14 -0
  141. paddlex/inference/utils/pp_option.py +29 -8
  142. paddlex/model.py +2 -2
  143. paddlex/modules/__init__.py +1 -1
  144. paddlex/modules/anomaly_detection/evaluator.py +2 -2
  145. paddlex/modules/base/__init__.py +1 -1
  146. paddlex/modules/base/evaluator.py +5 -5
  147. paddlex/modules/base/trainer.py +1 -1
  148. paddlex/modules/doc_vlm/dataset_checker.py +2 -2
  149. paddlex/modules/doc_vlm/evaluator.py +2 -2
  150. paddlex/modules/doc_vlm/exportor.py +2 -2
  151. paddlex/modules/doc_vlm/model_list.py +1 -1
  152. paddlex/modules/doc_vlm/trainer.py +2 -2
  153. paddlex/modules/face_recognition/evaluator.py +2 -2
  154. paddlex/modules/formula_recognition/evaluator.py +5 -2
  155. paddlex/modules/formula_recognition/model_list.py +3 -0
  156. paddlex/modules/formula_recognition/trainer.py +3 -0
  157. paddlex/modules/general_recognition/evaluator.py +1 -1
  158. paddlex/modules/image_classification/evaluator.py +2 -2
  159. paddlex/modules/image_classification/model_list.py +1 -0
  160. paddlex/modules/instance_segmentation/evaluator.py +1 -1
  161. paddlex/modules/keypoint_detection/evaluator.py +1 -1
  162. paddlex/modules/m_3d_bev_detection/evaluator.py +2 -2
  163. paddlex/modules/multilabel_classification/evaluator.py +2 -2
  164. paddlex/modules/object_detection/dataset_checker/dataset_src/convert_dataset.py +4 -4
  165. paddlex/modules/object_detection/evaluator.py +2 -2
  166. paddlex/modules/object_detection/model_list.py +2 -0
  167. paddlex/modules/semantic_segmentation/evaluator.py +2 -2
  168. paddlex/modules/table_recognition/evaluator.py +2 -2
  169. paddlex/modules/text_detection/evaluator.py +2 -2
  170. paddlex/modules/text_detection/model_list.py +2 -0
  171. paddlex/modules/text_recognition/evaluator.py +2 -2
  172. paddlex/modules/text_recognition/model_list.py +2 -0
  173. paddlex/modules/ts_anomaly_detection/evaluator.py +2 -2
  174. paddlex/modules/ts_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
  175. paddlex/modules/ts_classification/evaluator.py +2 -2
  176. paddlex/modules/ts_forecast/evaluator.py +2 -2
  177. paddlex/modules/video_classification/evaluator.py +2 -2
  178. paddlex/modules/video_detection/evaluator.py +2 -2
  179. paddlex/ops/__init__.py +2 -2
  180. paddlex/paddlex_cli.py +19 -13
  181. paddlex/repo_apis/Paddle3D_api/bev_fusion/model.py +2 -2
  182. paddlex/repo_apis/PaddleClas_api/cls/config.py +1 -1
  183. paddlex/repo_apis/PaddleClas_api/cls/model.py +1 -1
  184. paddlex/repo_apis/PaddleClas_api/cls/register.py +10 -0
  185. paddlex/repo_apis/PaddleClas_api/cls/runner.py +1 -1
  186. paddlex/repo_apis/PaddleDetection_api/instance_seg/model.py +1 -1
  187. paddlex/repo_apis/PaddleDetection_api/instance_seg/runner.py +1 -1
  188. paddlex/repo_apis/PaddleDetection_api/object_det/config.py +1 -1
  189. paddlex/repo_apis/PaddleDetection_api/object_det/model.py +1 -1
  190. paddlex/repo_apis/PaddleDetection_api/object_det/official_categories.py +25 -0
  191. paddlex/repo_apis/PaddleDetection_api/object_det/register.py +30 -0
  192. paddlex/repo_apis/PaddleDetection_api/object_det/runner.py +1 -1
  193. paddlex/repo_apis/PaddleOCR_api/formula_rec/config.py +3 -3
  194. paddlex/repo_apis/PaddleOCR_api/formula_rec/model.py +5 -9
  195. paddlex/repo_apis/PaddleOCR_api/formula_rec/register.py +27 -0
  196. paddlex/repo_apis/PaddleOCR_api/formula_rec/runner.py +1 -1
  197. paddlex/repo_apis/PaddleOCR_api/table_rec/model.py +1 -1
  198. paddlex/repo_apis/PaddleOCR_api/table_rec/runner.py +1 -1
  199. paddlex/repo_apis/PaddleOCR_api/text_det/model.py +1 -1
  200. paddlex/repo_apis/PaddleOCR_api/text_det/register.py +18 -0
  201. paddlex/repo_apis/PaddleOCR_api/text_det/runner.py +1 -1
  202. paddlex/repo_apis/PaddleOCR_api/text_rec/config.py +3 -3
  203. paddlex/repo_apis/PaddleOCR_api/text_rec/model.py +5 -9
  204. paddlex/repo_apis/PaddleOCR_api/text_rec/register.py +18 -0
  205. paddlex/repo_apis/PaddleOCR_api/text_rec/runner.py +1 -1
  206. paddlex/repo_apis/PaddleSeg_api/seg/model.py +1 -1
  207. paddlex/repo_apis/PaddleSeg_api/seg/runner.py +1 -1
  208. paddlex/repo_apis/PaddleTS_api/ts_ad/config.py +3 -3
  209. paddlex/repo_apis/PaddleTS_api/ts_cls/config.py +2 -2
  210. paddlex/repo_apis/PaddleTS_api/ts_fc/config.py +4 -4
  211. paddlex/repo_apis/PaddleVideo_api/video_cls/config.py +1 -1
  212. paddlex/repo_apis/PaddleVideo_api/video_cls/model.py +1 -1
  213. paddlex/repo_apis/PaddleVideo_api/video_cls/runner.py +1 -1
  214. paddlex/repo_apis/PaddleVideo_api/video_det/config.py +1 -1
  215. paddlex/repo_apis/PaddleVideo_api/video_det/model.py +1 -1
  216. paddlex/repo_apis/PaddleVideo_api/video_det/runner.py +1 -1
  217. paddlex/repo_apis/base/config.py +1 -1
  218. paddlex/repo_manager/core.py +3 -3
  219. paddlex/repo_manager/meta.py +6 -2
  220. paddlex/repo_manager/repo.py +17 -16
  221. paddlex/utils/custom_device_list.py +26 -2
  222. paddlex/utils/deps.py +1 -1
  223. paddlex/utils/device.py +15 -8
  224. paddlex/utils/env.py +4 -0
  225. paddlex/utils/flags.py +2 -4
  226. paddlex/utils/fonts/__init__.py +34 -4
  227. paddlex/utils/misc.py +1 -1
  228. {paddlex-3.0.0rc1.dist-info → paddlex-3.0.1.dist-info}/METADATA +52 -56
  229. {paddlex-3.0.0rc1.dist-info → paddlex-3.0.1.dist-info}/RECORD +233 -206
  230. {paddlex-3.0.0rc1.dist-info → paddlex-3.0.1.dist-info}/WHEEL +1 -1
  231. {paddlex-3.0.0rc1.dist-info → paddlex-3.0.1.dist-info}/entry_points.txt +0 -0
  232. {paddlex-3.0.0rc1.dist-info → paddlex-3.0.1.dist-info}/licenses/LICENSE +0 -0
  233. {paddlex-3.0.0rc1.dist-info → paddlex-3.0.1.dist-info}/top_level.txt +0 -0
paddlex/inference/pipelines/layout_parsing/xycut_enhanced/xycuts.py
@@ -0,0 +1,563 @@
+ # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from copy import deepcopy
+ from typing import Dict, List, Tuple
+
+ import numpy as np
+
+ from ..result_v2 import LayoutParsingBlock, LayoutParsingRegion
+ from ..setting import BLOCK_LABEL_MAP
+ from ..utils import calculate_overlap_ratio, calculate_projection_overlap_ratio
+ from .utils import (
+ calculate_discontinuous_projection,
+ get_cut_blocks,
+ insert_child_blocks,
+ manhattan_insert,
+ projection_by_bboxes,
+ recursive_xy_cut,
+ recursive_yx_cut,
+ reference_insert,
+ shrink_overlapping_boxes,
+ sort_normal_blocks,
+ split_projection_profile,
+ update_doc_title_child_blocks,
+ update_paragraph_title_child_blocks,
+ update_vision_child_blocks,
+ weighted_distance_insert,
+ )
+
+
+ def pre_process(
+ region: LayoutParsingRegion,
+ ) -> List:
+ """
+ Preprocess the layout for sorting purposes.
+
+ This function performs two main tasks:
+ 1. Pre-cuts the layout to ensure the document is correctly partitioned and sorted.
+ 2. Match the blocks with their children.
+
+ Args:
+ region: LayoutParsingRegion, the layout region to be pre-processed.
+
+ Returns:
+ List: A list of pre-cutted layout blocks list.
+ """
+ mask_labels = [
+ "header",
+ "unordered",
+ "footer",
+ "vision_footnote",
+ "sub_paragraph_title",
+ "doc_title_text",
+ "vision_title",
+ ]
+ pre_cut_block_idxes = []
+ block_map = region.block_map
+ blocks: List[LayoutParsingBlock] = list(block_map.values())
+ for block in blocks:
+ if block.order_label not in mask_labels:
+ update_region_label(block, region)
+
+ block_direction = block.direction
+ if block_direction == "horizontal":
+ tolerance_len = block.long_side_length // 5
+ else:
+ tolerance_len = block.short_side_length // 10
+
+ block_center = (
+ block.bbox[region.direction_start_index]
+ + block.bbox[region.direction_end_index]
+ ) / 2
+ center_offset = abs(block_center - region.direction_center_coordinate)
+ is_centered = center_offset <= tolerance_len
+
+ if is_centered:
+ pre_cut_block_idxes.append(block.index)
+
+ pre_cut_list = []
+ cut_direction = region.secondary_direction
+ cut_coordinates = []
+ discontinuous = []
+ all_boxes = np.array(
+ [block.bbox for block in blocks if block.order_label not in mask_labels]
+ )
+ if len(all_boxes) == 0:
+ return pre_cut_list
+ if pre_cut_block_idxes:
+ discontinuous, num_list = calculate_discontinuous_projection(
+ all_boxes, direction=cut_direction, return_num=True
+ )
+ for idx in pre_cut_block_idxes:
+ block = block_map[idx]
+ if (
+ block.order_label not in mask_labels
+ and block.secondary_direction == cut_direction
+ ):
+ if (
+ block.secondary_direction_start_coordinate,
+ block.secondary_direction_end_coordinate,
+ ) in discontinuous:
+ idx = discontinuous.index(
+ (
+ block.secondary_direction_start_coordinate,
+ block.secondary_direction_end_coordinate,
+ )
+ )
+ if num_list[idx] == 1:
+ cut_coordinates.append(
+ block.secondary_direction_start_coordinate
+ )
+ cut_coordinates.append(block.secondary_direction_end_coordinate)
+ secondary_discontinuous = calculate_discontinuous_projection(
+ all_boxes, direction=region.direction
+ )
+ if len(secondary_discontinuous) == 1:
+ if not discontinuous:
+ discontinuous = calculate_discontinuous_projection(
+ all_boxes, direction=cut_direction
+ )
+ current_interval = discontinuous[0]
+ for interval in discontinuous[1:]:
+ gap_len = interval[0] - current_interval[1]
+ if gap_len >= region.text_line_height * 3:
+ cut_coordinates.append(current_interval[1])
+ elif gap_len > region.text_line_height * 1.2:
+ (pre_blocks, post_blocks) = get_cut_blocks(
+ list(block_map.values()), cut_direction, [current_interval[1]], []
+ )
+ pre_bboxes = np.array([block.bbox for block in pre_blocks])
+ post_bboxes = np.array([block.bbox for block in post_blocks])
+ projection_index = 1 if cut_direction == "horizontal" else 0
+ pre_projection = projection_by_bboxes(pre_bboxes, projection_index)
+ post_projection = projection_by_bboxes(post_bboxes, projection_index)
+ pre_projection_min = np.min(pre_projection)
+ post_projection_min = np.min(post_projection)
+ pre_projection_min += 5 if pre_projection_min != 0 else 0
+ post_projection_min += 5 if post_projection_min != 0 else 0
+ pre_intervals = split_projection_profile(
+ pre_projection, pre_projection_min, 1
+ )
+ post_intervals = split_projection_profile(
+ post_projection, post_projection_min, 1
+ )
+ pre_gap_boxes = []
+ if pre_intervals is not None:
+ for start, end in zip(*pre_intervals):
+ bbox = [0] * 4
+ bbox[projection_index] = start
+ bbox[projection_index + 2] = end
+ pre_gap_boxes.append(bbox)
+ post_gap_boxes = []
+ if post_intervals is not None:
+ for start, end in zip(*post_intervals):
+ bbox = [0] * 4
+ bbox[projection_index] = start
+ bbox[projection_index + 2] = end
+ post_gap_boxes.append(bbox)
+ max_gap_boxes_num = max(len(pre_gap_boxes), len(post_gap_boxes))
+ if max_gap_boxes_num > 0:
+ discontinuous_intervals = calculate_discontinuous_projection(
+ pre_gap_boxes + post_gap_boxes, direction=region.direction
+ )
+ if len(discontinuous_intervals) != max_gap_boxes_num:
+ cut_coordinates.append(current_interval[1])
+ current_interval = interval
+ cut_list = get_cut_blocks(blocks, cut_direction, cut_coordinates, mask_labels)
+ pre_cut_list.extend(cut_list)
+ if region.direction == "vertical":
+ pre_cut_list = pre_cut_list[::-1]
+
+ return pre_cut_list
+
+
+ def update_region_label(
+ block: LayoutParsingBlock,
+ region: LayoutParsingRegion,
+ ) -> None:
+ """
+ Update the region label of a block based on its label and match the block with its children.
+
+ Args:
+ blocks (List[LayoutParsingBlock]): The list of blocks to process.
+ config (Dict[str, Any]): The configuration dictionary containing the necessary information.
+ block_idx (int): The index of the current block being processed.
+
+ Returns:
+ None
+ """
+ if block.label in BLOCK_LABEL_MAP["header_labels"]:
+ block.order_label = "header"
+ elif block.label in BLOCK_LABEL_MAP["doc_title_labels"]:
+ block.order_label = "doc_title"
+ elif (
+ block.label in BLOCK_LABEL_MAP["paragraph_title_labels"]
+ and block.order_label is None
+ ):
+ block.order_label = "paragraph_title"
+ elif block.label in BLOCK_LABEL_MAP["vision_labels"]:
+ block.order_label = "vision"
+ block.num_of_lines = 1
+ block.direction = region.direction
+ block.update_direction_info()
+ elif block.label in BLOCK_LABEL_MAP["footer_labels"]:
+ block.order_label = "footer"
+ elif block.label in BLOCK_LABEL_MAP["unordered_labels"]:
+ block.order_label = "unordered"
+ else:
+ block.order_label = "normal_text"
+
+ # only vision and doc title block can have child block
+ if block.order_label not in ["vision", "doc_title", "paragraph_title"]:
+ return
+
+ # match doc title text block
+ if block.order_label == "doc_title":
+ update_doc_title_child_blocks(block, region)
+ # match sub title block
+ elif block.order_label == "paragraph_title":
+ update_paragraph_title_child_blocks(block, region)
+ # match vision title block and vision footnote block
+ elif block.order_label == "vision":
+ update_vision_child_blocks(block, region)
+
+
+ def get_layout_structure(
+ blocks: List[LayoutParsingBlock],
+ region_direction: str,
+ region_secondary_direction: str,
+ ) -> Tuple[List[Dict[str, any]], bool]:
+ """
+ Determine the layout cross column of blocks.
+
+ Args:
+ blocks (List[Dict[str, any]]): List of block dictionaries containing 'label' and 'block_bbox'.
+
+ Returns:
+ Tuple[List[Dict[str, any]], bool]: Updated list of blocks with layout information and a boolean
+ indicating if the cross layout area is greater than the single layout area.
+ """
+ blocks.sort(
+ key=lambda x: (x.bbox[0], x.width),
+ )
+
+ mask_labels = ["doc_title", "cross_layout", "cross_reference"]
+ for block_idx, block in enumerate(blocks):
+ if block.order_label in mask_labels:
+ continue
+
+ for ref_idx, ref_block in enumerate(blocks):
+ if block_idx == ref_idx or ref_block.order_label in mask_labels:
+ continue
+
+ bbox_iou = calculate_overlap_ratio(block.bbox, ref_block.bbox)
+ if bbox_iou > 0:
+ if ref_block.order_label == "vision":
+ ref_block.order_label = "cross_layout"
+ break
+ if block.order_label == "vision" or block.area < ref_block.area:
+ block.order_label = "cross_layout"
+ break
+
+ match_projection_iou = calculate_projection_overlap_ratio(
+ block.bbox,
+ ref_block.bbox,
+ region_direction,
+ )
+ if match_projection_iou > 0:
+ for second_ref_idx, second_ref_block in enumerate(blocks):
+ if (
+ second_ref_idx in [block_idx, ref_idx]
+ or second_ref_block.order_label in mask_labels
+ ):
+ continue
+
+ bbox_iou = calculate_overlap_ratio(
+ block.bbox, second_ref_block.bbox
+ )
+ if bbox_iou > 0.1:
+ if second_ref_block.order_label == "vision":
+ second_ref_block.order_label = "cross_layout"
+ break
+ if (
+ block.order_label == "vision"
+ or block.area < second_ref_block.area
+ ):
+ block.order_label = "cross_layout"
+ break
+
+ second_match_projection_iou = calculate_projection_overlap_ratio(
+ block.bbox,
+ second_ref_block.bbox,
+ region_direction,
+ )
+ ref_match_projection_iou = calculate_projection_overlap_ratio(
+ ref_block.bbox,
+ second_ref_block.bbox,
+ region_direction,
+ )
+ secondary_direction_ref_match_projection_overlap_ratio = (
+ calculate_projection_overlap_ratio(
+ ref_block.bbox,
+ second_ref_block.bbox,
+ region_secondary_direction,
+ )
+ )
+ if (
+ second_match_projection_iou > 0
+ and ref_match_projection_iou == 0
+ and secondary_direction_ref_match_projection_overlap_ratio > 0
+ ):
+ if block.order_label == "vision" or (
+ ref_block.order_label == "normal_text"
+ and second_ref_block.order_label == "normal_text"
+ and ref_block.text_line_width
+ > ref_block.text_line_height * 5
+ and second_ref_block.text_line_width
+ > second_ref_block.text_line_height * 5
+ ):
+ block.order_label = (
+ "cross_reference"
+ if block.label == "reference"
+ else "cross_layout"
+ )
+
+
+ def sort_by_xycut(
+ block_bboxes: List,
+ direction: str = "vertical",
+ min_gap: int = 1,
+ ) -> List[int]:
+ """
+ Sort bounding boxes using recursive XY cut method based on the specified direction.
+
+ Args:
+ block_bboxes (Union[np.ndarray, List[List[int]]]): An array or list of bounding boxes,
+ where each box is represented as
+ [x_min, y_min, x_max, y_max].
+ direction (int): direction for the initial cut. Use 1 for Y-axis first and 0 for X-axis first.
+ Defaults to 0.
+ min_gap (int): Minimum gap width to consider a separation between segments. Defaults to 1.
+
+ Returns:
+ List[int]: A list of indices representing the order of sorted bounding boxes.
+ """
+ block_bboxes = np.asarray(block_bboxes).astype(int)
+ res = []
+ if direction == "vertical":
+ recursive_yx_cut(
+ block_bboxes,
+ np.arange(len(block_bboxes)).tolist(),
+ res,
+ min_gap,
+ )
+ else:
+ recursive_xy_cut(
+ block_bboxes,
+ np.arange(len(block_bboxes)).tolist(),
+ res,
+ min_gap,
+ )
+ return res
+
+
+ def match_unsorted_blocks(
+ sorted_blocks: List[LayoutParsingBlock],
+ unsorted_blocks: List[LayoutParsingBlock],
+ region: LayoutParsingRegion,
+ ) -> List[LayoutParsingBlock]:
+ """
+ Match special blocks with the sorted blocks based on their region labels.
+ Args:
+ sorted_blocks (List[LayoutParsingBlock]): Sorted blocks to be matched.
+ unsorted_blocks (List[LayoutParsingBlock]): Unsorted blocks to be matched.
+ config (Dict): Configuration dictionary containing various parameters.
+ median_width (int): Median width value used for calculations.
+
+ Returns:
+ List[LayoutParsingBlock]: The updated sorted blocks after matching special blocks.
+ """
+ distance_type_map = {
+ "cross_layout": weighted_distance_insert,
+ "paragraph_title": weighted_distance_insert,
+ "doc_title": weighted_distance_insert,
+ "vision_title": weighted_distance_insert,
+ "vision": weighted_distance_insert,
+ "cross_reference": reference_insert,
+ "unordered": manhattan_insert,
+ "other": manhattan_insert,
+ }
+
+ unsorted_blocks = sort_normal_blocks(
+ unsorted_blocks,
+ region.text_line_height,
+ region.text_line_width,
+ region.direction,
+ )
+ for idx, block in enumerate(unsorted_blocks):
+ order_label = block.order_label
+ if idx == 0 and order_label == "doc_title":
+ sorted_blocks.insert(0, block)
+ continue
+ sorted_blocks = distance_type_map[order_label](block, sorted_blocks, region)
+ return sorted_blocks
+
+
+ def xycut_enhanced(
+ region: LayoutParsingRegion,
+ ) -> LayoutParsingRegion:
+ """
+ xycut_enhance function performs the following steps:
+ 1. Preprocess the input blocks by extracting headers, footers, and pre-cut blocks.
+ 2. Mask blocks that are crossing different blocks.
+ 3. Perform xycut_enhanced algorithm on the remaining blocks.
+ 4. Match unsorted blocks with the sorted blocks based on their order labels.
+ 5. Update child blocks of the sorted blocks based on their parent blocks.
+ 6. Return the ordered result list.
+
+ Args:
+ blocks (List[LayoutParsingBlock]): Input blocks to be processed.
+
+ Returns:
+ List[LayoutParsingBlock]: Ordered result list after processing.
+ """
+ if len(region.block_map) == 0:
+ return []
+
+ pre_cut_list: List[List[LayoutParsingBlock]] = pre_process(region)
+ final_order_res_list: List[LayoutParsingBlock] = []
+
+ header_blocks: List[LayoutParsingBlock] = [
+ region.block_map[idx] for idx in region.header_block_idxes
+ ]
+ unordered_blocks: List[LayoutParsingBlock] = [
+ region.block_map[idx] for idx in region.unordered_block_idxes
+ ]
+ footer_blocks: List[LayoutParsingBlock] = [
+ region.block_map[idx] for idx in region.footer_block_idxes
+ ]
+
+ header_blocks: List[LayoutParsingBlock] = sort_normal_blocks(
+ header_blocks, region.text_line_height, region.text_line_width, region.direction
+ )
+ footer_blocks: List[LayoutParsingBlock] = sort_normal_blocks(
+ footer_blocks, region.text_line_height, region.text_line_width, region.direction
+ )
+ unordered_blocks: List[LayoutParsingBlock] = sort_normal_blocks(
+ unordered_blocks,
+ region.text_line_height,
+ region.text_line_width,
+ region.direction,
+ )
+ final_order_res_list.extend(header_blocks)
+
+ unsorted_blocks: List[LayoutParsingBlock] = []
+ sorted_blocks_by_pre_cuts: List[LayoutParsingBlock] = []
+ for pre_cut_blocks in pre_cut_list:
+ sorted_blocks: List[LayoutParsingBlock] = []
+ doc_title_blocks: List[LayoutParsingBlock] = []
+ xy_cut_blocks: List[LayoutParsingBlock] = []
+
+ get_layout_structure(
+ pre_cut_blocks, region.direction, region.secondary_direction
+ )
+
+ # Get xy cut blocks and add other blocks in special_block_map
+ for block in pre_cut_blocks:
+ if block.order_label not in [
+ "cross_layout",
+ "cross_reference",
+ "doc_title",
+ "unordered",
+ ]:
+ xy_cut_blocks.append(block)
+ elif block.label == "doc_title":
+ doc_title_blocks.append(block)
+ else:
+ unsorted_blocks.append(block)
+
+ if len(xy_cut_blocks) > 0:
+ block_bboxes = np.array([block.bbox for block in xy_cut_blocks])
+ block_text_lines = [block.num_of_lines for block in xy_cut_blocks]
+ discontinuous = calculate_discontinuous_projection(
+ block_bboxes, direction=region.direction
+ )
+ if len(discontinuous) > 1:
+ xy_cut_blocks = [block for block in xy_cut_blocks]
+ blocks_to_sort = deepcopy(xy_cut_blocks)
+ if region.direction == "vertical":
+ for block in blocks_to_sort:
+ block.bbox = np.array(
+ [-block.bbox[0], block.bbox[1], -block.bbox[2], block.bbox[3]]
+ )
+ if len(discontinuous) == 1 or max(block_text_lines) == 1:
+ blocks_to_sort.sort(
+ key=lambda x: (
+ x.bbox[region.secondary_direction_start_index]
+ // (region.text_line_height // 2),
+ x.bbox[region.direction_start_index],
+ )
+ )
+ blocks_to_sort = shrink_overlapping_boxes(
+ blocks_to_sort, region.secondary_direction
+ )
+ block_bboxes = np.array([block.bbox for block in blocks_to_sort])
+ sorted_indexes = sort_by_xycut(
+ block_bboxes, direction=region.secondary_direction, min_gap=1
+ )
+ else:
+ blocks_to_sort.sort(
+ key=lambda x: (
+ x.bbox[region.direction_start_index]
+ // (region.text_line_width // 2),
+ x.bbox[region.secondary_direction_start_index],
+ )
+ )
+ blocks_to_sort = shrink_overlapping_boxes(
+ blocks_to_sort, region.direction
+ )
+ block_bboxes = np.array([block.bbox for block in blocks_to_sort])
+ sorted_indexes = sort_by_xycut(
+ block_bboxes, direction=region.direction, min_gap=1
+ )
+
+ sorted_blocks = [
+ region.block_map[blocks_to_sort[i].index] for i in sorted_indexes
+ ]
+
+ sorted_blocks = match_unsorted_blocks(
+ sorted_blocks,
+ doc_title_blocks,
+ region=region,
+ )
+
+ sorted_blocks_by_pre_cuts.extend(sorted_blocks)
+
+ final_sorted_blocks = match_unsorted_blocks(
+ sorted_blocks_by_pre_cuts,
+ unsorted_blocks,
+ region=region,
+ )
+
+ final_order_res_list.extend(final_sorted_blocks)
+ final_order_res_list.extend(footer_blocks)
+ final_order_res_list.extend(unordered_blocks)
+
+ for block_idx, block in enumerate(final_order_res_list):
+ final_order_res_list = insert_child_blocks(
+ block, block_idx, final_order_res_list
+ )
+ block = final_order_res_list[block_idx]
+ return final_order_res_list
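
Note on the sorting step: sort_by_xycut above delegates to recursive_xy_cut and recursive_yx_cut from xycut_enhanced/utils.py, which are not part of this diff. For orientation only, here is a minimal self-contained sketch of the recursive XY-cut idea; the helper names (_split_intervals, xy_cut_order), the min_gap handling, and the final sort fallback are illustrative assumptions, not the PaddleX implementation.

# Illustrative sketch of recursive XY-cut ordering for boxes [x_min, y_min, x_max, y_max].
from typing import List

import numpy as np


def _split_intervals(boxes: np.ndarray, axis: int, min_gap: int) -> List[np.ndarray]:
    """Group box indices whose projections onto `axis` (0=x, 1=y) overlap or are
    closer than `min_gap`; groups are returned in ascending coordinate order."""
    if len(boxes) == 0:
        return []
    order = np.argsort(boxes[:, axis])
    groups, current = [], [order[0]]
    reach = boxes[order[0], axis + 2]
    for i in order[1:]:
        if boxes[i, axis] - reach >= min_gap:  # a clear gap -> start a new group
            groups.append(np.array(current))
            current = []
        current.append(i)
        reach = max(reach, boxes[i, axis + 2])
    groups.append(np.array(current))
    return groups


def xy_cut_order(boxes: np.ndarray, axis: int = 1, min_gap: int = 1) -> List[int]:
    """Return reading-order indices by cutting along `axis` first (1 = horizontal
    cuts stacked top-to-bottom), then recursing on the other axis in each band."""
    def recurse(ids: np.ndarray, ax: int) -> List[int]:
        if len(ids) <= 1:
            return ids.tolist()
        groups = _split_intervals(boxes[ids], ax, min_gap)
        if len(groups) == 1:  # no cut possible on this axis, try the other one
            sub_groups = _split_intervals(boxes[ids], 1 - ax, min_gap)
            if len(sub_groups) == 1:  # indivisible cluster: fall back to y, then x sort
                return ids[np.lexsort((boxes[ids, 0], boxes[ids, 1]))].tolist()
            return [j for g in sub_groups for j in recurse(ids[g], ax)]
        return [j for g in groups for j in recurse(ids[g], 1 - ax)]
    return recurse(np.arange(len(boxes)), axis)


if __name__ == "__main__":
    # Two text columns under a full-width title.
    demo = np.array([
        [0, 0, 100, 10],    # title spanning both columns
        [0, 20, 45, 60],    # left column
        [55, 20, 100, 60],  # right column
    ])
    print(xy_cut_order(demo))  # -> [0, 1, 2]

In the pipeline above, the cut runs on block bounding boxes only after shrink_overlapping_boxes has separated near-touching blocks, which appears to be why min_gap=1 is sufficient there.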
paddlex/inference/pipelines/m_3d_bev_detection/pipeline.py
@@ -45,9 +45,9 @@ class BEVDet3DPipeline(BasePipeline):
  device (str): The device to run the prediction on. Default is None.
  pp_option (PaddlePredictorOption): Options for PaddlePaddle predictor. Default is None.
  use_hpip (bool, optional): Whether to use the high-performance
- inference plugin (HPIP). Defaults to False.
+ inference plugin (HPIP) by default. Defaults to False.
  hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
- The high-performance inference configuration dictionary.
+ The default high-performance inference configuration dictionary.
  Defaults to None.
  """
  super().__init__(
paddlex/inference/pipelines/multilingual_speech_recognition/pipeline.py
@@ -45,9 +45,9 @@ class MultilingualSpeechRecognitionPipeline(BasePipeline):
  device (str): The device to run the prediction on. Default is None.
  pp_option (PaddlePredictorOption): Options for PaddlePaddle predictor. Default is None.
  use_hpip (bool, optional): Whether to use the high-performance
- inference plugin (HPIP). Defaults to False.
+ inference plugin (HPIP) by default. Defaults to False.
  hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
- The high-performance inference configuration dictionary.
+ The default high-performance inference configuration dictionary.
  Defaults to None.
  """
  super().__init__(
paddlex/inference/pipelines/object_detection/pipeline.py
@@ -20,15 +20,13 @@ from ....utils.deps import pipeline_requires_extra
  from ...models.object_detection.result import DetResult
  from ...utils.hpi import HPIConfig
  from ...utils.pp_option import PaddlePredictorOption
+ from .._parallel import AutoParallelImageSimpleInferencePipeline
  from ..base import BasePipeline


- @pipeline_requires_extra("cv")
- class ObjectDetectionPipeline(BasePipeline):
+ class _ObjectDetectionPipeline(BasePipeline):
  """Object Detection Pipeline"""

- entities = "object_detection"
-
  def __init__(
  self,
  config: Dict,
@@ -45,9 +43,9 @@ class ObjectDetectionPipeline(BasePipeline):
  device (str): The device to run the prediction on. Default is None.
  pp_option (PaddlePredictorOption): Options for PaddlePaddle predictor. Default is None.
  use_hpip (bool, optional): Whether to use the high-performance
- inference plugin (HPIP). Defaults to False.
+ inference plugin (HPIP) by default. Defaults to False.
  hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
- The high-performance inference configuration dictionary.
+ The default high-performance inference configuration dictionary.
  Defaults to None.
  """
  super().__init__(
@@ -103,3 +101,15 @@ class ObjectDetectionPipeline(BasePipeline):
  layout_merge_bboxes_mode=layout_merge_bboxes_mode,
  **kwargs,
  )
+
+
+ @pipeline_requires_extra("cv")
+ class ObjectDetectionPipeline(AutoParallelImageSimpleInferencePipeline):
+ entities = "object_detection"
+
+ @property
+ def _pipeline_cls(self):
+ return _ObjectDetectionPipeline
+
+ def _get_batch_size(self, config):
+ return config["SubModules"]["ObjectDetection"].get("batch_size", 1)