paddlex 3.0.0rc1__py3-none-any.whl → 3.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (240)
  1. paddlex/.version +1 -1
  2. paddlex/__init__.py +1 -1
  3. paddlex/configs/modules/chart_parsing/PP-Chart2Table.yaml +13 -0
  4. paddlex/configs/modules/doc_vlm/PP-DocBee2-3B.yaml +14 -0
  5. paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-L.yaml +40 -0
  6. paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-M.yaml +40 -0
  7. paddlex/configs/modules/formula_recognition/PP-FormulaNet_plus-S.yaml +40 -0
  8. paddlex/configs/modules/layout_detection/PP-DocBlockLayout.yaml +40 -0
  9. paddlex/configs/modules/layout_detection/PP-DocLayout-L.yaml +2 -2
  10. paddlex/configs/modules/layout_detection/PP-DocLayout-M.yaml +2 -2
  11. paddlex/configs/modules/layout_detection/PP-DocLayout-S.yaml +2 -2
  12. paddlex/configs/modules/layout_detection/PP-DocLayout_plus-L.yaml +40 -0
  13. paddlex/configs/modules/text_detection/PP-OCRv5_mobile_det.yaml +40 -0
  14. paddlex/configs/modules/text_detection/PP-OCRv5_server_det.yaml +40 -0
  15. paddlex/configs/modules/text_recognition/PP-OCRv5_mobile_rec.yaml +39 -0
  16. paddlex/configs/modules/text_recognition/PP-OCRv5_server_rec.yaml +39 -0
  17. paddlex/configs/modules/textline_orientation/PP-LCNet_x1_0_textline_ori.yaml +41 -0
  18. paddlex/configs/pipelines/OCR.yaml +7 -6
  19. paddlex/configs/pipelines/PP-ChatOCRv3-doc.yaml +3 -1
  20. paddlex/configs/pipelines/PP-ChatOCRv4-doc.yaml +91 -34
  21. paddlex/configs/pipelines/PP-StructureV3.yaml +72 -72
  22. paddlex/configs/pipelines/doc_understanding.yaml +1 -1
  23. paddlex/configs/pipelines/formula_recognition.yaml +2 -2
  24. paddlex/configs/pipelines/layout_parsing.yaml +3 -2
  25. paddlex/configs/pipelines/seal_recognition.yaml +1 -0
  26. paddlex/configs/pipelines/table_recognition.yaml +2 -1
  27. paddlex/configs/pipelines/table_recognition_v2.yaml +7 -1
  28. paddlex/hpip_links.html +20 -20
  29. paddlex/inference/common/batch_sampler/doc_vlm_batch_sampler.py +33 -10
  30. paddlex/inference/common/batch_sampler/image_batch_sampler.py +34 -25
  31. paddlex/inference/common/result/mixin.py +19 -12
  32. paddlex/inference/models/base/predictor/base_predictor.py +2 -8
  33. paddlex/inference/models/common/static_infer.py +29 -73
  34. paddlex/inference/models/common/tokenizer/__init__.py +2 -0
  35. paddlex/inference/models/common/tokenizer/clip_tokenizer.py +1 -1
  36. paddlex/inference/models/common/tokenizer/gpt_tokenizer.py +2 -2
  37. paddlex/inference/models/common/tokenizer/qwen2_5_tokenizer.py +112 -0
  38. paddlex/inference/models/common/tokenizer/qwen2_tokenizer.py +7 -1
  39. paddlex/inference/models/common/tokenizer/qwen_tokenizer.py +288 -0
  40. paddlex/inference/models/common/tokenizer/tokenizer_utils.py +13 -13
  41. paddlex/inference/models/common/tokenizer/tokenizer_utils_base.py +3 -3
  42. paddlex/inference/models/common/tokenizer/vocab.py +7 -7
  43. paddlex/inference/models/common/ts/funcs.py +19 -8
  44. paddlex/inference/models/common/vlm/conversion_utils.py +99 -0
  45. paddlex/inference/models/common/vlm/fusion_ops.py +205 -0
  46. paddlex/inference/models/common/vlm/generation/configuration_utils.py +1 -1
  47. paddlex/inference/models/common/vlm/generation/logits_process.py +1 -1
  48. paddlex/inference/models/common/vlm/generation/utils.py +1 -1
  49. paddlex/inference/models/common/vlm/transformers/configuration_utils.py +3 -3
  50. paddlex/inference/models/common/vlm/transformers/conversion_utils.py +3 -3
  51. paddlex/inference/models/common/vlm/transformers/model_outputs.py +2 -2
  52. paddlex/inference/models/common/vlm/transformers/model_utils.py +7 -31
  53. paddlex/inference/models/doc_vlm/modeling/GOT_ocr_2_0.py +830 -0
  54. paddlex/inference/models/doc_vlm/modeling/__init__.py +2 -0
  55. paddlex/inference/models/doc_vlm/modeling/qwen2.py +1606 -0
  56. paddlex/inference/models/doc_vlm/modeling/qwen2_5_vl.py +3006 -0
  57. paddlex/inference/models/doc_vlm/modeling/qwen2_vl.py +0 -105
  58. paddlex/inference/models/doc_vlm/predictor.py +79 -24
  59. paddlex/inference/models/doc_vlm/processors/GOT_ocr_2_0.py +97 -0
  60. paddlex/inference/models/doc_vlm/processors/__init__.py +2 -0
  61. paddlex/inference/models/doc_vlm/processors/common.py +189 -0
  62. paddlex/inference/models/doc_vlm/processors/qwen2_5_vl.py +548 -0
  63. paddlex/inference/models/doc_vlm/processors/qwen2_vl.py +21 -176
  64. paddlex/inference/models/formula_recognition/predictor.py +8 -2
  65. paddlex/inference/models/formula_recognition/processors.py +90 -77
  66. paddlex/inference/models/formula_recognition/result.py +28 -27
  67. paddlex/inference/models/image_feature/processors.py +3 -4
  68. paddlex/inference/models/keypoint_detection/predictor.py +3 -0
  69. paddlex/inference/models/object_detection/predictor.py +2 -0
  70. paddlex/inference/models/object_detection/processors.py +28 -3
  71. paddlex/inference/models/object_detection/utils.py +2 -0
  72. paddlex/inference/models/table_structure_recognition/result.py +0 -10
  73. paddlex/inference/models/text_detection/predictor.py +8 -0
  74. paddlex/inference/models/text_detection/processors.py +44 -10
  75. paddlex/inference/models/text_detection/result.py +0 -10
  76. paddlex/inference/models/text_recognition/result.py +1 -1
  77. paddlex/inference/pipelines/__init__.py +9 -5
  78. paddlex/inference/pipelines/_parallel.py +172 -0
  79. paddlex/inference/pipelines/anomaly_detection/pipeline.py +16 -6
  80. paddlex/inference/pipelines/attribute_recognition/pipeline.py +11 -1
  81. paddlex/inference/pipelines/base.py +14 -4
  82. paddlex/inference/pipelines/components/faisser.py +1 -1
  83. paddlex/inference/pipelines/doc_preprocessor/pipeline.py +53 -27
  84. paddlex/inference/pipelines/formula_recognition/pipeline.py +120 -82
  85. paddlex/inference/pipelines/formula_recognition/result.py +1 -11
  86. paddlex/inference/pipelines/image_classification/pipeline.py +16 -6
  87. paddlex/inference/pipelines/image_multilabel_classification/pipeline.py +16 -6
  88. paddlex/inference/pipelines/instance_segmentation/pipeline.py +16 -6
  89. paddlex/inference/pipelines/keypoint_detection/pipeline.py +16 -6
  90. paddlex/inference/pipelines/layout_parsing/layout_objects.py +859 -0
  91. paddlex/inference/pipelines/layout_parsing/pipeline.py +34 -47
  92. paddlex/inference/pipelines/layout_parsing/pipeline_v2.py +832 -260
  93. paddlex/inference/pipelines/layout_parsing/result.py +4 -17
  94. paddlex/inference/pipelines/layout_parsing/result_v2.py +259 -245
  95. paddlex/inference/pipelines/layout_parsing/setting.py +88 -0
  96. paddlex/inference/pipelines/layout_parsing/utils.py +391 -2028
  97. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/__init__.py +16 -0
  98. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/utils.py +1199 -0
  99. paddlex/inference/pipelines/layout_parsing/xycut_enhanced/xycuts.py +615 -0
  100. paddlex/inference/pipelines/m_3d_bev_detection/pipeline.py +2 -2
  101. paddlex/inference/pipelines/multilingual_speech_recognition/pipeline.py +2 -2
  102. paddlex/inference/pipelines/object_detection/pipeline.py +16 -6
  103. paddlex/inference/pipelines/ocr/pipeline.py +127 -70
  104. paddlex/inference/pipelines/ocr/result.py +21 -18
  105. paddlex/inference/pipelines/open_vocabulary_detection/pipeline.py +2 -2
  106. paddlex/inference/pipelines/open_vocabulary_segmentation/pipeline.py +2 -2
  107. paddlex/inference/pipelines/pp_chatocr/pipeline_base.py +2 -2
  108. paddlex/inference/pipelines/pp_chatocr/pipeline_v3.py +2 -5
  109. paddlex/inference/pipelines/pp_chatocr/pipeline_v4.py +6 -6
  110. paddlex/inference/pipelines/rotated_object_detection/pipeline.py +16 -6
  111. paddlex/inference/pipelines/seal_recognition/pipeline.py +109 -53
  112. paddlex/inference/pipelines/semantic_segmentation/pipeline.py +16 -6
  113. paddlex/inference/pipelines/small_object_detection/pipeline.py +16 -6
  114. paddlex/inference/pipelines/table_recognition/pipeline.py +26 -18
  115. paddlex/inference/pipelines/table_recognition/pipeline_v2.py +624 -53
  116. paddlex/inference/pipelines/table_recognition/result.py +1 -1
  117. paddlex/inference/pipelines/table_recognition/table_recognition_post_processing_v2.py +9 -5
  118. paddlex/inference/pipelines/ts_anomaly_detection/pipeline.py +2 -2
  119. paddlex/inference/pipelines/ts_classification/pipeline.py +2 -2
  120. paddlex/inference/pipelines/ts_forecasting/pipeline.py +2 -2
  121. paddlex/inference/pipelines/video_classification/pipeline.py +2 -2
  122. paddlex/inference/pipelines/video_detection/pipeline.py +2 -2
  123. paddlex/inference/serving/basic_serving/_app.py +46 -13
  124. paddlex/inference/serving/basic_serving/_pipeline_apps/_common/common.py +5 -1
  125. paddlex/inference/serving/basic_serving/_pipeline_apps/layout_parsing.py +0 -1
  126. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv3_doc.py +0 -1
  127. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_chatocrv4_doc.py +1 -1
  128. paddlex/inference/serving/basic_serving/_pipeline_apps/pp_structurev3.py +6 -2
  129. paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition.py +1 -5
  130. paddlex/inference/serving/basic_serving/_pipeline_apps/table_recognition_v2.py +4 -5
  131. paddlex/inference/serving/infra/utils.py +20 -22
  132. paddlex/inference/serving/schemas/formula_recognition.py +1 -1
  133. paddlex/inference/serving/schemas/layout_parsing.py +1 -2
  134. paddlex/inference/serving/schemas/pp_chatocrv3_doc.py +1 -2
  135. paddlex/inference/serving/schemas/pp_chatocrv4_doc.py +2 -2
  136. paddlex/inference/serving/schemas/pp_structurev3.py +10 -6
  137. paddlex/inference/serving/schemas/seal_recognition.py +1 -1
  138. paddlex/inference/serving/schemas/table_recognition.py +2 -6
  139. paddlex/inference/serving/schemas/table_recognition_v2.py +5 -6
  140. paddlex/inference/utils/hpi.py +30 -16
  141. paddlex/inference/utils/hpi_model_info_collection.json +666 -162
  142. paddlex/inference/utils/io/readers.py +12 -12
  143. paddlex/inference/utils/misc.py +20 -0
  144. paddlex/inference/utils/mkldnn_blocklist.py +59 -0
  145. paddlex/inference/utils/official_models.py +140 -5
  146. paddlex/inference/utils/pp_option.py +74 -9
  147. paddlex/model.py +2 -2
  148. paddlex/modules/__init__.py +1 -1
  149. paddlex/modules/anomaly_detection/evaluator.py +2 -2
  150. paddlex/modules/base/__init__.py +1 -1
  151. paddlex/modules/base/evaluator.py +5 -5
  152. paddlex/modules/base/trainer.py +1 -1
  153. paddlex/modules/doc_vlm/dataset_checker.py +2 -2
  154. paddlex/modules/doc_vlm/evaluator.py +2 -2
  155. paddlex/modules/doc_vlm/exportor.py +2 -2
  156. paddlex/modules/doc_vlm/model_list.py +1 -1
  157. paddlex/modules/doc_vlm/trainer.py +2 -2
  158. paddlex/modules/face_recognition/evaluator.py +2 -2
  159. paddlex/modules/formula_recognition/evaluator.py +5 -2
  160. paddlex/modules/formula_recognition/model_list.py +3 -0
  161. paddlex/modules/formula_recognition/trainer.py +3 -0
  162. paddlex/modules/general_recognition/evaluator.py +1 -1
  163. paddlex/modules/image_classification/evaluator.py +2 -2
  164. paddlex/modules/image_classification/model_list.py +1 -0
  165. paddlex/modules/instance_segmentation/evaluator.py +1 -1
  166. paddlex/modules/keypoint_detection/evaluator.py +1 -1
  167. paddlex/modules/m_3d_bev_detection/evaluator.py +2 -2
  168. paddlex/modules/multilabel_classification/evaluator.py +2 -2
  169. paddlex/modules/object_detection/dataset_checker/dataset_src/convert_dataset.py +4 -4
  170. paddlex/modules/object_detection/evaluator.py +2 -2
  171. paddlex/modules/object_detection/model_list.py +2 -0
  172. paddlex/modules/semantic_segmentation/dataset_checker/__init__.py +12 -2
  173. paddlex/modules/semantic_segmentation/evaluator.py +2 -2
  174. paddlex/modules/table_recognition/evaluator.py +2 -2
  175. paddlex/modules/text_detection/evaluator.py +2 -2
  176. paddlex/modules/text_detection/model_list.py +2 -0
  177. paddlex/modules/text_recognition/evaluator.py +2 -2
  178. paddlex/modules/text_recognition/model_list.py +2 -0
  179. paddlex/modules/ts_anomaly_detection/evaluator.py +2 -2
  180. paddlex/modules/ts_classification/dataset_checker/dataset_src/split_dataset.py +1 -1
  181. paddlex/modules/ts_classification/evaluator.py +2 -2
  182. paddlex/modules/ts_forecast/evaluator.py +2 -2
  183. paddlex/modules/video_classification/evaluator.py +2 -2
  184. paddlex/modules/video_detection/evaluator.py +2 -2
  185. paddlex/ops/__init__.py +8 -5
  186. paddlex/paddlex_cli.py +19 -13
  187. paddlex/repo_apis/Paddle3D_api/bev_fusion/model.py +2 -2
  188. paddlex/repo_apis/PaddleClas_api/cls/config.py +1 -1
  189. paddlex/repo_apis/PaddleClas_api/cls/model.py +1 -1
  190. paddlex/repo_apis/PaddleClas_api/cls/register.py +10 -0
  191. paddlex/repo_apis/PaddleClas_api/cls/runner.py +1 -1
  192. paddlex/repo_apis/PaddleDetection_api/instance_seg/model.py +1 -1
  193. paddlex/repo_apis/PaddleDetection_api/instance_seg/runner.py +1 -1
  194. paddlex/repo_apis/PaddleDetection_api/object_det/config.py +1 -1
  195. paddlex/repo_apis/PaddleDetection_api/object_det/model.py +1 -1
  196. paddlex/repo_apis/PaddleDetection_api/object_det/official_categories.py +25 -0
  197. paddlex/repo_apis/PaddleDetection_api/object_det/register.py +30 -0
  198. paddlex/repo_apis/PaddleDetection_api/object_det/runner.py +1 -1
  199. paddlex/repo_apis/PaddleOCR_api/formula_rec/config.py +3 -3
  200. paddlex/repo_apis/PaddleOCR_api/formula_rec/model.py +5 -9
  201. paddlex/repo_apis/PaddleOCR_api/formula_rec/register.py +27 -0
  202. paddlex/repo_apis/PaddleOCR_api/formula_rec/runner.py +1 -1
  203. paddlex/repo_apis/PaddleOCR_api/table_rec/model.py +1 -1
  204. paddlex/repo_apis/PaddleOCR_api/table_rec/runner.py +1 -1
  205. paddlex/repo_apis/PaddleOCR_api/text_det/model.py +1 -1
  206. paddlex/repo_apis/PaddleOCR_api/text_det/register.py +18 -0
  207. paddlex/repo_apis/PaddleOCR_api/text_det/runner.py +1 -1
  208. paddlex/repo_apis/PaddleOCR_api/text_rec/config.py +3 -3
  209. paddlex/repo_apis/PaddleOCR_api/text_rec/model.py +5 -9
  210. paddlex/repo_apis/PaddleOCR_api/text_rec/register.py +18 -0
  211. paddlex/repo_apis/PaddleOCR_api/text_rec/runner.py +1 -1
  212. paddlex/repo_apis/PaddleSeg_api/seg/model.py +1 -1
  213. paddlex/repo_apis/PaddleSeg_api/seg/runner.py +1 -1
  214. paddlex/repo_apis/PaddleTS_api/ts_ad/config.py +3 -3
  215. paddlex/repo_apis/PaddleTS_api/ts_cls/config.py +2 -2
  216. paddlex/repo_apis/PaddleTS_api/ts_fc/config.py +4 -4
  217. paddlex/repo_apis/PaddleVideo_api/video_cls/config.py +1 -1
  218. paddlex/repo_apis/PaddleVideo_api/video_cls/model.py +1 -1
  219. paddlex/repo_apis/PaddleVideo_api/video_cls/runner.py +1 -1
  220. paddlex/repo_apis/PaddleVideo_api/video_det/config.py +1 -1
  221. paddlex/repo_apis/PaddleVideo_api/video_det/model.py +1 -1
  222. paddlex/repo_apis/PaddleVideo_api/video_det/runner.py +1 -1
  223. paddlex/repo_apis/base/config.py +1 -1
  224. paddlex/repo_manager/core.py +3 -3
  225. paddlex/repo_manager/meta.py +6 -2
  226. paddlex/repo_manager/repo.py +17 -16
  227. paddlex/utils/custom_device_list.py +26 -2
  228. paddlex/utils/deps.py +3 -3
  229. paddlex/utils/device.py +5 -13
  230. paddlex/utils/env.py +4 -0
  231. paddlex/utils/flags.py +11 -4
  232. paddlex/utils/fonts/__init__.py +34 -4
  233. paddlex/utils/misc.py +1 -1
  234. paddlex/utils/subclass_register.py +2 -2
  235. {paddlex-3.0.0rc1.dist-info → paddlex-3.0.2.dist-info}/METADATA +349 -208
  236. {paddlex-3.0.0rc1.dist-info → paddlex-3.0.2.dist-info}/RECORD +240 -211
  237. {paddlex-3.0.0rc1.dist-info → paddlex-3.0.2.dist-info}/WHEEL +1 -1
  238. {paddlex-3.0.0rc1.dist-info → paddlex-3.0.2.dist-info}/entry_points.txt +1 -0
  239. {paddlex-3.0.0rc1.dist-info/licenses → paddlex-3.0.2.dist-info}/LICENSE +0 -0
  240. {paddlex-3.0.0rc1.dist-info → paddlex-3.0.2.dist-info}/top_level.txt +0 -0
@@ -22,6 +22,7 @@ from ...common.batch_sampler import ImageBatchSampler
22
22
  from ...common.reader import ReadImage
23
23
  from ...utils.hpi import HPIConfig
24
24
  from ...utils.pp_option import PaddlePredictorOption
25
+ from .._parallel import AutoParallelImageSimpleInferencePipeline
25
26
  from ..base import BasePipeline
26
27
  from ..components import (
27
28
  CropByPolys,
@@ -33,12 +34,9 @@ from ..components import (
33
34
  from .result import OCRResult
34
35
 
35
36
 
36
- @pipeline_requires_extra("ocr")
37
- class OCRPipeline(BasePipeline):
37
+ class _OCRPipeline(BasePipeline):
38
38
  """OCR Pipeline"""
39
39
 
40
- entities = "OCR"
41
-
42
40
  def __init__(
43
41
  self,
44
42
  config: Dict,
@@ -55,9 +53,9 @@ class OCRPipeline(BasePipeline):
55
53
  device (str, optional): Device to run the predictions on. Defaults to None.
56
54
  pp_option (PaddlePredictorOption, optional): PaddlePredictor options. Defaults to None.
57
55
  use_hpip (bool, optional): Whether to use the high-performance
58
- inference plugin (HPIP). Defaults to False.
56
+ inference plugin (HPIP) by default. Defaults to False.
59
57
  hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
60
- The high-performance inference configuration dictionary.
58
+ The default high-performance inference configuration dictionary.
61
59
  Defaults to None.
62
60
  """
63
61
  super().__init__(
@@ -93,6 +91,7 @@ class OCRPipeline(BasePipeline):
93
91
  if self.text_type == "general":
94
92
  self.text_det_limit_side_len = text_det_config.get("limit_side_len", 960)
95
93
  self.text_det_limit_type = text_det_config.get("limit_type", "max")
94
+ self.text_det_max_side_limit = text_det_config.get("max_side_limit", 4000)
96
95
  self.text_det_thresh = text_det_config.get("thresh", 0.3)
97
96
  self.text_det_box_thresh = text_det_config.get("box_thresh", 0.6)
98
97
  self.input_shape = text_det_config.get("input_shape", None)
@@ -102,6 +101,7 @@ class OCRPipeline(BasePipeline):
102
101
  elif self.text_type == "seal":
103
102
  self.text_det_limit_side_len = text_det_config.get("limit_side_len", 736)
104
103
  self.text_det_limit_type = text_det_config.get("limit_type", "min")
104
+ self.text_det_max_side_limit = text_det_config.get("max_side_limit", 4000)
105
105
  self.text_det_thresh = text_det_config.get("thresh", 0.2)
106
106
  self.text_det_box_thresh = text_det_config.get("box_thresh", 0.6)
107
107
  self.text_det_unclip_ratio = text_det_config.get("unclip_ratio", 0.5)
@@ -115,6 +115,7 @@ class OCRPipeline(BasePipeline):
115
115
  text_det_config,
116
116
  limit_side_len=self.text_det_limit_side_len,
117
117
  limit_type=self.text_det_limit_type,
118
+ max_side_limit=self.text_det_max_side_limit,
118
119
  thresh=self.text_det_thresh,
119
120
  box_thresh=self.text_det_box_thresh,
120
121
  unclip_ratio=self.text_det_unclip_ratio,
@@ -131,7 +132,7 @@ class OCRPipeline(BasePipeline):
131
132
  text_rec_config, input_shape=self.input_shape
132
133
  )
133
134
 
134
- self.batch_sampler = ImageBatchSampler(batch_size=1)
135
+ self.batch_sampler = ImageBatchSampler(batch_size=config.get("batch_size", 1))
135
136
  self.img_reader = ReadImage(format="BGR")
136
137
 
137
138
  def rotate_image(
@@ -234,6 +235,7 @@ class OCRPipeline(BasePipeline):
234
235
  self,
235
236
  text_det_limit_side_len: Optional[int] = None,
236
237
  text_det_limit_type: Optional[str] = None,
238
+ text_det_max_side_limit: Optional[int] = None,
237
239
  text_det_thresh: Optional[float] = None,
238
240
  text_det_box_thresh: Optional[float] = None,
239
241
  text_det_unclip_ratio: Optional[float] = None,
@@ -246,6 +248,7 @@ class OCRPipeline(BasePipeline):
246
248
  Args:
247
249
  text_det_limit_side_len (Optional[int]): The maximum side length of the text box.
248
250
  text_det_limit_type (Optional[str]): The type of limit to apply to the text box.
251
+ text_det_max_side_limit (Optional[int]): The maximum side length of the text box.
249
252
  text_det_thresh (Optional[float]): The threshold for text detection.
250
253
  text_det_box_thresh (Optional[float]): The threshold for the bounding box.
251
254
  text_det_unclip_ratio (Optional[float]): The ratio for unclipping the text box.
@@ -257,6 +260,8 @@ class OCRPipeline(BasePipeline):
257
260
  text_det_limit_side_len = self.text_det_limit_side_len
258
261
  if text_det_limit_type is None:
259
262
  text_det_limit_type = self.text_det_limit_type
263
+ if text_det_max_side_limit is None:
264
+ text_det_max_side_limit = self.text_det_max_side_limit
260
265
  if text_det_thresh is None:
261
266
  text_det_thresh = self.text_det_thresh
262
267
  if text_det_box_thresh is None:
@@ -267,6 +272,7 @@ class OCRPipeline(BasePipeline):
267
272
  limit_side_len=text_det_limit_side_len,
268
273
  limit_type=text_det_limit_type,
269
274
  thresh=text_det_thresh,
275
+ max_side_limit=text_det_max_side_limit,
270
276
  box_thresh=text_det_box_thresh,
271
277
  unclip_ratio=text_det_unclip_ratio,
272
278
  )
@@ -279,6 +285,7 @@ class OCRPipeline(BasePipeline):
279
285
  use_textline_orientation: Optional[bool] = None,
280
286
  text_det_limit_side_len: Optional[int] = None,
281
287
  text_det_limit_type: Optional[str] = None,
288
+ text_det_max_side_limit: Optional[int] = None,
282
289
  text_det_thresh: Optional[float] = None,
283
290
  text_det_box_thresh: Optional[float] = None,
284
291
  text_det_unclip_ratio: Optional[float] = None,
@@ -294,6 +301,7 @@ class OCRPipeline(BasePipeline):
294
301
  use_textline_orientation (Optional[bool]): Whether to use textline orientation prediction.
295
302
  text_det_limit_side_len (Optional[int]): Maximum side length for text detection.
296
303
  text_det_limit_type (Optional[str]): Type of limit to apply for text detection.
304
+ text_det_max_side_limit (Optional[int]): Maximum side length for text detection.
297
305
  text_det_thresh (Optional[float]): Threshold for text detection.
298
306
  text_det_box_thresh (Optional[float]): Threshold for text detection boxes.
299
307
  text_det_unclip_ratio (Optional[float]): Ratio for unclipping text detection boxes.
@@ -312,6 +320,7 @@ class OCRPipeline(BasePipeline):
312
320
  text_det_params = self.get_text_det_params(
313
321
  text_det_limit_side_len,
314
322
  text_det_limit_type,
323
+ text_det_max_side_limit,
315
324
  text_det_thresh,
316
325
  text_det_box_thresh,
317
326
  text_det_unclip_ratio,
@@ -320,87 +329,135 @@ class OCRPipeline(BasePipeline):
320
329
  if text_rec_score_thresh is None:
321
330
  text_rec_score_thresh = self.text_rec_score_thresh
322
331
 
323
- for img_id, batch_data in enumerate(self.batch_sampler(input)):
324
- image_array = self.img_reader(batch_data.instances)[0]
332
+ for _, batch_data in enumerate(self.batch_sampler(input)):
333
+ image_arrays = self.img_reader(batch_data.instances)
325
334
 
326
335
  if model_settings["use_doc_preprocessor"]:
327
- doc_preprocessor_res = next(
336
+ doc_preprocessor_results = list(
328
337
  self.doc_preprocessor_pipeline(
329
- image_array,
338
+ image_arrays,
330
339
  use_doc_orientation_classify=use_doc_orientation_classify,
331
340
  use_doc_unwarping=use_doc_unwarping,
332
341
  )
333
342
  )
334
343
  else:
335
- doc_preprocessor_res = {"output_img": image_array}
344
+ doc_preprocessor_results = [{"output_img": arr} for arr in image_arrays]
336
345
 
337
- doc_preprocessor_image = doc_preprocessor_res["output_img"]
346
+ doc_preprocessor_images = [
347
+ item["output_img"] for item in doc_preprocessor_results
348
+ ]
338
349
 
339
- det_res = next(
340
- self.text_det_model(doc_preprocessor_image, **text_det_params)
350
+ det_results = list(
351
+ self.text_det_model(doc_preprocessor_images, **text_det_params)
341
352
  )
342
353
 
343
- dt_polys = det_res["dt_polys"]
344
- det_res["dt_scores"]
345
-
346
- dt_polys = self._sort_boxes(dt_polys)
347
-
348
- single_img_res = {
349
- "input_path": batch_data.input_paths[0],
350
- "page_index": batch_data.page_indexes[0],
351
- "doc_preprocessor_res": doc_preprocessor_res,
352
- "dt_polys": dt_polys,
353
- "model_settings": model_settings,
354
- "text_det_params": text_det_params,
355
- "text_type": self.text_type,
356
- "text_rec_score_thresh": text_rec_score_thresh,
357
- }
358
-
359
- single_img_res["rec_texts"] = []
360
- single_img_res["rec_scores"] = []
361
- single_img_res["rec_polys"] = []
362
- if len(dt_polys) > 0:
363
- all_subs_of_img = list(
364
- self._crop_by_polys(doc_preprocessor_image, dt_polys)
354
+ dt_polys_list = [item["dt_polys"] for item in det_results]
355
+
356
+ dt_polys_list = [self._sort_boxes(item) for item in dt_polys_list]
357
+
358
+ results = [
359
+ {
360
+ "input_path": input_path,
361
+ "page_index": page_index,
362
+ "doc_preprocessor_res": doc_preprocessor_res,
363
+ "dt_polys": dt_polys,
364
+ "model_settings": model_settings,
365
+ "text_det_params": text_det_params,
366
+ "text_type": self.text_type,
367
+ "text_rec_score_thresh": text_rec_score_thresh,
368
+ "rec_texts": [],
369
+ "rec_scores": [],
370
+ "rec_polys": [],
371
+ }
372
+ for input_path, page_index, doc_preprocessor_res, dt_polys in zip(
373
+ batch_data.input_paths,
374
+ batch_data.page_indexes,
375
+ doc_preprocessor_results,
376
+ dt_polys_list,
365
377
  )
378
+ ]
379
+
380
+ indices = list(range(len(doc_preprocessor_images)))
381
+ indices = [idx for idx in indices if len(dt_polys_list[idx]) > 0]
382
+
383
+ if indices:
384
+ all_subs_of_imgs = []
385
+ chunk_indices = [0]
386
+ for idx in indices:
387
+ all_subs_of_img = list(
388
+ self._crop_by_polys(
389
+ doc_preprocessor_images[idx], dt_polys_list[idx]
390
+ )
391
+ )
392
+ all_subs_of_imgs.extend(all_subs_of_img)
393
+ chunk_indices.append(chunk_indices[-1] + len(all_subs_of_img))
394
+
366
395
  # use textline orientation model
367
396
  if model_settings["use_textline_orientation"]:
368
397
  angles = [
369
398
  int(textline_angle_info["class_ids"][0])
370
399
  for textline_angle_info in self.textline_orientation_model(
371
- all_subs_of_img
400
+ all_subs_of_imgs
372
401
  )
373
402
  ]
374
- all_subs_of_img = self.rotate_image(all_subs_of_img, angles)
403
+ all_subs_of_imgs = self.rotate_image(all_subs_of_imgs, angles)
375
404
  else:
376
- angles = [-1] * len(all_subs_of_img)
377
- single_img_res["textline_orientation_angles"] = angles
378
-
379
- sub_img_info_list = [
380
- {
381
- "sub_img_id": img_id,
382
- "sub_img_ratio": sub_img.shape[1] / float(sub_img.shape[0]),
383
- }
384
- for img_id, sub_img in enumerate(all_subs_of_img)
385
- ]
386
- sorted_subs_info = sorted(
387
- sub_img_info_list, key=lambda x: x["sub_img_ratio"]
388
- )
389
- sorted_subs_of_img = [
390
- all_subs_of_img[x["sub_img_id"]] for x in sorted_subs_info
391
- ]
392
- for idx, rec_res in enumerate(self.text_rec_model(sorted_subs_of_img)):
393
- sub_img_id = sorted_subs_info[idx]["sub_img_id"]
394
- sub_img_info_list[sub_img_id]["rec_res"] = rec_res
395
- for sno in range(len(sub_img_info_list)):
396
- rec_res = sub_img_info_list[sno]["rec_res"]
397
- if rec_res["rec_score"] >= text_rec_score_thresh:
398
- single_img_res["rec_texts"].append(rec_res["rec_text"])
399
- single_img_res["rec_scores"].append(rec_res["rec_score"])
400
- single_img_res["rec_polys"].append(dt_polys[sno])
401
- if self.text_type == "general":
402
- rec_boxes = convert_points_to_boxes(single_img_res["rec_polys"])
403
- single_img_res["rec_boxes"] = rec_boxes
404
- else:
405
- single_img_res["rec_boxes"] = np.array([])
406
- yield OCRResult(single_img_res)
405
+ angles = [-1] * len(all_subs_of_imgs)
406
+ for i, idx in enumerate(indices):
407
+ res = results[idx]
408
+ res["textline_orientation_angles"] = angles[
409
+ chunk_indices[i] : chunk_indices[i + 1]
410
+ ]
411
+
412
+ # TODO: Process all sub-images in the batch together
413
+ for i, idx in enumerate(indices):
414
+ all_subs_of_img = all_subs_of_imgs[
415
+ chunk_indices[i] : chunk_indices[i + 1]
416
+ ]
417
+ res = results[idx]
418
+ dt_polys = dt_polys_list[idx]
419
+ sub_img_info_list = [
420
+ {
421
+ "sub_img_id": img_id,
422
+ "sub_img_ratio": sub_img.shape[1] / float(sub_img.shape[0]),
423
+ }
424
+ for img_id, sub_img in enumerate(all_subs_of_img)
425
+ ]
426
+ sorted_subs_info = sorted(
427
+ sub_img_info_list, key=lambda x: x["sub_img_ratio"]
428
+ )
429
+ sorted_subs_of_img = [
430
+ all_subs_of_img[x["sub_img_id"]] for x in sorted_subs_info
431
+ ]
432
+ for i, rec_res in enumerate(
433
+ self.text_rec_model(sorted_subs_of_img)
434
+ ):
435
+ sub_img_id = sorted_subs_info[i]["sub_img_id"]
436
+ sub_img_info_list[sub_img_id]["rec_res"] = rec_res
437
+ for sno in range(len(sub_img_info_list)):
438
+ rec_res = sub_img_info_list[sno]["rec_res"]
439
+ if rec_res["rec_score"] >= text_rec_score_thresh:
440
+ res["rec_texts"].append(rec_res["rec_text"])
441
+ res["rec_scores"].append(rec_res["rec_score"])
442
+ res["rec_polys"].append(dt_polys[sno])
443
+
444
+ for res in results:
445
+ if self.text_type == "general":
446
+ rec_boxes = convert_points_to_boxes(res["rec_polys"])
447
+ res["rec_boxes"] = rec_boxes
448
+ else:
449
+ res["rec_boxes"] = np.array([])
450
+
451
+ yield OCRResult(res)
452
+
453
+
454
+ @pipeline_requires_extra("ocr")
455
+ class OCRPipeline(AutoParallelImageSimpleInferencePipeline):
456
+ entities = "OCR"
457
+
458
+ @property
459
+ def _pipeline_cls(self):
460
+ return _OCRPipeline
461
+
462
+ def _get_batch_size(self, config):
463
+ return config.get("batch_size", 1)
@@ -14,14 +14,13 @@
14
14
 
15
15
  import math
16
16
  import random
17
- from pathlib import Path
18
17
  from typing import Dict
19
18
 
20
19
  import numpy as np
21
20
  from PIL import Image, ImageDraw
22
21
 
23
22
  from ....utils.deps import class_requires_deps, function_requires_deps, is_dep_available
24
- from ....utils.fonts import SIMFANG_FONT_FILE_PATH, create_font
23
+ from ....utils.fonts import SIMFANG_FONT_FILE_PATH, create_font, create_font_vertical
25
24
  from ...common.result import BaseCVResult, JsonMixin
26
25
 
27
26
  if is_dep_available("opencv-contrib-python"):
@@ -32,15 +31,6 @@ if is_dep_available("opencv-contrib-python"):
32
31
  class OCRResult(BaseCVResult):
33
32
  """OCR result"""
34
33
 
35
- def _get_input_fn(self):
36
- fn = super()._get_input_fn()
37
- if (page_idx := self["page_index"]) is not None:
38
- fp = Path(fn)
39
- stem, suffix = fp.stem, fp.suffix
40
- return f"{stem}_{page_idx}{suffix}"
41
- else:
42
- return fn
43
-
44
34
  def get_minarea_rect(self, points: np.ndarray) -> np.ndarray:
45
35
  """
46
36
  Get the minimum area rectangle for the given points using OpenCV.
@@ -106,7 +96,9 @@ class OCRResult(BaseCVResult):
106
96
  height = int(0.5 * (max(box[:, 1]) - min(box[:, 1])))
107
97
  box[:2, 1] = np.mean(box[:, 1])
108
98
  box[2:, 1] = np.mean(box[:, 1]) + min(20, height)
109
- draw_left.polygon(box, fill=color)
99
+ box_pts = [(int(x), int(y)) for x, y in box.tolist()]
100
+ draw_left.polygon(box_pts, fill=color)
101
+
110
102
  img_right_text = draw_box_txt_fine(
111
103
  (w, h), box, txt, SIMFANG_FONT_FILE_PATH
112
104
  )
@@ -214,19 +206,20 @@ def draw_box_txt_fine(
214
206
  np.ndarray: An image with the text drawn in the specified box.
215
207
  """
216
208
  box_height = int(
217
- math.sqrt((box[0][0] - box[3][0]) ** 2 + (box[0][1] - box[3][1]) ** 2)
209
+ math.sqrt(float(box[0][0] - box[3][0]) ** 2 + float(box[0][1] - box[3][1]) ** 2)
218
210
  )
219
211
  box_width = int(
220
- math.sqrt((box[0][0] - box[1][0]) ** 2 + (box[0][1] - box[1][1]) ** 2)
212
+ math.sqrt(float(box[0][0] - box[1][0]) ** 2 + float(box[0][1] - box[1][1]) ** 2)
221
213
  )
222
214
 
223
215
  if box_height > 2 * box_width and box_height > 30:
224
- img_text = Image.new("RGB", (box_height, box_width), (255, 255, 255))
216
+ img_text = Image.new("RGB", (box_width, box_height), (255, 255, 255))
225
217
  draw_text = ImageDraw.Draw(img_text)
226
218
  if txt:
227
- font = create_font(txt, (box_height, box_width), font_path)
228
- draw_text.text([0, 0], txt, fill=(0, 0, 0), font=font)
229
- img_text = img_text.transpose(Image.ROTATE_270)
219
+ font = create_font_vertical(txt, (box_width, box_height), font_path)
220
+ draw_vertical_text(
221
+ draw_text, (0, 0), txt, font, fill=(0, 0, 0), line_spacing=2
222
+ )
230
223
  else:
231
224
  img_text = Image.new("RGB", (box_width, box_height), (255, 255, 255))
232
225
  draw_text = ImageDraw.Draw(img_text)
@@ -250,3 +243,13 @@ def draw_box_txt_fine(
250
243
  borderValue=(255, 255, 255),
251
244
  )
252
245
  return img_right_text
246
+
247
+
248
+ @function_requires_deps("opencv-contrib-python")
249
+ def draw_vertical_text(draw, position, text, font, fill=(0, 0, 0), line_spacing=2):
250
+ x, y = position
251
+ for char in text:
252
+ draw.text((x, y), char, font=font, fill=fill)
253
+ bbox = font.getbbox(char)
254
+ char_height = bbox[3] - bbox[1]
255
+ y += char_height + line_spacing
@@ -45,9 +45,9 @@ class OpenVocabularyDetectionPipeline(BasePipeline):
45
45
  device (str): The device to run the prediction on. Default is None.
46
46
  pp_option (PaddlePredictorOption): Options for PaddlePaddle predictor. Default is None.
47
47
  use_hpip (bool, optional): Whether to use the high-performance
48
- inference plugin (HPIP). Defaults to False.
48
+ inference plugin (HPIP) by default. Defaults to False.
49
49
  hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
50
- The high-performance inference configuration dictionary.
50
+ The default high-performance inference configuration dictionary.
51
51
  Defaults to None.
52
52
  """
53
53
  super().__init__(
@@ -47,9 +47,9 @@ class OpenVocabularySegmentationPipeline(BasePipeline):
47
47
  device (str): The device to run the prediction on. Default is None.
48
48
  pp_option (PaddlePredictorOption): Options for PaddlePaddle predictor. Default is None.
49
49
  use_hpip (bool, optional): Whether to use the high-performance
50
- inference plugin (HPIP). Defaults to False.
50
+ inference plugin (HPIP) by default. Defaults to False.
51
51
  hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
52
- The high-performance inference configuration dictionary.
52
+ The default high-performance inference configuration dictionary.
53
53
  Defaults to None.
54
54
  """
55
55
  super().__init__(
@@ -37,9 +37,9 @@ class PP_ChatOCR_Pipeline(BasePipeline):
37
37
  device (str, optional): Device to run the predictions on. Defaults to None.
38
38
  pp_option (PaddlePredictorOption, optional): PaddlePredictor options. Defaults to None.
39
39
  use_hpip (bool, optional): Whether to use the high-performance
40
- inference plugin (HPIP). Defaults to False.
40
+ inference plugin (HPIP) by default. Defaults to False.
41
41
  hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
42
- The high-performance inference configuration dictionary.
42
+ The default high-performance inference configuration dictionary.
43
43
  Defaults to None.
44
44
  """
45
45
 
@@ -54,9 +54,9 @@ class PP_ChatOCRv3_Pipeline(PP_ChatOCR_Pipeline):
54
54
  device (str, optional): Device to run the predictions on. Defaults to None.
55
55
  pp_option (PaddlePredictorOption, optional): PaddlePredictor options. Defaults to None.
56
56
  use_hpip (bool, optional): Whether to use the high-performance
57
- inference plugin (HPIP). Defaults to False.
57
+ inference plugin (HPIP) by default. Defaults to False.
58
58
  hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
59
- The high-performance inference configuration dictionary.
59
+ The default high-performance inference configuration dictionary.
60
60
  Defaults to None.
61
61
  initial_predictor (bool, optional): Whether to initialize the predictor. Defaults to True.
62
62
  """
@@ -206,7 +206,6 @@ class PP_ChatOCRv3_Pipeline(PP_ChatOCR_Pipeline):
206
206
  input: Union[str, List[str], np.ndarray, List[np.ndarray]],
207
207
  use_doc_orientation_classify: Optional[bool] = None,
208
208
  use_doc_unwarping: Optional[bool] = None,
209
- use_general_ocr: Optional[bool] = None,
210
209
  use_seal_recognition: Optional[bool] = None,
211
210
  use_table_recognition: Optional[bool] = None,
212
211
  layout_threshold: Optional[Union[float, dict]] = None,
@@ -237,7 +236,6 @@ class PP_ChatOCRv3_Pipeline(PP_ChatOCR_Pipeline):
237
236
  numpy array of an image, or list of numpy arrays.
238
237
  use_doc_orientation_classify (bool): Flag to use document orientation classification.
239
238
  use_doc_unwarping (bool): Flag to use document unwarping.
240
- use_general_ocr (bool): Flag to use general OCR.
241
239
  use_seal_recognition (bool): Flag to use seal recognition.
242
240
  use_table_recognition (bool): Flag to use table recognition.
243
241
  layout_threshold (Optional[float]): The threshold value to filter out low-confidence predictions. Default is None.
@@ -280,7 +278,6 @@ class PP_ChatOCRv3_Pipeline(PP_ChatOCR_Pipeline):
280
278
  input,
281
279
  use_doc_orientation_classify=use_doc_orientation_classify,
282
280
  use_doc_unwarping=use_doc_unwarping,
283
- use_general_ocr=use_general_ocr,
284
281
  use_seal_recognition=use_seal_recognition,
285
282
  use_table_recognition=use_table_recognition,
286
283
  layout_threshold=layout_threshold,
@@ -62,9 +62,9 @@ class PP_ChatOCRv4_Pipeline(PP_ChatOCR_Pipeline):
62
62
  device (str, optional): Device to run the predictions on. Defaults to None.
63
63
  pp_option (PaddlePredictorOption, optional): PaddlePredictor options. Defaults to None.
64
64
  use_hpip (bool, optional): Whether to use the high-performance
65
- inference plugin (HPIP). Defaults to False.
65
+ inference plugin (HPIP) by default. Defaults to False.
66
66
  hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
67
- The high-performance inference configuration dictionary.
67
+ The default high-performance inference configuration dictionary.
68
68
  Defaults to None.
69
69
  initial_predictor (bool, optional): Whether to initialize the predictor. Defaults to True.
70
70
  """
@@ -249,7 +249,7 @@ class PP_ChatOCRv4_Pipeline(PP_ChatOCR_Pipeline):
249
249
  input: Union[str, List[str], np.ndarray, List[np.ndarray]],
250
250
  use_doc_orientation_classify: Optional[bool] = None,
251
251
  use_doc_unwarping: Optional[bool] = None,
252
- use_general_ocr: Optional[bool] = None,
252
+ use_textline_orientation: Optional[bool] = None,
253
253
  use_seal_recognition: Optional[bool] = None,
254
254
  use_table_recognition: Optional[bool] = None,
255
255
  layout_threshold: Optional[Union[float, dict]] = None,
@@ -280,7 +280,7 @@ class PP_ChatOCRv4_Pipeline(PP_ChatOCR_Pipeline):
280
280
  numpy array of an image, or list of numpy arrays.
281
281
  use_doc_orientation_classify (bool): Flag to use document orientation classification.
282
282
  use_doc_unwarping (bool): Flag to use document unwarping.
283
- use_general_ocr (bool): Flag to use general OCR.
283
+ use_textline_orientation (Optional[bool]): Whether to use textline orientation prediction.
284
284
  use_seal_recognition (bool): Flag to use seal recognition.
285
285
  use_table_recognition (bool): Flag to use table recognition.
286
286
  layout_threshold (Optional[float]): The threshold value to filter out low-confidence predictions. Default is None.
@@ -322,7 +322,7 @@ class PP_ChatOCRv4_Pipeline(PP_ChatOCR_Pipeline):
322
322
  input,
323
323
  use_doc_orientation_classify=use_doc_orientation_classify,
324
324
  use_doc_unwarping=use_doc_unwarping,
325
- use_general_ocr=use_general_ocr,
325
+ use_textline_orientation=use_textline_orientation,
326
326
  use_seal_recognition=use_seal_recognition,
327
327
  use_table_recognition=use_table_recognition,
328
328
  layout_threshold=layout_threshold,
@@ -638,7 +638,7 @@ class PP_ChatOCRv4_Pipeline(PP_ChatOCR_Pipeline):
638
638
 
639
639
  for image_array in self.img_reader([input]):
640
640
 
641
- image_string = cv2.imencode(".jpg", image_array)[1].tostring()
641
+ image_string = cv2.imencode(".jpg", image_array)[1].tobytes()
642
642
  image_base64 = base64.b64encode(image_string).decode("utf-8")
643
643
  result = {}
644
644
  for key in key_list:
@@ -20,15 +20,13 @@ from ....utils.deps import pipeline_requires_extra
20
20
  from ...models.object_detection.result import DetResult
21
21
  from ...utils.hpi import HPIConfig
22
22
  from ...utils.pp_option import PaddlePredictorOption
23
+ from .._parallel import AutoParallelImageSimpleInferencePipeline
23
24
  from ..base import BasePipeline
24
25
 
25
26
 
26
- @pipeline_requires_extra("cv")
27
- class RotatedObjectDetectionPipeline(BasePipeline):
27
+ class _RotatedObjectDetectionPipeline(BasePipeline):
28
28
  """Rotated Object Detection Pipeline"""
29
29
 
30
- entities = "rotated_object_detection"
31
-
32
30
  def __init__(
33
31
  self,
34
32
  config: Dict,
@@ -45,9 +43,9 @@ class RotatedObjectDetectionPipeline(BasePipeline):
45
43
  device (str): The device to run the prediction on. Default is None.
46
44
  pp_option (PaddlePredictorOption): Options for PaddlePaddle predictor. Default is None.
47
45
  use_hpip (bool, optional): Whether to use the high-performance
48
- inference plugin (HPIP). Defaults to False.
46
+ inference plugin (HPIP) by default. Defaults to False.
49
47
  hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
50
- The high-performance inference configuration dictionary.
48
+ The default high-performance inference configuration dictionary.
51
49
  Defaults to None.
52
50
  """
53
51
  super().__init__(
@@ -83,3 +81,15 @@ class RotatedObjectDetectionPipeline(BasePipeline):
83
81
  DetResult: The predicted rotated object detection results.
84
82
  """
85
83
  yield from self.rotated_object_detection_model(input, threshold=threshold)
84
+
85
+
86
+ @pipeline_requires_extra("cv")
87
+ class RotatedObjectDetectionPipeline(AutoParallelImageSimpleInferencePipeline):
88
+ entities = "rotated_object_detection"
89
+
90
+ @property
91
+ def _pipeline_cls(self):
92
+ return _RotatedObjectDetectionPipeline
93
+
94
+ def _get_batch_size(self, config):
95
+ return config["SubModules"]["RotatedObjectDetection"].get("batch_size", 1)